From ffdd0f84516bd39601220e636cb0b119620452e4 Mon Sep 17 00:00:00 2001
From: Pierre Choffet
Date: Sat, 15 Jul 2023 02:40:28 -0400
Subject: [PATCH] =?utf8?q?Add=20script=20to=20compare=20with=20Wikidata?=
 =?utf8?q?=E2=80=99s=20RDF?=
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Same as get_merged_element.sh except it tests all wdef elements that have an
equivalent in Wikidata.
---
Reviewed revision: the script below was amended during review, so the blob
id on the "index" line no longer matches its content.

 scripts/get_merged_all.sh | 75 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100755 scripts/get_merged_all.sh

diff --git a/scripts/get_merged_all.sh b/scripts/get_merged_all.sh
new file mode 100755
index 0000000..548a1c3
--- /dev/null
+++ b/scripts/get_merged_all.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+# get_merged_all.sh - In a WDEF, reduce all properties already in Wikidata.
+# Copyright (C) 2023 Pierre Choffet
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of version 3 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+set -euo pipefail
+
+# Directory of this script; get_merged_element.sh is run from there.
+# Assigned separately from "readonly" so a dirname failure is not masked.
+SCRIPT_DIR="$(dirname -- "$0")"
+readonly SCRIPT_DIR
+
+# Print the help text on stdout.
+function usage() {
+	cat << EOF
+USAGE: get_merged_all.sh <WDEF_PATH>
+
+From a given WDEF, remove all properties that are already in Wikidata. The reduced
+WDEF is returned.
+EOF
+}
+
+if [ "$#" -ne 1 ]
+then
+	usage >&2
+	exit 1
+fi
+
+readonly WDEF_PATH="${1}"
+
+# Check that the WDEF exists and is not empty
+if [ ! -s "${WDEF_PATH}" ]
+then
+	echo "WDEF file doesn't exist. Exiting" >&2
+	exit 1
+fi
+
+# Collect the wdef:id of every element whose id starts with "Q", separated
+# by spaces, then split them into an array. The exit status of xmlstarlet is
+# not checked: when it prints nothing the array is simply empty.
+WDIDS=()
+read -r -a WDIDS <<<"$(xmlstarlet sel -N wdef=https://purl.choffet.net/wdef -t -m "/wdef:knowledge/wdef:element[substring(@wdef:id, 1, 1) = 'Q']" -v '@wdef:id' -v "' '" "${WDEF_PATH}")"
+
+# All intermediate files live in one private directory that is removed on
+# every exit path, so a failing merge does not leave temporary files behind.
+WORK_DIR="$(mktemp -d)"
+readonly WORK_DIR
+trap 'rm -rf -- "${WORK_DIR}"' EXIT
+
+readonly CURRENT_PATH="${WORK_DIR}/current"
+readonly NEXT_PATH="${WORK_DIR}/next"
+cp -- "${WDEF_PATH}" "${CURRENT_PATH}"
+
+# Feed the output of each merge into the next one. The "+" expansion keeps
+# an empty array from tripping "set -u" on bash older than 4.4.
+for qid in ${WDIDS[@]+"${WDIDS[@]}"}
+do
+	"${SCRIPT_DIR}/get_merged_element.sh" "${CURRENT_PATH}" "${qid}" > "${NEXT_PATH}"
+	mv -- "${NEXT_PATH}" "${CURRENT_PATH}"
+done
+
+# With no matching element the loop never runs and the input is returned as is.
+cat -- "${CURRENT_PATH}"
-- 
2.47.0