#!/bin/bash

# get_merged_all.sh - In a WDEF, reduce all properties already in Wikidata.
# Copyright (C) 2023 Pierre Choffet
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of version 3 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

# Provenance: added to wdef_tools as scripts/get_merged_all.sh by the commit
# "Add script to compare with Wikidata's RDF" (Pierre Choffet,
# Sat, 15 Jul 2023 06:40:28 +0000). Same as get_merged_element.sh except it
# tests all wdef elements that have an equivalent into Wikidata.
#
# Usage:   get_merged_all.sh <WDEF_PATH>
# Output:  the reduced WDEF on stdout; diagnostics on stderr.
# Exit:    0 on success, 1 on bad usage or missing/empty input, otherwise the
#          failing status of xmlstarlet or get_merged_element.sh.
# Needs:   xmlstarlet in PATH and get_merged_element.sh next to this script.

set -euo pipefail

# Declared separately from the assignment so a failing dirname is not masked
# by the exit status of `readonly`.
SCRIPT_DIR="$(dirname -- "$0")"
readonly SCRIPT_DIR

# Print the help text on stdout (callers redirect it as needed).
usage() {
  cat << 'EOF'
USAGE: get_merged_all.sh <WDEF_PATH>

From a given WDEF, remove all properties that are already in Wikidata. The reduced
WDEF is returned.
EOF
}

if [[ "$#" -ne 1 ]]; then
  usage >&2
  exit 1
fi

readonly WDEF_PATH="${1}"

# Check wdef exists (and is not empty). A bare `exit` here would return the
# status of the preceding echo, i.e. success, so the status is explicit.
if [[ ! -s "${WDEF_PATH}" ]]; then
  echo "WDEF file doesn't exist. Exiting" >&2
  exit 1
fi

# Collect the ids of every element whose id starts with 'Q', separated by
# spaces. The command substitution is a plain assignment (not buried in a
# here-string) so that its exit status is observable.
# NOTE(review): xmlstarlet sel is expected to return 1 for "no match" and a
# higher status for real errors - confirm against the installed version.
xml_status=0
wdids_raw="$(xmlstarlet sel -N wdef=https://purl.choffet.net/wdef \
  -t -m "/wdef:knowledge/wdef:element[substring(@wdef:id, 1, 1) = 'Q']" \
  -v '@wdef:id' -v "' '" "${WDEF_PATH}")" || xml_status=$?

if [[ "${xml_status}" -gt 1 ]]; then
  echo "Could not read element ids from WDEF (xmlstarlet status ${xml_status}). Exiting" >&2
  exit "${xml_status}"
fi

WDIDS=()
read -r -a WDIDS <<< "${wdids_raw}"

# All intermediate files live in one private directory that is removed on
# every exit path, including a failure of get_merged_element.sh under set -e.
WORK_DIR="$(mktemp -d)"
readonly WORK_DIR
trap 'rm -rf -- "${WORK_DIR:?}"' EXIT

readonly LAST_ITERATION_PATH="${WORK_DIR}/last_iteration"
readonly NEW_WDEF_PATH="${WORK_DIR}/new_wdef"

cp -- "${WDEF_PATH}" "${LAST_ITERATION_PATH}"

# Feed the result of each merge into the next one. The ${arr[@]+...} form
# keeps an empty id list from tripping `set -u` on Bash older than 4.4.
for qid in ${WDIDS[@]+"${WDIDS[@]}"}; do
  "${SCRIPT_DIR}/get_merged_element.sh" "${LAST_ITERATION_PATH}" "${qid}" > "${NEW_WDEF_PATH}"
  mv -f -- "${NEW_WDEF_PATH}" "${LAST_ITERATION_PATH}"
done

# With no matching element the loop never ran and this is the untouched copy
# of the input, which is the correct "nothing to reduce" result.
cat -- "${LAST_ITERATION_PATH}"