Pierre Choffet | Git repositories - wdef_tools.git/commitdiff
Add script to compare with Wikidata’s RDF master
author Pierre Choffet <peuc@wanadoo.fr>
Sat, 15 Jul 2023 06:40:28 +0000 (02:40 -0400)
committer Pierre Choffet <peuc@wanadoo.fr>
Sat, 15 Jul 2023 06:40:28 +0000 (02:40 -0400)
Same as get_merged_element.sh, except that it tests all wdef elements that have an equivalent in Wikidata.

scripts/get_merged_all.sh [new file with mode: 0755]

diff --git a/scripts/get_merged_all.sh b/scripts/get_merged_all.sh
new file mode 100755 (executable)
index 0000000..548a1c3
--- /dev/null
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+# get_merged_all.sh - In a WDEF, reduce all properties already in Wikidata.
+# Copyright (C) 2023  Pierre Choffet
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of version 3 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+set -euo pipefail
+
+# Directory holding this script; get_merged_element.sh is run from there.
+SCRIPT_DIR="$(dirname "$0")"
+readonly SCRIPT_DIR
+
+# Print the command-line help on stdout.
+function usage() {
+       cat << EOF
+USAGE: get_merged_all.sh <wdef_path>
+
+From a given WDEF, remove all properties that are already in Wikidata. The reduced
+WDEF is returned.
+EOF
+}
+
+if [ "$#" -ne 1 ]
+then
+       usage >&2
+       exit 1
+fi
+
+readonly WDEF_PATH="${1}"
+
+# Check wdef exists and is not empty
+if [ ! -s "${WDEF_PATH}" ]
+then
+       echo "WDEF file doesn't exist. Exiting" >&2
+       exit 1
+fi
+
+# Collect the ids of all elements whose wdef:id starts with "Q", separated by
+# spaces on a single line.
+# xmlstarlet sel exits with 1 when the selection matches nothing, which only
+# means there is nothing to merge; higher statuses are presumed to be real
+# failures (unreadable or malformed file, missing tool) - TODO confirm.
+XML_STATUS=0
+WDIDS_RAW="$(xmlstarlet sel -N wdef=https://purl.choffet.net/wdef -t -m "/wdef:knowledge/wdef:element[substring(@wdef:id, 1, 1) = 'Q']" -v '@wdef:id' -v "' '" "${WDEF_PATH}")" || XML_STATUS=$?
+if [ "${XML_STATUS}" -gt 1 ]
+then
+       echo "Unable to read element ids from WDEF file. Exiting" >&2
+       exit 1
+fi
+read -r -a WDIDS <<<"${WDIDS_RAW}"
+
+# Work in a private directory removed on every exit path, so no temporary
+# file is left behind when a merge step fails.
+WORK_DIR="$(mktemp -d)"
+readonly WORK_DIR
+trap 'rm -rf -- "${WORK_DIR}"' EXIT
+
+readonly LAST_ITERATION_PATH="${WORK_DIR}/last"
+readonly NEW_WDEF_PATH="${WORK_DIR}/new"
+cp -- "${WDEF_PATH}" "${LAST_ITERATION_PATH}"
+
+# Feed the output of each merge into the next one. The +"..." expansion keeps
+# an empty id list from tripping "set -u" on older bash versions.
+for qid in ${WDIDS[@]+"${WDIDS[@]}"}
+do
+       "${SCRIPT_DIR}/get_merged_element.sh" "${LAST_ITERATION_PATH}" "${qid}" > "${NEW_WDEF_PATH}"
+       mv -- "${NEW_WDEF_PATH}" "${LAST_ITERATION_PATH}"
+done
+
+# With no matching element the loop never runs and the input is returned as is.
+cat -- "${LAST_ITERATION_PATH}"