#!/bin/bash

# demo.sh - Process demonstration for WMO.
# Copyright (C) 2021 Pierre Choffet
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of version 3 of the GNU General Public License as published
# by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# Arguments:
#   $1 - path of the XML file handed to xslts/isolate_station.xslt
#   $2 - value passed to that stylesheet as its "wigos-id" parameter
# Exit status:
#   2 on a usage error, 1 when $1 is not a regular file, 1 after the banner
#   has been printed (see the kill switch below).

set -euo pipefail

#######################################
# Print the command-line synopsis on stdout.
# NOTE(review): the argument placeholders are reconstructed from how ${1} and
# ${2} are used further down - confirm the wording against the upstream file.
#######################################
function usage {
  cat << "EOF"
Import OSCAR station metadata to Wikidata

USAGE: demo.sh <wdef-file> <wigos-id>
EOF
}

#######################################
# Print the banner and the "proof of concept" disclaimer on stderr.
# NOTE(review): the banner's column alignment and the repository URL that
# should follow "see the repository at" look lost in this copy - restore both
# from the upstream file.
#######################################
function intro {
  cat >&2 << "EOF"

 .g8""8q. .M"""bgd .g8"""bgd db `7MM"""Mq. mm
 .dP' `YM. ,MI "Y .dP' `M ;MM: MM `MM. MM
 dM' `MM `MMb. dM' ` ,V^MM. MM ,M9 mmMMmm ,pW"Wq.
 MM MM `YMMNq. MM ,M `MM MMmmdM9 MM 6W' `Wb
 MM. ,MP . `MM MM. AbmmmqMA MM YM. MM 8M M8
 `Mb. ,dP' Mb dM `Mb. ,' A' VML MM `Mb. MM YA. ,A9
 `"bmmd"' P"Ybmmd" `"bmmmd'.AMA. .AMMA..JMML. .JMM. `Mbmo`Ybmd9'

 ,, ,, ,,
 `7MMF' A `7MF'db `7MM db `7MM mm
 `MA ,MA ,V MM MM MM
 VM: ,VVM: ,V `7MM MM ,MP'`7MM ,M""bMM ,6"Yb.mmMMmm ,6"Yb.
 MM. M' MM. M' MM MM ;Y MM ,AP MM 8) MM MM 8) MM
 `MM A' `MM A' MM MM;Mm MM 8MI MM ,pm9MM MM ,pm9MM
 :MM; :MM; MM MM `Mb. MM `Mb MM 8M MM MM 8M MM
 VF VF .JMML..JMML. YA..JMML.`Wbmd"MML.`Moo9^Yo.`Mbmo`Moo9^Yo.


This tool has been written in 2021 as a one-time proof of concept to import
Oscar station metadata into Wikidata. It's not meant to be used outside of this
controlled environment as it would inevitably lead any careful user to
insert duplicates and wrong information into the Wikimedia project.

The full original code remains available here for educational purpose.

For up to date code, see the repository at

EOF
}

# Check user args. Both failure paths now report a non-zero status; a bare
# `exit` returned the status of the previous command, i.e. success.
if [[ "$#" -ne 2 ]]; then
  usage >&2
  exit 2
elif [[ ! -f "${1}" ]]; then
  echo "File \"${1}\" doesn't exist." >&2
  exit 1
fi

# Kill switch: the banner explains that the tool must not be run outside its
# original environment, so the script stops here. This appears deliberate;
# everything below is kept for reference and is unreachable.
intro
exit 1

# Generate partial wdef file
PARTIAL_WDEF_PATH="$(mktemp)"
# Remove the temporary file on every exit path, including a failed import.
trap 'rm -f -- "${PARTIAL_WDEF_PATH}"' EXIT
xmlstarlet tr xslts/isolate_station.xslt -s "wigos-id=${2}" "${1}" \
  | xmlstarlet fo > "${PARTIAL_WDEF_PATH}"

# Import it
./import.sh "${PARTIAL_WDEF_PATH}" elements.log
echo "Import done" >&2
#!/bin/bash

# import.sh - Import data wdef into Wikidata.
# Copyright (C) 2020-2021 Pierre Choffet
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of version 3 of the GNU General Public License as published
# by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# Arguments:
#   $1 - wdef XML file to import (must exist and be non-empty)
#   $2 - log file; one "<wdef id> = <new entity id>" line is appended per
#        created entity
# External tools invoked: xmlstarlet, wd, jq.
#
# The script repeatedly asks xslts/get_next_step.xslt for the next step, runs
# it through `wd`, rewrites the wdef with the identifier the call returned and
# stores each rewritten generation as <n>.xml in a fresh temporary directory.

set -euo pipefail

# Internals config
readonly IMPORT_PLAN_XSLT_PATH='xslts/get_next_step.xslt'
readonly REPLACE_WDEF_ID_XSLT_PATH='xslts/replace_id.xslt'
readonly REMOVE_LABELS_DESCRIPTIONS_XSLT_PATH='xslts/remove_labels_descriptions.xslt'
readonly CANONICALIZE_WDEF_XSLT_PATH='xslts/canonicalize.xslt'

# Set by main. WDEF_PATH always points at the current generation and is read
# by the step functions below.
WDEF_PATH=''
NEW_ELEMENTS_LOG_PATH=''

#######################################
# Print a progress message on stderr. The step functions are called through
# command substitution, so their stdout is reserved for the returned path.
# Arguments: message words
#######################################
function logStep {
  printf '%s\n' "$*" >&2
}

#######################################
# Reject an empty or "null" identifier (jq -r prints "null" for a missing key).
# Arguments: $1 - identifier, $2 - name of the wd sub-command that produced it
# Returns:   0 when the identifier is usable, 1 otherwise
#######################################
function requireId {
  if [[ -z "${1}" || "${1}" == 'null' ]]; then
    echo "wd ${2} did not return an identifier." >&2
    return 1
  fi
}

#######################################
# Run the replace-id stylesheet on ${WDEF_PATH} with the given old-id/new-id
# parameters, writing the result to a new temporary file.
# Globals:   REPLACE_WDEF_ID_XSLT_PATH, WDEF_PATH (read)
# Arguments: $1 - old id, $2 - new id
# Outputs:   path of the new file on stdout
# Returns:   non-zero when mktemp or xmlstarlet fails
#######################################
function replaceId {
  local -r old_id="${1}"
  local -r new_id="${2}"
  local out

  out=$(mktemp) || return 1
  if ! xmlstarlet tr "${REPLACE_WDEF_ID_XSLT_PATH}" \
      -s old-id="${old_id}" -s new-id="${new_id}" "${WDEF_PATH}" > "${out}"; then
    rm -f -- "${out}"
    return 1
  fi
  echo "${out}"
}

#######################################
# Create an entity with `wd create-entity`, log its new id, then rewrite the
# wdef: replace the wdef id by the new id and run the remove-labels-and-
# descriptions stylesheet for that id.
# Globals:   NEW_ELEMENTS_LOG_PATH (appended to), WDEF_PATH (read)
# Arguments: $1 - wdef id of the element, $2 - entity JSON
# Outputs:   path of the rewritten wdef on stdout
# Returns:   non-zero as soon as one command fails. Errexit is cleared inside
#            command substitutions, so every step is checked explicitly.
#######################################
function createEntity {
  local -r wdef_id="${1}"
  local -r entity_json="${2}"
  local element_id new_id_xml reduced_xml

  logStep "→ wd create-entity ${entity_json}"
  sleep 5

  # Create entity and get ID afterwards:
  element_id=$(wd create-entity "${entity_json}" | jq -r '.entity.id') || return 1
  requireId "${element_id}" 'create-entity' || return 1

  echo "${wdef_id} = ${element_id}" >> "${NEW_ELEMENTS_LOG_PATH}" || return 1

  # Replace id
  new_id_xml=$(replaceId "${wdef_id}" "${element_id}") || return 1

  # Remove labels and descriptions
  if ! reduced_xml=$(mktemp); then
    rm -f -- "${new_id_xml}"
    return 1
  fi
  if ! xmlstarlet tr "${REMOVE_LABELS_DESCRIPTIONS_XSLT_PATH}" \
      -s wdef-id="${element_id}" "${new_id_xml}" > "${reduced_xml}"; then
    rm -f -- "${new_id_xml}" "${reduced_xml}"
    return 1
  fi
  rm -f -- "${new_id_xml}"

  echo "${reduced_xml}"
}

#######################################
# Add a claim with `wd add-claim` and replace the wdef id by the claim id.
# Arguments: $1 - wdef id of the claim, $2 - entity id, $3 - property id,
#            $4 - value
# Outputs:   path of the rewritten wdef on stdout
# Returns:   non-zero when the API call or the rewrite fails
#######################################
function addClaim {
  local -r wdef_id="${1}"
  local -r wd_id="${2}"
  local -r wd_pid="${3}"
  local -r value="${4}"
  local claim_id

  logStep "→ wd add-claim ${wd_id} ${wd_pid} ${value}"
  sleep $((3 + RANDOM % 10))

  # Create claim and get ID afterwards:
  claim_id=$(wd add-claim "${wd_id}" "${wd_pid}" "${value}" | jq -r '.claim.id') || return 1
  requireId "${claim_id}" 'add-claim' || return 1

  replaceId "${wdef_id}" "${claim_id}"
}

#######################################
# Add a qualifier with `wd add-qualifier` and replace the wdef id by the
# qualifier hash.
# Arguments: $1 - wdef id of the qualifier, $2 - claim id, $3 - property id,
#            $4 - value
# Outputs:   path of the rewritten wdef on stdout
# Returns:   non-zero when the API call or the rewrite fails
# NOTE(review): the jq filter yields one hash per qualifier of that property;
# if the claim already carries several, the new id is multi-line. Confirm
# whether only the newest hash should be used.
#######################################
function addQualifier {
  local -r qualifier_value_id="${1}"
  local -r value_id="${2}"
  local -r wd_pid="${3}"
  local -r value="${4}"
  local qualifier_id

  logStep "→ wd add-qualifier ${value_id} ${wd_pid} ${value}"
  sleep $((3 + RANDOM % 10))

  # Create qualifier and get ID afterwards. The property id is passed with
  # --arg so it is never parsed as part of the jq program or globbed.
  qualifier_id=$(wd add-qualifier "${value_id}" "${wd_pid}" "${value}" \
    | jq -r --arg pid "${wd_pid}" '.claim.qualifiers[$pid][].hash') || return 1
  requireId "${qualifier_id}" 'add-qualifier' || return 1

  replaceId "${qualifier_value_id}" "${qualifier_id}"
}

#######################################
# Print the first line of the import plan computed for a wdef file.
# The whole output is captured before it is cut, so a non-zero exit from
# xmlstarlet is reported instead of being hidden behind `head`.
# Arguments: $1 - wdef path
# Outputs:   first plan line on stdout (empty when the plan is empty)
#######################################
function nextStep {
  local plan

  plan=$(xmlstarlet tr "${IMPORT_PLAN_XSLT_PATH}" "${1}") || return 1
  printf '%s\n' "${plan%%$'\n'*}"
}

#######################################
# Entry point: validate the arguments, then apply plan steps until none is left.
# Arguments: $1 - wdef path, $2 - new elements log path
#######################################
function main {
  local history_dir generation=1 next_step wb_command new_xml_path
  local wdef_id target_id wd_pid value

  # Check user parameters before reading them (set -u would otherwise abort
  # with an "unbound variable" message).
  if [[ "$#" -lt 2 ]]; then
    echo "USAGE: ${0##*/} <wdef-file> <new-elements-log>" >&2
    exit 1
  fi
  if [[ ! -s "${1}" || ! -r "${1}" ]]; then
    echo "${1} doesn't exist, is empty or is not readable." >&2
    exit 1
  fi
  WDEF_PATH="${1}"
  NEW_ELEMENTS_LOG_PATH="${2}"

  # Track import generations
  history_dir="$(mktemp -d)/"

  echo "New elements corresponding IDs will be append in ${NEW_ELEMENTS_LOG_PATH}" >&2
  echo "Import generations log will be in ${history_dir}" >&2

  # Prepare first step
  cp -- "${WDEF_PATH}" "${history_dir}${generation}.xml"
  WDEF_PATH="${history_dir}${generation}.xml"

  next_step=$(nextStep "${WDEF_PATH}")

  while [[ -n "${next_step}" ]]; do
    wb_command="${next_step%% *}"

    # A step is "<command> <field>...": the last variable of each `read`
    # receives the rest of the line, so values containing spaces stay whole.
    case "${wb_command}" in
      create-entity)
        IFS=' ' read -r _ wdef_id value <<< "${next_step}"
        new_xml_path=$(createEntity "${wdef_id}" "${value}")
        ;;
      add-claim)
        IFS=' ' read -r _ wdef_id target_id wd_pid value <<< "${next_step}"
        new_xml_path=$(addClaim "${wdef_id}" "${target_id}" "${wd_pid}" "${value}")
        ;;
      add-claim-no-value)
        IFS=' ' read -r _ wdef_id target_id wd_pid _ <<< "${next_step}"
        new_xml_path=$(addClaim "${wdef_id}" "${target_id}" "${wd_pid}" '{"snaktype": "novalue"}')
        ;;
      add-qualifier)
        # Takes the whole remainder like add-claim; a single field truncated
        # values such as {"amount": ..., "unit": ...} at their first space.
        IFS=' ' read -r _ wdef_id target_id wd_pid value <<< "${next_step}"
        new_xml_path=$(addQualifier "${wdef_id}" "${target_id}" "${wd_pid}" "${value}")
        ;;
      *)
        echo "Unexpected \"${wb_command}\" command." >&2
        exit 1
        ;;
    esac

    # Check returned string is path
    if [[ ! -s "${new_xml_path}" ]]; then
      echo "\"${wb_command}\" step did not produce a new wdef file." >&2
      exit 1
    fi

    # Generate new canonical version as the next generation, then drop the
    # intermediate file so temporary files do not pile up.
    generation=$((generation + 1))
    xmlstarlet tr "${CANONICALIZE_WDEF_XSLT_PATH}" "${new_xml_path}" \
      > "${history_dir}${generation}.xml"
    rm -f -- "${new_xml_path}"

    # Prepare next step
    WDEF_PATH="${history_dir}${generation}.xml"
    next_step=$(nextStep "${WDEF_PATH}")
  done
}

main "$@"
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/xslts/isolate_station.xslt b/xslts/isolate_station.xslt new file mode 100644 index 0000000..f3d5bc0 --- /dev/null +++ b/xslts/isolate_station.xslt @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + + + + + + No station with + + WIGOS id found. + + + + + diff --git a/xslts/remove_labels_descriptions.xslt b/xslts/remove_labels_descriptions.xslt new file mode 100644 index 0000000..e873be9 --- /dev/null +++ b/xslts/remove_labels_descriptions.xslt @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/xslts/replace_id.xslt b/xslts/replace_id.xslt new file mode 100644 index 0000000..a8bf420 --- /dev/null +++ b/xslts/replace_id.xslt @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -- 2.47.0