#!/bin/bash

# import.sh - Import wdef data into Wikidata.
# Copyright (C) 2020-2021 Pierre Choffet
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of version 3 of the GNU General Public License as published
# by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
# Usage: import.sh WDEF_PATH NEW_ELEMENTS_LOG_PATH
#
# Repeatedly asks the import-plan stylesheet for the next step to perform on
# the wdef document, runs it through the `wd` command, and rewrites the
# document with the identifier returned by `wd`. Each rewritten document
# ("generation") is stored in a temporary history directory. The loop stops
# when the stylesheet yields no further step.

set -euo pipefail

# Internals config
readonly IMPORT_PLAN_XSLT_PATH='xslts/get_next_step.xslt'
readonly REPLACE_WDEF_ID_XSLT_PATH='xslts/replace_id.xslt'
readonly REMOVE_LABELS_DESCRIPTIONS_XSLT_PATH='xslts/remove_labels_descriptions.xslt'
readonly CANONICALIZE_WDEF_XSLT_PATH='xslts/canonicalize.xslt'

# Validate the argument count before dereferencing the positional parameters:
# under `set -u` a missing argument would otherwise abort with an opaque
# "unbound variable" error.
if [ "$#" -lt 2 ]; then
	echo "Usage: ${0} WDEF_PATH NEW_ELEMENTS_LOG_PATH" >&2
	exit 1
fi

WDEF_PATH="${1}"
readonly NEW_ELEMENTS_LOG_PATH="${2}"

# NOTE: the functions below are called through command substitution, where
# bash disables `errexit`. Every command whose failure matters is therefore
# checked explicitly and makes the function return non-zero, which in turn
# aborts the script at the calling assignment.

# Print the first line of the import plan computed for a wdef document.
# Arguments: $1 - path of the wdef document
# Outputs:   the next step on stdout (empty line when nothing is left to do)
# Returns:   non-zero if the transformation fails
function getNextStep {
	local plan
	plan=$(xmlstarlet tr "${IMPORT_PLAN_XSLT_PATH}" "${1}") || return 1
	# Keep only the first line, without a pipeline whose status could be lost.
	printf '%s\n' "${plan%%$'\n'*}"
}

# Create an entity and substitute its new ID in the current wdef document.
# Globals:   WDEF_PATH (read), NEW_ELEMENTS_LOG_PATH (appended to)
# Arguments: $1 - wdef ID of the entity to create
#            $2 - entity JSON handed to `wd create-entity`
# Outputs:   path of the rewritten document on stdout, progress on stderr
# Returns:   non-zero if any step fails or no entity ID is returned
function createEntity {
	local -r wdef_id="${1}"
	local -r entity_json="${2}"
	local response element_id new_id_xml reduced_xml

	echo "→ wd create-entity ${entity_json}" >&2
	sleep 5

	# Create entity and get ID afterwards:
	response=$(wd create-entity "${entity_json}") || return 1
	element_id=$(jq -r '.entity.id' <<< "${response}") || return 1
	if [[ -z "${element_id}" || "${element_id}" == 'null' ]]; then
		echo "wd create-entity returned no entity ID for ${wdef_id}." >&2
		return 1
	fi
	echo "${wdef_id} = ${element_id}" >> "${NEW_ELEMENTS_LOG_PATH}"

	# Replace id
	new_id_xml=$(mktemp) || return 1
	if ! xmlstarlet tr "${REPLACE_WDEF_ID_XSLT_PATH}" -s old-id="${wdef_id}" -s new-id="${element_id}" "${WDEF_PATH}" > "${new_id_xml}"; then
		rm -f -- "${new_id_xml}"
		return 1
	fi

	# Remove labels and descriptions
	if ! reduced_xml=$(mktemp); then
		rm -f -- "${new_id_xml}"
		return 1
	fi
	if ! xmlstarlet tr "${REMOVE_LABELS_DESCRIPTIONS_XSLT_PATH}" -s wdef-id="${element_id}" "${new_id_xml}" > "${reduced_xml}"; then
		rm -f -- "${new_id_xml}" "${reduced_xml}"
		return 1
	fi
	rm -- "${new_id_xml}"

	echo "${reduced_xml}"
}

# Add a claim and substitute its new ID in the current wdef document.
# Globals:   WDEF_PATH (read)
# Arguments: $1 - wdef ID of the claim
#            $2 - ID of the entity receiving the claim
#            $3 - property ID
#            $4 - claim value
# Outputs:   path of the rewritten document on stdout, progress on stderr
# Returns:   non-zero if any step fails or no claim ID is returned
function addClaim {
	local -r wdef_id="${1}"
	local -r wd_id="${2}"
	local -r wd_pid="${3}"
	local -r value="${4}"
	local response claim_id reduced_xml

	echo "→ wd add-claim ${wd_id} ${wd_pid} ${value}" >&2
	sleep $((3 + RANDOM % 10))

	# Create claim and get ID afterwards:
	response=$(wd add-claim "${wd_id}" "${wd_pid}" "${value}") || return 1
	claim_id=$(jq -r '.claim.id' <<< "${response}") || return 1
	if [[ -z "${claim_id}" || "${claim_id}" == 'null' ]]; then
		echo "wd add-claim returned no claim ID for ${wdef_id}." >&2
		return 1
	fi

	reduced_xml=$(mktemp) || return 1
	if ! xmlstarlet tr "${REPLACE_WDEF_ID_XSLT_PATH}" -s old-id="${wdef_id}" -s new-id="${claim_id}" "${WDEF_PATH}" > "${reduced_xml}"; then
		rm -f -- "${reduced_xml}"
		return 1
	fi

	echo "${reduced_xml}"
}

# Add a qualifier and substitute its hash in the current wdef document.
# Globals:   WDEF_PATH (read)
# Arguments: $1 - wdef ID of the qualifier value
#            $2 - ID of the claim receiving the qualifier
#            $3 - property ID
#            $4 - qualifier value
# Outputs:   path of the rewritten document on stdout, progress on stderr
# Returns:   non-zero if any step fails or no qualifier hash is returned
function addQualifier {
	local -r qualifier_value_id="${1}"
	local -r value_id="${2}"
	local -r wd_pid="${3}"
	local -r value="${4}"
	local response qualifier_id reduced_xml

	echo "→ wd add-qualifier ${value_id} ${wd_pid} ${value}" >&2
	sleep $((3 + RANDOM % 10))

	# Create qualifier and get ID afterwards. The property ID is handed to jq
	# as a variable instead of being spliced, unquoted, into the filter.
	# NOTE(review): this yields one hash per qualifier already stored for the
	# property, so several lines may come back — confirm this is acceptable.
	response=$(wd add-qualifier "${value_id}" "${wd_pid}" "${value}") || return 1
	qualifier_id=$(jq -r --arg pid "${wd_pid}" '.claim.qualifiers[$pid][].hash' <<< "${response}") || return 1
	if [[ -z "${qualifier_id}" || "${qualifier_id}" == 'null' ]]; then
		echo "wd add-qualifier returned no qualifier hash for ${qualifier_value_id}." >&2
		return 1
	fi

	reduced_xml=$(mktemp) || return 1
	if ! xmlstarlet tr "${REPLACE_WDEF_ID_XSLT_PATH}" -s old-id="${qualifier_value_id}" -s new-id="${qualifier_id}" "${WDEF_PATH}" > "${reduced_xml}"; then
		rm -f -- "${reduced_xml}"
		return 1
	fi

	echo "${reduced_xml}"
}

# Check user parameters
if [ ! -s "${WDEF_PATH}" ]; then
	echo "${1} doesn't exist or is not readable." >&2
	exit 1
fi

# Track import generations
IMPORT_HISTORY_DIR="$(mktemp -d)/"
readonly IMPORT_HISTORY_DIR
GENERATION=1

echo "New elements corresponding IDs will be append in ${NEW_ELEMENTS_LOG_PATH}" >&2
echo "Import generations log will be in ${IMPORT_HISTORY_DIR}" >&2

# Prepare first step
cp -- "${WDEF_PATH}" "${IMPORT_HISTORY_DIR}${GENERATION}.xml"
WDEF_PATH="${IMPORT_HISTORY_DIR}${GENERATION}.xml"

# Get first step
NEXT_STEP=$(getNextStep "${WDEF_PATH}")

while [ "${NEXT_STEP}" != '' ]; do
	WB_COMMAND="${NEXT_STEP%% *}"

	# A step is a space-separated record whose first field is the command.
	# `read -r` splits it without forking and without interpreting backslashes.
	case "${WB_COMMAND}" in
		create-entity)
			# Fields: command, wdef ID, entity JSON (rest of the line).
			IFS=' ' read -r STEP_VERB STEP_ID STEP_VALUE <<< "${NEXT_STEP}"
			NEW_XML_PATH=$(createEntity "${STEP_ID}" "${STEP_VALUE}")
			;;
		add-claim)
			# Fields: command, wdef ID, entity ID, property ID, value (rest of the line).
			IFS=' ' read -r STEP_VERB STEP_ID STEP_SUBJECT STEP_PID STEP_VALUE <<< "${NEXT_STEP}"
			NEW_XML_PATH=$(addClaim "${STEP_ID}" "${STEP_SUBJECT}" "${STEP_PID}" "${STEP_VALUE}")
			;;
		add-claim-no-value)
			# Fields: command, wdef ID, entity ID, property ID; anything else is ignored.
			IFS=' ' read -r STEP_VERB STEP_ID STEP_SUBJECT STEP_PID STEP_REST <<< "${NEXT_STEP}"
			NEW_XML_PATH=$(addClaim "${STEP_ID}" "${STEP_SUBJECT}" "${STEP_PID}" '{"snaktype": "novalue"}')
			;;
		add-qualifier)
			# Fields: command, wdef ID, claim ID, property ID, value.
			# NOTE(review): only the fifth field is used as the value, so a value
			# containing spaces is truncated (add-claim takes the rest of the
			# line instead) — confirm this is intended.
			IFS=' ' read -r STEP_VERB STEP_ID STEP_SUBJECT STEP_PID STEP_VALUE STEP_REST <<< "${NEXT_STEP}"
			NEW_XML_PATH=$(addQualifier "${STEP_ID}" "${STEP_SUBJECT}" "${STEP_PID}" "${STEP_VALUE}")
			;;
		*)
			echo "Unexpected \"${WB_COMMAND}\" command." >&2
			exit 1
			;;
	esac

	# Check returned string is the path of a non-empty file
	if [ ! -s "${NEW_XML_PATH}" ]; then
		echo "Step \"${WB_COMMAND}\" did not produce a document." >&2
		exit 1
	fi

	# Generate new canonical version
	CANONICALIZED_XML_PATH="$(mktemp)"
	xmlstarlet tr "${CANONICALIZE_WDEF_XSLT_PATH}" "${NEW_XML_PATH}" > "${CANONICALIZED_XML_PATH}"
	# Removal of the intermediate document is left disabled, as it was.
	# NOTE(review): this leaves one temporary file behind per step — confirm
	# whether it is still wanted before re-enabling.
	# rm "${NEW_XML_PATH}"

	# Prepare next step
	GENERATION=$((GENERATION + 1))
	WDEF_PATH="${IMPORT_HISTORY_DIR}${GENERATION}.xml"
	mv -- "${CANONICALIZED_XML_PATH}" "${WDEF_PATH}"

	NEXT_STEP=$(getNextStep "${WDEF_PATH}")
done