]> Pierre Choffet | Git repositories - wdef_tools.git/commitdiff
Add import tools
authorPierre Choffet <peuc@wanadoo.fr>
Thu, 19 Mar 2026 15:12:42 +0000 (11:12 -0400)
committerPierre Choffet <peuc@wanadoo.fr>
Thu, 19 Mar 2026 15:13:50 +0000 (11:13 -0400)
README
scripts/import.sh [new file with mode: 0755]
xslts/get_next_step.xslt [new file with mode: 0644]

diff --git a/README b/README
index 6bf2fb6f2a282e242944e0f23e1352473f94d7be..1c90c2cc16c03e861b9d3377bcf7fb8affc57d33 100644 (file)
--- a/README
+++ b/README
@@ -10,6 +10,9 @@ applying on non-compliant file is undefined.
 Runtime dependencies are:
   - Bash - https://www.gnu.org/software/bash/
     Shell script interpreter
+  
+  - jq - https://jqlang.org/
+    JSON parser
 
   - Wikibase-cli - https://github.com/maxlath/wikibase-cli
     Interface to Wikidata operations
@@ -25,6 +28,9 @@ Description of the provided tools:
   - scripts/get_qid_from_property.sh
     Search Wikidata elements based on a value, return its QID when found.
 
+  - scripts/import.sh
+    Import a wdef file into Wikidata.
+
   - xslts/canonicalize.xslt
     Return a wdef under its normal form.
 
@@ -38,4 +44,4 @@ Description of the provided tools:
     Change the wdef:id of an element and its references.
   
   - xslts/report_label_description_duplicates.xslt
-    Detect elements with duplicate labels/descriptions (this is illegal in Wikidata)
+    Detect elements with duplicate labels/descriptions (this is illegal in Wikidata).
diff --git a/scripts/import.sh b/scripts/import.sh
new file mode 100755 (executable)
index 0000000..778c2f8
--- /dev/null
@@ -0,0 +1,168 @@
+#!/bin/bash
+
+# import.sh - Import a wdef file into Wikidata.
+# Copyright (C) 2020, 2021, 2023  Pierre Choffet
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of version 3 of the GNU General Public License as published
+# by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# Strict mode first, so that every following command is covered: abort on
+# command errors, on unset variables and on failed pipeline stages.
+set -euo pipefail
+
+# Directory of this script; the stylesheets are resolved relative to it.
+# The value is assigned before being marked readonly, otherwise the exit
+# status of dirname would be replaced by the one of "readonly".
+SCRIPT_DIR="$(dirname -- "$0")"
+readonly SCRIPT_DIR
+
+# Stylesheets used during the import.
+readonly GET_NEXT_STEP_XSLT_PATH="${SCRIPT_DIR}/../xslts/get_next_step.xslt"
+readonly REPLACE_ID_XSLT_PATH="${SCRIPT_DIR}/../xslts/replace_id.xslt"
+readonly REMOVE_LABELS_DESCRIPTIONS_XSLT_PATH="${SCRIPT_DIR}/../xslts/remove_labels_descriptions.xslt"
+readonly CANONICALIZE_XSLT_PATH="${SCRIPT_DIR}/../xslts/canonicalize.xslt"
+
+# Print the command synopsis followed by a short description on stdout.
+function usage() {
+       printf '%s\n' \
+               'USAGE: import.sh <wdef_path> <new_elements_qids_path>' \
+               '' \
+               'Import a wdef file into Wikidata. The new elements qids are appended into the' \
+               'given "new_elements_qids_path".'
+}
+
+# Create a Wikidata entity for a wdef element and rewrite the working wdef.
+#
+# Globals:
+#   NEW_ELEMENTS_LOG_PATH (read): file receiving one "<wdef id> = <new id>" line
+#   WDEF_PATH (read): wdef document of the current generation
+#   REPLACE_ID_XSLT_PATH, REMOVE_LABELS_DESCRIPTIONS_XSLT_PATH (read)
+# Arguments:
+#   $1: wdef id of the element to create
+#   $2: entity JSON handed to "wd create-entity"
+# Outputs:
+#   stdout: path of a temporary file holding the rewritten wdef
+#   stderr: trace of the wd command, error messages
+# Returns:
+#   0 on success, 1 if the entity could not be created or the wdef not rewritten
+function createEntity {
+       local -r wdef_id="${1}"
+       local -r entity_json="${2}"
+       
+       echo "→ wd create-entity ${entity_json}" >&2
+       # Pause before each write (presumably to stay gentle with the API).
+       sleep 2
+       
+       # Create entity and get ID afterwards. Declaration and assignment are
+       # split: "local x=$(cmd)" would always succeed and hide a wd/jq failure.
+       local element_id
+       if ! element_id=$(wd create-entity "${entity_json}" | jq -r .entity.id)
+       then
+               echo "wd create-entity failed for ${wdef_id}." >&2
+               return 1
+       fi
+       # jq -r prints "null" when the answer carries no entity id.
+       if [ -z "${element_id}" ] || [ "${element_id}" = 'null' ]
+       then
+               echo "wd create-entity returned no entity id for ${wdef_id}." >&2
+               return 1
+       fi
+       
+       # Record the new id right away, so it is kept even if a later step fails.
+       echo "${wdef_id} = ${element_id}" >> "${NEW_ELEMENTS_LOG_PATH}"
+       
+       # Replace id
+       local new_id_xml
+       new_id_xml=$(mktemp) || return 1
+       if ! xmlstarlet tr "${REPLACE_ID_XSLT_PATH}" -s old-id="${wdef_id}" -s new-id="${element_id}" "${WDEF_PATH}" > "${new_id_xml}"
+       then
+               rm -f "${new_id_xml}"
+               return 1
+       fi
+       
+       # Remove labels and descriptions
+       local reduced_xml
+       if ! reduced_xml=$(mktemp)
+       then
+               rm -f "${new_id_xml}"
+               return 1
+       fi
+       if ! xmlstarlet tr "${REMOVE_LABELS_DESCRIPTIONS_XSLT_PATH}" -s wdef-id="${element_id}" "${new_id_xml}" > "${reduced_xml}"
+       then
+               rm -f "${new_id_xml}" "${reduced_xml}"
+               return 1
+       fi
+       rm "${new_id_xml}"
+       
+       echo "${reduced_xml}"
+}
+
+# Add a claim to an existing Wikidata entity and rewrite the working wdef.
+#
+# Globals:
+#   WDEF_PATH (read): wdef document of the current generation
+#   REPLACE_ID_XSLT_PATH (read)
+# Arguments:
+#   $1: wdef id of the value to import
+#   $2: id of the entity receiving the claim
+#   $3: property id of the claim
+#   $4: claim value, as handed to "wd add-claim"
+# Outputs:
+#   stdout: path of a temporary file holding the rewritten wdef
+#   stderr: trace of the wd command, error messages
+# Returns:
+#   0 on success, 1 if the claim could not be added or the wdef not rewritten
+function addClaim {
+       local -r wdef_id="${1}"
+       local -r wd_id="${2}"
+       local -r wd_pid="${3}"
+       local -r value="${4}"
+       
+       echo "→ wd add-claim ${wd_id} ${wd_pid} ${value}" >&2
+       # Pause before each write (presumably to stay gentle with the API).
+       sleep 1
+       
+       # Create claim and get ID afterwards. Declaration and assignment are
+       # split: "local x=$(cmd)" would always succeed and hide a wd/jq failure.
+       local claim_id
+       if ! claim_id=$(wd add-claim "${wd_id}" "${wd_pid}" "${value}" | jq -r .claim.id)
+       then
+               echo "wd add-claim failed for ${wdef_id}." >&2
+               return 1
+       fi
+       # jq -r prints "null" when the answer carries no claim id.
+       if [ -z "${claim_id}" ] || [ "${claim_id}" = 'null' ]
+       then
+               echo "wd add-claim returned no claim id for ${wdef_id}." >&2
+               return 1
+       fi
+       
+       # Replace the wdef id by the claim id
+       local reduced_xml
+       reduced_xml=$(mktemp) || return 1
+       if ! xmlstarlet tr "${REPLACE_ID_XSLT_PATH}" -s old-id="${wdef_id}" -s new-id="${claim_id}" "${WDEF_PATH}" > "${reduced_xml}"
+       then
+               rm -f "${reduced_xml}"
+               return 1
+       fi
+       
+       echo "${reduced_xml}"
+}
+
+# Add a qualifier to an existing claim and rewrite the working wdef.
+#
+# Globals:
+#   WDEF_PATH (read): wdef document of the current generation
+#   REPLACE_ID_XSLT_PATH (read)
+# Arguments:
+#   $1: wdef id of the qualifier value to import
+#   $2: id of the claim receiving the qualifier
+#   $3: property id of the qualifier
+#   $4: qualifier value, as handed to "wd add-qualifier"
+# Outputs:
+#   stdout: path of a temporary file holding the rewritten wdef
+#   stderr: trace of the wd command, error messages
+# Returns:
+#   0 on success, 1 if the qualifier could not be added or the wdef not rewritten
+function addQualifier {
+       local -r qualifier_value_id="${1}"
+       local -r value_id="${2}"
+       local -r wd_pid="${3}"
+       local -r value="${4}"
+       
+       echo "→ wd add-qualifier ${value_id} ${wd_pid} ${value}" >&2
+       # Pause before each write (presumably to stay gentle with the API).
+       sleep 1
+       
+       # Create qualifier and get ID afterwards. The property id is handed to jq
+       # as a variable instead of being pasted unquoted into the filter text.
+       # NOTE(review): one hash is printed per qualifier of that property; when
+       # the claim holds several of them the id spans several lines - confirm
+       # how that case should be handled.
+       local qualifier_id
+       if ! qualifier_id=$(wd add-qualifier "${value_id}" "${wd_pid}" "${value}" | jq -r --arg pid "${wd_pid}" '.claim.qualifiers[$pid][].hash')
+       then
+               echo "wd add-qualifier failed for ${qualifier_value_id}." >&2
+               return 1
+       fi
+       # jq -r prints "null" when an entry carries no hash.
+       if [ -z "${qualifier_id}" ] || [ "${qualifier_id}" = 'null' ]
+       then
+               echo "wd add-qualifier returned no hash for ${qualifier_value_id}." >&2
+               return 1
+       fi
+       
+       # Replace the wdef id by the qualifier hash
+       local reduced_xml
+       reduced_xml=$(mktemp) || return 1
+       if ! xmlstarlet tr "${REPLACE_ID_XSLT_PATH}" -s old-id="${qualifier_value_id}" -s new-id="${qualifier_id}" "${WDEF_PATH}" > "${reduced_xml}"
+       then
+               rm -f "${reduced_xml}"
+               return 1
+       fi
+       
+       echo "${reduced_xml}"
+}
+
+# Exactly two positional arguments are expected.
+if [ "$#" -ne 2 ]
+then
+       usage >&2
+       exit 1
+fi
+
+WDEF_PATH="${1}"
+readonly NEW_ELEMENTS_LOG_PATH="${2}"
+
+# Check user parameters: the wdef file must exist, be non-empty and readable.
+# The diagnostic goes to stderr, like every other message of this script.
+if [ ! -s "${WDEF_PATH}" ] || [ ! -r "${WDEF_PATH}" ]
+then
+       echo "${1} doesn't exist or is not readable." >&2
+       exit 1
+fi
+
+# Track import generations
+IMPORT_HISTORY_DIR="$(mktemp -d)/"
+GENERATION=1
+
+echo "New elements corresponding IDs will be append in ${NEW_ELEMENTS_LOG_PATH}" >&2
+echo "Import generations log will be in ${IMPORT_HISTORY_DIR}" >&2
+
+# Prepare first step: work on a copy, the user file is never modified.
+cp "${WDEF_PATH}" "${IMPORT_HISTORY_DIR}${GENERATION}.xml"
+WDEF_PATH="${IMPORT_HISTORY_DIR}${GENERATION}.xml"
+
+# Get first step. The whole output is captured before its first line is
+# taken, so that a non-zero exit status of xmlstarlet is not discarded and
+# mistaken for "nothing left to import".
+if ! NEXT_STEPS=$(xmlstarlet tr "${GET_NEXT_STEP_XSLT_PATH}" "${WDEF_PATH}")
+then
+       echo "Could not compute the first import step from ${WDEF_PATH}." >&2
+       exit 1
+fi
+NEXT_STEP=${NEXT_STEPS%%$'\n'*}
+
+# Run one import step per iteration until the stylesheet reports no more work.
+# Each step line reads "<command> <field 2> <field 3> ..." with single spaces
+# between fields; the last field may itself contain spaces.
+while [ "${NEXT_STEP}" != '' ]
+do
+       WB_COMMAND="${NEXT_STEP%% *}"
+
+       # Every extracted field is quoted: wdef ids start with "?", which an
+       # unquoted expansion would treat as a glob pattern matching file names.
+       case "${WB_COMMAND}" in
+               create-entity)
+                       NEW_XML_PATH=$(createEntity "$(cut -d ' ' -f2 <<< "${NEXT_STEP}")" "$(cut -d ' ' -f3- <<< "${NEXT_STEP}")")
+                       ;;
+               add-claim)
+                       NEW_XML_PATH=$(addClaim "$(cut -d ' ' -f2 <<< "${NEXT_STEP}")" "$(cut -d ' ' -f3 <<< "${NEXT_STEP}")" "$(cut -d ' ' -f4 <<< "${NEXT_STEP}")" "$(cut -d ' ' -f5- <<< "${NEXT_STEP}")")
+                       ;;
+               add-claim-no-value)
+                       NEW_XML_PATH=$(addClaim "$(cut -d ' ' -f2 <<< "${NEXT_STEP}")" "$(cut -d ' ' -f3 <<< "${NEXT_STEP}")" "$(cut -d ' ' -f4 <<< "${NEXT_STEP}")" '{"snaktype": "novalue"}')
+                       ;;
+               add-qualifier)
+                       # The value is everything from field 5 on (as for add-claim): JSON
+                       # values such as {"amount": "1", ...} contain spaces.
+                       NEW_XML_PATH=$(addQualifier "$(cut -d ' ' -f2 <<< "${NEXT_STEP}")" "$(cut -d ' ' -f3 <<< "${NEXT_STEP}")" "$(cut -d ' ' -f4 <<< "${NEXT_STEP}")" "$(cut -d ' ' -f5- <<< "${NEXT_STEP}")")
+                       ;;
+               *)
+                       echo "Unexpected \"${WB_COMMAND}\" command." >&2
+                       exit 1
+                       ;;
+       esac
+       
+       # Check returned string is the path of a non-empty file
+       if [ ! -s "${NEW_XML_PATH}" ]
+       then
+               echo "Step \"${WB_COMMAND}\" did not produce a new wdef file." >&2
+               exit 1
+       fi
+       
+       # Delete last generation wdef file
+       rm "${WDEF_PATH}"
+       
+       # Generate new canonical version
+       CANONICALIZED_XML_PATH="$(mktemp)"
+       xmlstarlet tr "${CANONICALIZE_XSLT_PATH}" "${NEW_XML_PATH}" > "${CANONICALIZED_XML_PATH}"
+       rm "${NEW_XML_PATH}"
+       
+       # Prepare next step
+       GENERATION=$((GENERATION + 1))
+       WDEF_PATH="${IMPORT_HISTORY_DIR}${GENERATION}.xml"
+       mv "${CANONICALIZED_XML_PATH}" "${WDEF_PATH}"
+       # The whole output is captured before its first line is taken, so that
+       # a non-zero exit status of xmlstarlet is not discarded and mistaken for
+       # "nothing left to import".
+       if ! NEXT_STEPS=$(xmlstarlet tr "${GET_NEXT_STEP_XSLT_PATH}" "${WDEF_PATH}")
+       then
+               echo "Could not compute the next import step from ${WDEF_PATH}." >&2
+               exit 1
+       fi
+       NEXT_STEP=${NEXT_STEPS%%$'\n'*}
+done
diff --git a/xslts/get_next_step.xslt b/xslts/get_next_step.xslt
new file mode 100644 (file)
index 0000000..b46e5ec
--- /dev/null
@@ -0,0 +1,244 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE wdef:knowledge[
+       <!ENTITY AP "&apos;">
+]>
+
+<xsl:stylesheet version="1.0" exclude-result-prefixes=""
+                xmlns:wdef="https://purl.choffet.net/wdef"
+                xmlns:xml="http://www.w3.org/XML/1998/namespace"
+                xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+<!-- get_next_step.xslt - Output import next step one-liner.
+     Copyright (C) 2020, 2021, 2022  Pierre Choffet
+
+     This program is free software: you can redistribute it and/or modify
+     it under the terms of version 3 of the GNU General Public License as
+     published by the Free Software Foundation.
+
+     This program is distributed in the hope that it will be useful,
+     but WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+     GNU General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with this program.  If not, see <http://www.gnu.org/licenses/>.
+     -->
+       <xsl:output method="text" encoding="utf-8" indent="yes" />
+       
+       <xsl:template match="text()" />
+       
+       <!-- Emit one "create-entity" line for each top-level element whose
+            wdef:id starts with "?": the command name, the wdef:id, then a JSON
+            object holding the labels and/or descriptions that are present.
+            The line ends with a line feed. -->
+       <xsl:template match="/wdef:knowledge/wdef:element[substring(@wdef:id, 1, 1) = '?']">
+               <xsl:text>create-entity </xsl:text>
+               <xsl:value-of select="@wdef:id" />
+               <xsl:text> {</xsl:text>
+               <xsl:if test="wdef:label">
+                       <xsl:text>"labels":{</xsl:text>
+                               <xsl:apply-templates select="wdef:label" />
+                       <xsl:text>}</xsl:text>
+               </xsl:if>
+               <xsl:if test="wdef:label and wdef:description">
+                       <xsl:text>, </xsl:text>
+               </xsl:if>
+               <xsl:if test="wdef:description">
+                       <xsl:text>"descriptions":{</xsl:text>
+                               <xsl:apply-templates select="wdef:description" />
+                       <xsl:text>}</xsl:text>
+               </xsl:if>
+               <xsl:text>}&#xa;</xsl:text>
+       </xsl:template>
+       
+       <!-- Emit a label as a JSON member: the wdef:interface-lang attribute is
+            the key, the label text (double quotes escaped through the
+            escape-quotes template) is the value. A ", " separator follows
+            unless the label is the last node of the current node list. -->
+       <xsl:template match="wdef:label">
+               <xsl:variable name="text">
+                       <xsl:call-template name="escape-quotes">
+                               <xsl:with-param name="text" select="." />
+                       </xsl:call-template>
+               </xsl:variable>
+               
+               <xsl:text>"</xsl:text>
+               <xsl:value-of select="@wdef:interface-lang" />
+               <xsl:text>": "</xsl:text>
+               <xsl:value-of select="$text" />
+               <xsl:text>"</xsl:text>
+               <xsl:if test="position() != last()">
+                       <xsl:text>, </xsl:text>
+               </xsl:if>
+       </xsl:template>
+       
+       <!-- Emit a description as a JSON member: the wdef:interface-lang
+            attribute is the key, the description text (double quotes escaped
+            through the escape-quotes template) is the value. A ", " separator
+            follows unless the description is the last node of the current
+            node list. -->
+       <xsl:template match="wdef:description">
+               <xsl:variable name="text">
+                       <xsl:call-template name="escape-quotes">
+                               <xsl:with-param name="text" select="." />
+                       </xsl:call-template>
+               </xsl:variable>
+               <xsl:text>"</xsl:text>
+               <xsl:value-of select="@wdef:interface-lang" />
+               <xsl:text>": "</xsl:text>
+               <xsl:value-of select="$text" />
+               <xsl:text>"</xsl:text>
+               <xsl:if test="position() != last()">
+                       <xsl:text>, </xsl:text>
+               </xsl:if>
+       </xsl:template>
+       
+       <!-- Emit one "add-claim" line for each value that still has to be
+            imported: its wdef:id starts with "?", it belongs to an element
+            whose wdef:id does not start with "?", it has at least one child
+            element and none of the wdef:ref-element it contains starts with
+            "?". The ref-element test carries the wdef: prefix: an unprefixed
+            name only matches elements in no namespace, so it would never match
+            here and the guard would have no effect.
+            Line layout: add-claim, value wdef:id, element wdef:id, property
+            pid, then the value itself (plain text or a JSON object). -->
+       <xsl:template match="/wdef:knowledge/wdef:element[substring(@wdef:id, 1, 1) != '?']/wdef:property/wdef:value[substring(@wdef:id, 1, 1) = '?' and * and not(.//wdef:ref-element[substring(., 1, 1) = '?'])]">
+               <xsl:text>add-claim </xsl:text>
+               <xsl:value-of select="@wdef:id" />
+               <xsl:text> </xsl:text>
+               <xsl:value-of select="../../@wdef:id" />
+               <xsl:text> </xsl:text>
+               <xsl:value-of select="../@wdef:pid" />
+               <xsl:text> </xsl:text>
+               
+               <xsl:choose>
+                       <xsl:when test="wdef:literal">
+                               <xsl:value-of select="wdef:literal" />
+                       </xsl:when>
+                       <xsl:when test="wdef:quantity">
+                               <xsl:text>{"amount": "</xsl:text>
+                               <xsl:value-of select="wdef:quantity" />
+                               <xsl:text>", "unit": "</xsl:text>
+                               <xsl:value-of select="wdef:quantity/@wdef:unit" />
+                               <xsl:text>"}</xsl:text>
+                       </xsl:when>
+                       <!-- A value holding only qualifiers yields an empty value field. -->
+                       <xsl:when test="wdef:qualifier"></xsl:when>
+                       <xsl:when test="wdef:ref-element">
+                               <xsl:value-of select="wdef:ref-element" />
+                       </xsl:when>
+                       <xsl:when test="wdef:time">
+                               <xsl:text>{"time": "</xsl:text>
+                               <!-- Skip the first character of the time text and keep only
+                                    as many characters as the precision covers (10 for
+                                    precision 11, 7 for precision 10, 4 for precision 9). -->
+                               <xsl:choose>
+                                       <xsl:when test="wdef:time/@wdef:precision = '11'">
+                                               <xsl:value-of select="substring(wdef:time, 2, 10)" />
+                                       </xsl:when>
+                                       <xsl:when test="wdef:time/@wdef:precision = '10'">
+                                               <xsl:value-of select="substring(wdef:time, 2, 7)" />
+                                       </xsl:when>
+                                       <xsl:when test="wdef:time/@wdef:precision = '9'">
+                                               <xsl:value-of select="substring(wdef:time, 2, 4)" />
+                                       </xsl:when>
+                                       <xsl:otherwise>
+                                               <xsl:message terminate="yes">
+                                                       <xsl:text>Cannot import time with precision "</xsl:text>
+                                                       <xsl:value-of select="wdef:time/@wdef:precision" />
+                                                       <xsl:text>" for now. Exiting.</xsl:text>
+                                               </xsl:message>
+                                       </xsl:otherwise>
+                               </xsl:choose>
+                               <xsl:text>", "precision": </xsl:text>
+                               <xsl:value-of select="wdef:time/@wdef:precision" />
+                               <xsl:text>, "calendar": "</xsl:text>
+                               <xsl:value-of select="wdef:time/@wdef:calendar" />
+                               <xsl:text>"}</xsl:text>
+                       </xsl:when>
+                       <xsl:when test="wdef:translation">
+                               <xsl:text>{"language": "</xsl:text>
+                               <xsl:value-of select="wdef:translation/@xml:lang" />
+                               <xsl:text>", "text": "</xsl:text>
+                               <xsl:value-of select="wdef:translation" />
+                               <xsl:text>"}</xsl:text>
+                       </xsl:when>
+                       <xsl:when test="wdef:coordinate">
+                               <xsl:text>{"latitude": </xsl:text>
+                               <xsl:value-of select="wdef:coordinate/@wdef:latitude" />
+                               <xsl:text>, "longitude": </xsl:text>
+                               <xsl:value-of select="wdef:coordinate/@wdef:longitude" />
+                               <xsl:text>, "precision": </xsl:text>
+                               <xsl:value-of select="wdef:coordinate/@wdef:precision" />
+                               <xsl:text>, "globe": "http://www.wikidata.org/entity/</xsl:text>
+                               <xsl:value-of select="wdef:coordinate/wdef:ref-element" />
+                               <xsl:text>"}</xsl:text>
+                       </xsl:when>
+                       <xsl:otherwise>
+                               <xsl:message terminate="yes">
+                                       <xsl:text>Cannot import "</xsl:text>
+                                       <xsl:value-of select="name(*[1])" />
+                                       <xsl:text>" type for now. Exiting.</xsl:text>
+                               </xsl:message>
+                       </xsl:otherwise>
+               </xsl:choose>
+               
+               <xsl:text>&#xa;</xsl:text>
+       </xsl:template>
+       
+       <!-- Emit one "add-claim-no-value" line for each wdef:novalue whose
+            wdef:id starts with "?" and whose element wdef:id does not.
+            Line layout: command name, novalue wdef:id, element wdef:id,
+            property pid. -->
+       <xsl:template match="/wdef:knowledge/wdef:element[substring(@wdef:id, 1, 1) != '?']/wdef:property/wdef:novalue[substring(@wdef:id, 1, 1) = '?']">
+               <xsl:text>add-claim-no-value </xsl:text>
+               <xsl:value-of select="@wdef:id" />
+               <xsl:text> </xsl:text>
+               <xsl:value-of select="../../@wdef:id" />
+               <xsl:text> </xsl:text>
+               <xsl:value-of select="../@wdef:pid" />
+               <xsl:text>&#xa;</xsl:text>
+       </xsl:template>
+       
+       <!-- Emit one "add-qualifier" line for each qualifier value found under
+            a value or novalue whose wdef:id does not start with "?", inside an
+            element whose wdef:id does not start with "?".
+            Line layout: add-qualifier, qualifier value wdef:id, wdef:id of the
+            enclosing value or novalue, qualifier property pid, then the value
+            itself (plain text or a JSON object).
+            NOTE(review): unlike the add-claim template, the match has no test
+            on the wdef:id of the qualifier value itself, so a qualifier value
+            seems to match whatever its id is; confirm that qualifiers already
+            imported are not emitted again.
+            NOTE(review): the time branch emits the precision as a quoted
+            string and does not shorten the time text, whereas the add-claim
+            template emits a bare number and shortens the text according to the
+            precision; confirm which form is expected. -->
+       <xsl:template match="/wdef:knowledge/wdef:element[substring(@wdef:id, 1, 1) != '?']/wdef:property/wdef:value[substring(@wdef:id, 1, 1) != '?']/wdef:qualifier/wdef:property/wdef:value | /wdef:knowledge/wdef:element[substring(@wdef:id, 1, 1) != '?']/wdef:property/wdef:novalue[substring(@wdef:id, 1, 1) != '?']/wdef:qualifier/wdef:property/wdef:value">
+               <xsl:text>add-qualifier </xsl:text>
+               <xsl:value-of select="@wdef:id" />
+               <xsl:text> </xsl:text>
+               <xsl:value-of select="../../../@wdef:id" />
+               <xsl:text> </xsl:text>
+               <xsl:value-of select="../@wdef:pid" />
+               <xsl:text> </xsl:text>
+               <xsl:choose>
+                       <xsl:when test="wdef:literal">
+                               <xsl:value-of select="wdef:literal" />
+                       </xsl:when>
+                       <xsl:when test="wdef:quantity">
+                               <xsl:text>{"amount": "</xsl:text>
+                               <xsl:value-of select="wdef:quantity" />
+                               <xsl:text>", "unit": "</xsl:text>
+                               <xsl:value-of select="wdef:quantity/@wdef:unit" />
+                               <xsl:text>"}</xsl:text>
+                       </xsl:when>
+                       <xsl:when test="wdef:ref-element">
+                               <xsl:value-of select="wdef:ref-element" />
+                       </xsl:when>
+                       <xsl:when test="wdef:time">
+                               <xsl:text>{"time": "</xsl:text>
+                               <xsl:value-of select="substring(wdef:time, 2)" />
+                               <xsl:text>", "precision": "</xsl:text>
+                               <xsl:value-of select="wdef:time/@wdef:precision" />
+                               <xsl:text>", "calendar": "</xsl:text>
+                               <xsl:value-of select="wdef:time/@wdef:calendar" />
+                               <xsl:text>"}</xsl:text>
+                       </xsl:when>
+                       <xsl:otherwise>
+                               <xsl:message terminate="yes">
+                                       <xsl:text>Cannot import "</xsl:text>
+                                       <xsl:value-of select="name(*[1])" />
+                                       <xsl:text>" type for now. Exiting.</xsl:text>
+                               </xsl:message>
+                       </xsl:otherwise>
+               </xsl:choose>
+               <xsl:text>&#xa;</xsl:text>
+       </xsl:template>
+       
+       <!-- Make a text safe for use inside a double-quoted JSON string.
+            Parameter "text": the text to escape.
+            Backslashes are doubled first, then every double quote gets a
+            backslash in front of it. The order matters: escaping the quotes
+            first would double the backslashes that were just added. -->
+       <xsl:template name="escape-quotes">
+               <xsl:param name="text" />
+               
+               <xsl:variable name="backslash-escaped">
+                       <xsl:call-template name="escape-target">
+                               <xsl:with-param name="text" select="$text" />
+                               <xsl:with-param name="target" select="'\'" />
+                               <xsl:with-param name="escaper" select="'\'" />
+                       </xsl:call-template>
+               </xsl:variable>
+               
+               <xsl:call-template name="escape-target">
+                       <xsl:with-param name="text" select="$backslash-escaped" />
+                       <xsl:with-param name="target" select="'&quot;'" />
+                       <xsl:with-param name="escaper" select="'\'" />
+               </xsl:call-template>
+       </xsl:template>
+
+       <!-- Output "text" with every occurrence of "target" preceded by
+            "escaper".
+            Parameters: "text" is the text to process, "target" the substring
+            to look for, "escaper" the string written in front of each
+            occurrence.
+            The template outputs the part before the first occurrence, then
+            the escaped occurrence, then calls itself on the remainder; a text
+            without any occurrence is output unchanged. -->
+       <xsl:template name="escape-target">
+               <xsl:param name="text" />
+               <xsl:param name="target" />
+               <xsl:param name="escaper" />
+               
+               <xsl:choose>
+                       <xsl:when test="contains($text, $target)">
+                               <xsl:value-of select="substring-before($text, $target)" />
+                               <xsl:value-of select="concat($escaper, $target)" />
+                               <xsl:call-template name="escape-target">
+                                       <xsl:with-param name="text" select="substring-after($text, $target)" />
+                                       <xsl:with-param name="target" select="$target" />
+                                       <xsl:with-param name="escaper" select="$escaper" />
+                               </xsl:call-template>
+                       </xsl:when>
+                       <xsl:otherwise>
+                               <xsl:value-of select="$text" />
+                       </xsl:otherwise>
+               </xsl:choose>
+       </xsl:template>
+</xsl:stylesheet>