# import.sh - Import data wdef into Wikidata.
# Copyright (C) 2020-2021 Pierre Choffet
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of version 3 of the GNU General Public License as published
# by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# XSLT stylesheets used by the import. The paths are relative, so they are
# resolved against the directory the script is run from.
IMPORT_PLAN_XSLT_PATH='xslts/get_next_step.xslt'
REPLACE_WDEF_ID_XSLT_PATH='xslts/replace_id.xslt'
REMOVE_LABELS_DESCRIPTIONS_XSLT_PATH='xslts/remove_labels_descriptions.xslt'
CANONICALIZE_WDEF_XSLT_PATH='xslts/canonicalize.xslt'

# Second command-line argument: file to which "<wdef id> = <new id>" lines are
# appended when entities are created.
NEW_ELEMENTS_LOG_PATH="${2}"
#######################################
# Create a Wikidata entity and substitute its new ID into the wdef document.
# Globals:
#   NEW_ELEMENTS_LOG_PATH                 (appended to)
#   WDEF_PATH                             (read)
#   REPLACE_WDEF_ID_XSLT_PATH             (read)
#   REMOVE_LABELS_DESCRIPTIONS_XSLT_PATH  (read)
# Arguments:
#   $1 - wdef identifier of the element being created
#   $2 - entity JSON handed to `wd create-entity`
# Outputs:
#   stdout - path of the temporary file holding the rewritten document
#   stderr - trace of the command being run, and error messages
# Returns:
#   0 on success; 1 when no entity ID was obtained or a transform failed
#   (nothing is printed on stdout in that case).
#######################################
function createEntity {
  local -r wdef_id="${1}"
  local -r entity_json="${2}"

  # Stdout is captured by the caller, so the trace must go elsewhere. Stderr is
  # used instead of "$(tty)", which breaks when no terminal is attached.
  echo "→ wd create-entity ${entity_json}" >&2

  # NOTE(review): the sibling functions sleep a few seconds before calling wd;
  # no such delay is visible in this function — confirm whether one is wanted.

  # Create entity and get ID afterwards. The declaration is kept apart from the
  # assignment so that `local` does not hide a failing pipeline.
  local element_id
  element_id=$(wd create-entity "${entity_json}" | jq -r .entity.id) || return 1
  # jq -r prints "null" when the key is absent; never propagate that as an ID.
  if [ -z "${element_id}" ] || [ "${element_id}" = 'null' ]; then
    echo "wd create-entity returned no entity ID for ${wdef_id}." >&2
    return 1
  fi

  echo "${wdef_id} = ${element_id}" >> "${NEW_ELEMENTS_LOG_PATH}"

  # Replace the wdef ID with the newly created one
  local new_id_xml
  new_id_xml=$(mktemp) || return 1
  if ! xmlstarlet tr "${REPLACE_WDEF_ID_XSLT_PATH}" \
      -s old-id="${wdef_id}" -s new-id="${element_id}" \
      "${WDEF_PATH}" > "${new_id_xml}"; then
    rm -f -- "${new_id_xml}"
    return 1
  fi

  # Remove labels and descriptions
  local reduced_xml
  reduced_xml=$(mktemp) || { rm -f -- "${new_id_xml}"; return 1; }
  if ! xmlstarlet tr "${REMOVE_LABELS_DESCRIPTIONS_XSLT_PATH}" \
      -s wdef-id="${element_id}" \
      "${new_id_xml}" > "${reduced_xml}"; then
    rm -f -- "${new_id_xml}" "${reduced_xml}"
    return 1
  fi

  # The intermediate document is no longer needed.
  rm -f -- "${new_id_xml}"

  # NOTE(review): the end of this function was missing from this copy of the
  # file. Printing the result path is inferred from the call site, which
  # captures stdout and tests it with `-s` — confirm against upstream.
  echo "${reduced_xml}"
}
#######################################
# Add a claim to a Wikidata entity and substitute the new claim ID into the
# wdef document.
#
# NOTE(review): the function header and the `wd_id` / `value` declarations
# were missing from this copy of the file. The name and the argument positions
# are taken from the call sites, which invoke `addClaim` with four arguments —
# confirm against upstream.
#
# Globals:
#   WDEF_PATH                  (read)
#   REPLACE_WDEF_ID_XSLT_PATH  (read)
# Arguments:
#   $1 - wdef identifier of the claim
#   $2 - ID of the entity receiving the claim
#   $3 - property ID
#   $4 - claim value
# Outputs:
#   stdout - path of the temporary file holding the rewritten document
#   stderr - trace of the command being run, and error messages
# Returns:
#   0 on success; 1 when no claim ID was obtained or the transform failed
#   (nothing is printed on stdout in that case).
#######################################
function addClaim {
  local -r wdef_id="${1}"
  local -r wd_id="${2}"
  local -r wd_pid="${3}"
  local -r value="${4}"

  # Stdout is captured by the caller, so the trace must go elsewhere. Stderr is
  # used instead of "$(tty)", which breaks when no terminal is attached.
  echo "→ wd add-claim ${wd_id} ${wd_pid} ${value}" >&2
  # Wait 3 to 12 seconds before issuing the request.
  sleep "$((3 + RANDOM % 10))"

  # Create claim and get ID afterwards. The declaration is kept apart from the
  # assignment so that `local` does not hide a failing pipeline.
  local claim_id
  claim_id=$(wd add-claim "${wd_id}" "${wd_pid}" "${value}" | jq -r .claim.id) \
    || return 1
  # jq -r prints "null" when the key is absent; never propagate that as an ID.
  if [ -z "${claim_id}" ] || [ "${claim_id}" = 'null' ]; then
    echo "wd add-claim returned no claim ID for ${wdef_id}." >&2
    return 1
  fi

  # Replace the wdef ID with the newly created claim ID
  local reduced_xml
  reduced_xml=$(mktemp) || return 1
  if ! xmlstarlet tr "${REPLACE_WDEF_ID_XSLT_PATH}" \
      -s old-id="${wdef_id}" -s new-id="${claim_id}" \
      "${WDEF_PATH}" > "${reduced_xml}"; then
    rm -f -- "${reduced_xml}"
    return 1
  fi

  # NOTE(review): printing the result path is inferred from the call sites,
  # which capture stdout and test it with `-s` — confirm against upstream.
  echo "${reduced_xml}"
}
#######################################
# Add a qualifier to an existing claim and substitute the new qualifier hash
# into the wdef document.
#
# NOTE(review): the `value` declaration was missing from this copy of the
# file; it is taken from the fourth argument because the call site passes four
# arguments — confirm against upstream.
#
# Globals:
#   WDEF_PATH                  (read)
#   REPLACE_WDEF_ID_XSLT_PATH  (read)
# Arguments:
#   $1 - wdef identifier of the qualifier
#   $2 - ID of the claim receiving the qualifier
#   $3 - property ID of the qualifier
#   $4 - qualifier value
# Outputs:
#   stdout - path of the temporary file holding the rewritten document
#   stderr - trace of the command being run, and error messages
# Returns:
#   0 on success; 1 when no qualifier hash was obtained or the transform
#   failed (nothing is printed on stdout in that case).
#######################################
function addQualifier {
  local -r qualifier_value_id="${1}"
  local -r value_id="${2}"
  local -r wd_pid="${3}"
  local -r value="${4}"

  # Stdout is captured by the caller, so the trace must go elsewhere. Stderr is
  # used instead of "$(tty)", which breaks when no terminal is attached.
  echo "→ wd add-qualifier ${value_id} ${wd_pid} ${value}" >&2
  # Wait 3 to 12 seconds before issuing the request.
  sleep "$((3 + RANDOM % 10))"

  # Create qualifier and get ID afterwards. The property ID is passed to jq
  # with --arg rather than spliced unquoted into the filter, where the shell
  # could glob-expand the "[]".
  # NOTE(review): every qualifier hash stored under this property is returned;
  # if the claim already carries one, the result spans several lines — confirm
  # that cannot happen.
  local qualifier_id
  qualifier_id=$(wd add-qualifier "${value_id}" "${wd_pid}" "${value}" \
    | jq -r --arg pid "${wd_pid}" '.claim.qualifiers[$pid][].hash') || return 1
  # jq -r prints "null" when the key is absent; never propagate that as an ID.
  if [ -z "${qualifier_id}" ] || [ "${qualifier_id}" = 'null' ]; then
    echo "wd add-qualifier returned no hash for ${qualifier_value_id}." >&2
    return 1
  fi

  # Replace the wdef ID with the newly created qualifier hash
  local reduced_xml
  reduced_xml=$(mktemp) || return 1
  if ! xmlstarlet tr "${REPLACE_WDEF_ID_XSLT_PATH}" \
      -s old-id="${qualifier_value_id}" -s new-id="${qualifier_id}" \
      "${WDEF_PATH}" > "${reduced_xml}"; then
    rm -f -- "${reduced_xml}"
    return 1
  fi

  # NOTE(review): printing the result path is inferred from the call site,
  # which captures stdout and tests it with `-s` — confirm against upstream.
  echo "${reduced_xml}"
}
# Start-up section: reports an unusable first argument, creates the history
# directory, snapshots the input document and computes the first step.
# NOTE(review): this copy of the file is incomplete here — the test guarding
# the message below and the initial assignments of WDEF_PATH and GENERATION
# are not visible. Restore them from the upstream file before running.
89 # Check user parameters
92 echo "${1} doesn't exist or is not readable."
# Successive versions of the document are kept in a fresh `mktemp -d`
# directory; the variable stores its path with a trailing slash.
96 # Track import generations
97 IMPORT_HISTORY_DIR
="$(mktemp -d)/"
# Both informational messages are written to stderr.
100 echo "New elements corresponding IDs will be append in ${NEW_ELEMENTS_LOG_PATH}" >&2
101 echo "Import generations log will be in ${IMPORT_HISTORY_DIR}" >&2
# Copy the current document to "<history dir><GENERATION>.xml" and make
# WDEF_PATH point at that copy.
104 cp "${WDEF_PATH}" "${IMPORT_HISTORY_DIR}${GENERATION}.xml"
105 WDEF_PATH
="${IMPORT_HISTORY_DIR}${GENERATION}.xml"
# NEXT_STEP is the first line produced by applying the IMPORT_PLAN_XSLT_PATH
# stylesheet to the current document.
108 NEXT_STEP
=$(xmlstarlet tr "${IMPORT_PLAN_XSLT_PATH}" "${WDEF_PATH}" | head -1)
# Main loop: runs for as long as the plan stylesheet yields a non-empty step.
# NOTE(review): this copy of the file is incomplete here — the `do`/`done`,
# the `case` statement with its command labels, and the `then`/`fi` of the
# path check are not visible. Restore them from the upstream file.
110 while [ "${NEXT_STEP}" != '' ]
# The command name is everything before the first space of the step line.
112 WB_COMMAND
="${NEXT_STEP%% *}"
# createEntity receives field 2 and the remainder of the line (fields 3+).
# NOTE(review): in all four calls below the first argument is an unquoted
# command substitution and is therefore subject to word splitting.
116 NEW_XML_PATH
=$(createEntity $(echo "${NEXT_STEP}" | cut -d ' ' -f2) "$(echo "${NEXT_STEP}" | cut -d ' ' -f3-)")
# addClaim receives fields 2, 3 and 4, then the remainder of the line
# (fields 5+) as its last argument.
119 NEW_XML_PATH
=$(addClaim $(echo "${NEXT_STEP}" | cut -d ' ' -f2) "$(echo "${NEXT_STEP}" | cut -d ' ' -f3)" "$(echo "${NEXT_STEP}" | cut -d ' ' -f4)" "$(echo "${NEXT_STEP}" | cut -d ' ' -f5-)")
# Same call with a fixed "novalue" snak as the last argument.
122 NEW_XML_PATH
=$(addClaim $(echo "${NEXT_STEP}" | cut -d ' ' -f2) "$(echo "${NEXT_STEP}" | cut -d ' ' -f3)" "$(echo "${NEXT_STEP}" | cut -d ' ' -f4)" '{"snaktype": "novalue"}')
# addQualifier receives fields 2 to 5; unlike addClaim, the last argument is
# the single field 5, not the remainder of the line.
125 NEW_XML_PATH
=$(addQualifier $(echo "${NEXT_STEP}" | cut -d ' ' -f2) "$(echo "${NEXT_STEP}" | cut -d ' ' -f3)" "$(echo "${NEXT_STEP}" | cut -d ' ' -f4)" "$(echo "${NEXT_STEP}" | cut -d ' ' -f5)")
# Message for a command name matching none of the handled ones.
128 echo "Unexpected \"${WB_COMMAND}\" command."
# `-s` is true only for an existing, non-empty file, so this condition holds
# when the step did not produce a usable document.
133 # Check returned string is path
134 if [ ! -s "${NEW_XML_PATH}" ]
# The canonicalize stylesheet output is written to a new temporary file.
139 # Generate new canonical version
140 CANONICALIZED_XML_PATH
="$(mktemp)"
141 xmlstarlet
tr "${CANONICALIZE_WDEF_XSLT_PATH}" "${NEW_XML_PATH}" > "${CANONICALIZED_XML_PATH}"
# Removal of the step's output file is commented out, so it is left on disk.
142 # rm "${NEW_XML_PATH}"
# Advance the generation counter, store the canonical document under the new
# generation number and compute the next step from it.
145 GENERATION
=$((GENERATION + 1))
146 WDEF_PATH
="${IMPORT_HISTORY_DIR}${GENERATION}.xml"
147 mv "${CANONICALIZED_XML_PATH}" "${WDEF_PATH}"
148 NEXT_STEP
=$(xmlstarlet tr "${IMPORT_PLAN_XSLT_PATH}" "${WDEF_PATH}" | head -1)