]> Pierre Choffet | Git repositories - wmo_to_wikidata.git/blob - import.sh
Add WMO demonstration tools
[wmo_to_wikidata.git] / import.sh
1 #!/bin/bash
2
3 # import.sh - Import WDEF data into Wikidata.
4 # Copyright (C) 2020-2021 Pierre Choffet
5 #
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of version 3 of the GNU General Public License as published
8 # by the Free Software Foundation.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
# Abort on any unhandled command failure (-e) and on use of an unset
# variable (-u).
set -eu

# Internals config: XSLT stylesheets used by the import. The paths are
# relative, so they are resolved against the current working directory.
IMPORT_PLAN_XSLT_PATH='xslts/get_next_step.xslt'
REPLACE_WDEF_ID_XSLT_PATH='xslts/replace_id.xslt'
REMOVE_LABELS_DESCRIPTIONS_XSLT_PATH='xslts/remove_labels_descriptions.xslt'
CANONICALIZE_WDEF_XSLT_PATH='xslts/canonicalize.xslt'

# Positional parameters. They are checked explicitly so that a missing
# argument produces a usage message instead of the bare "unbound variable"
# error that `set -u` would raise on ${1} / ${2}.
if [[ $# -lt 2 ]]; then
  echo "Usage: ${0} WDEF_FILE NEW_ELEMENTS_LOG_FILE" >&2
  exit 1
fi

# $1 - WDEF file to import (re-pointed to the current generation later on).
WDEF_PATH="${1}"
# $2 - file to which "<wdef id> = <new entity id>" lines are appended.
NEW_ELEMENTS_LOG_PATH="${2}"
28
#######################################
# Create an entity with `wd create-entity`, record its new ID and produce an
# updated copy of the current WDEF document.
# Globals:
#   NEW_ELEMENTS_LOG_PATH                 (read; the file is appended to)
#   REPLACE_WDEF_ID_XSLT_PATH             (read)
#   REMOVE_LABELS_DESCRIPTIONS_XSLT_PATH  (read)
#   WDEF_PATH                             (read)
# Arguments:
#   $1 - ID of the element inside the WDEF document
#   $2 - entity JSON handed to `wd create-entity`
# Outputs:
#   stdout: path of a temporary file holding the updated document
#   stderr: progress and error messages
# Returns:
#   0 on success, 1 if the entity could not be created, no ID was returned,
#   or a transformation failed.
#######################################
function createEntity {
  local -r wdef_id="${1}"
  local -r entity_json="${2}"
  # Declared separately from their assignments so that the exit status of
  # the command substitutions is not masked by `local`.
  local response element_id new_id_xml reduced_xml

  # stdout carries the result path, so progress goes to stderr. This also
  # works when no terminal is attached.
  echo "→ wd create-entity ${entity_json}" >&2
  # NOTE(review): fixed pause before every write, presumably to throttle
  # requests — confirm.
  sleep 5

  # Create entity and get ID afterwards:
  if ! response=$(wd create-entity "${entity_json}"); then
    echo "wd create-entity failed for ${wdef_id}." >&2
    return 1
  fi
  # `// empty` turns a missing/null ID into an empty string instead of the
  # literal "null".
  element_id=$(jq -r '.entity.id // empty' <<< "${response}") || element_id=''
  if [[ -z "${element_id}" ]]; then
    echo "wd create-entity returned no entity ID for ${wdef_id}." >&2
    return 1
  fi

  echo "${wdef_id} = ${element_id}" >> "${NEW_ELEMENTS_LOG_PATH}"

  # Replace id
  new_id_xml=$(mktemp) || return 1
  if ! xmlstarlet tr "${REPLACE_WDEF_ID_XSLT_PATH}" \
    -s old-id="${wdef_id}" -s new-id="${element_id}" \
    "${WDEF_PATH}" > "${new_id_xml}"; then
    rm -f -- "${new_id_xml}"
    return 1
  fi

  # Remove labels and descriptions
  if ! reduced_xml=$(mktemp); then
    rm -f -- "${new_id_xml}"
    return 1
  fi
  if ! xmlstarlet tr "${REMOVE_LABELS_DESCRIPTIONS_XSLT_PATH}" \
    -s wdef-id="${element_id}" "${new_id_xml}" > "${reduced_xml}"; then
    rm -f -- "${new_id_xml}" "${reduced_xml}"
    return 1
  fi
  rm -f -- "${new_id_xml}"

  echo "${reduced_xml}"
}
52
#######################################
# Add a claim with `wd add-claim` and produce a copy of the current WDEF
# document in which the claim's WDEF ID is replaced by the returned claim ID.
# Globals:
#   REPLACE_WDEF_ID_XSLT_PATH  (read)
#   WDEF_PATH                  (read)
# Arguments:
#   $1 - ID of the claim inside the WDEF document
#   $2 - ID of the entity receiving the claim
#   $3 - property ID
#   $4 - claim value
# Outputs:
#   stdout: path of a temporary file holding the updated document
#   stderr: progress and error messages
# Returns:
#   0 on success, 1 if the claim could not be added, no claim ID was
#   returned, or the transformation failed.
#######################################
function addClaim {
  local -r wdef_id="${1}"
  local -r wd_id="${2}"
  local -r wd_pid="${3}"
  local -r value="${4}"
  # Declared separately from their assignments so that the exit status of
  # the command substitutions is not masked by `local`.
  local response claim_id reduced_xml

  # stdout carries the result path, so progress goes to stderr. This also
  # works when no terminal is attached.
  echo "→ wd add-claim ${wd_id} ${wd_pid} ${value}" >&2
  # Random pause of 3 to 12 seconds before the write.
  sleep $((3 + RANDOM % 10))

  # Create claim and get ID afterwards:
  if ! response=$(wd add-claim "${wd_id}" "${wd_pid}" "${value}"); then
    echo "wd add-claim failed for ${wdef_id}." >&2
    return 1
  fi
  # `// empty` turns a missing/null ID into an empty string instead of the
  # literal "null".
  claim_id=$(jq -r '.claim.id // empty' <<< "${response}") || claim_id=''
  if [[ -z "${claim_id}" ]]; then
    echo "wd add-claim returned no claim ID for ${wdef_id}." >&2
    return 1
  fi

  reduced_xml=$(mktemp) || return 1
  if ! xmlstarlet tr "${REPLACE_WDEF_ID_XSLT_PATH}" \
    -s old-id="${wdef_id}" -s new-id="${claim_id}" \
    "${WDEF_PATH}" > "${reduced_xml}"; then
    rm -f -- "${reduced_xml}"
    return 1
  fi

  echo "${reduced_xml}"
}
70
#######################################
# Add a qualifier with `wd add-qualifier` and produce a copy of the current
# WDEF document in which the qualifier's WDEF ID is replaced by the returned
# qualifier hash.
# Globals:
#   REPLACE_WDEF_ID_XSLT_PATH  (read)
#   WDEF_PATH                  (read)
# Arguments:
#   $1 - ID of the qualifier value inside the WDEF document
#   $2 - ID of the claim receiving the qualifier
#   $3 - property ID
#   $4 - qualifier value
# Outputs:
#   stdout: path of a temporary file holding the updated document
#   stderr: progress and error messages
# Returns:
#   0 on success, 1 if the qualifier could not be added, no hash was
#   returned, or the transformation failed.
#######################################
function addQualifier {
  local -r qualifier_value_id="${1}"
  local -r value_id="${2}"
  local -r wd_pid="${3}"
  local -r value="${4}"
  # Declared separately from their assignments so that the exit status of
  # the command substitutions is not masked by `local`.
  local response qualifier_id reduced_xml

  # stdout carries the result path, so progress goes to stderr. This also
  # works when no terminal is attached.
  echo "→ wd add-qualifier ${value_id} ${wd_pid} ${value}" >&2
  # Random pause of 3 to 12 seconds before the write.
  sleep $((3 + RANDOM % 10))

  # Create qualifier and get ID afterwards:
  if ! response=$(wd add-qualifier "${value_id}" "${wd_pid}" "${value}"); then
    echo "wd add-qualifier failed for ${qualifier_value_id}." >&2
    return 1
  fi
  # The property ID is passed as a jq variable rather than spliced into the
  # filter, and the filter is quoted so the shell never sees the `[]`.
  # As before, one hash per line is emitted when several qualifiers exist
  # for the property.
  qualifier_id=$(jq -r --arg pid "${wd_pid}" \
    '.claim.qualifiers[$pid][].hash' <<< "${response}") || qualifier_id=''
  if [[ -z "${qualifier_id}" || "${qualifier_id}" == 'null' ]]; then
    echo "wd add-qualifier returned no hash for ${qualifier_value_id}." >&2
    return 1
  fi

  reduced_xml=$(mktemp) || return 1
  if ! xmlstarlet tr "${REPLACE_WDEF_ID_XSLT_PATH}" \
    -s old-id="${qualifier_value_id}" -s new-id="${qualifier_id}" \
    "${WDEF_PATH}" > "${reduced_xml}"; then
    rm -f -- "${reduced_xml}"
    return 1
  fi

  echo "${reduced_xml}"
}
88
# Print the space-separated field(s) of the current NEXT_STEP line selected
# by a cut(1) field list such as "2" or "3-".
function stepField {
  cut -d ' ' -f "${1}" <<< "${NEXT_STEP}"
}

# Print the first line produced by the import plan stylesheet for the WDEF
# file given as $1 (an empty line when the stylesheet outputs nothing).
# Returns 1 if the transformation itself fails, so that a broken transform
# is not mistaken for "nothing left to import".
function nextStep {
  local plan
  plan=$(xmlstarlet tr "${IMPORT_PLAN_XSLT_PATH}" "${1}") || return 1
  printf '%s\n' "${plan%%$'\n'*}"
}

# Check user parameters (-s: the file exists and is not empty)
if [[ ! -s "${1}" ]]; then
  echo "${1} doesn't exist or is not readable." >&2
  exit 1
fi

# Track import generations: every step writes a new numbered XML file into
# a fresh temporary directory.
IMPORT_HISTORY_DIR="$(mktemp -d)/"
GENERATION=1

echo "New elements corresponding IDs will be append in ${NEW_ELEMENTS_LOG_PATH}" >&2
echo "Import generations log will be in ${IMPORT_HISTORY_DIR}" >&2

# Prepare first step: work on a copy so the input file is never modified.
cp "${WDEF_PATH}" "${IMPORT_HISTORY_DIR}${GENERATION}.xml"
WDEF_PATH="${IMPORT_HISTORY_DIR}${GENERATION}.xml"

# Get first step
if ! NEXT_STEP=$(nextStep "${WDEF_PATH}"); then
  echo "Unable to compute the import plan for ${WDEF_PATH}." >&2
  exit 1
fi

# Each step line is "<command> <field 2> <field 3> ...". The loop ends when
# the plan stylesheet yields an empty first line.
while [[ -n "${NEXT_STEP}" ]]; do
  WB_COMMAND="${NEXT_STEP%% *}"

  # Every field is passed quoted so that an empty or space-containing field
  # cannot shift the positional arguments of the called function.
  case "${WB_COMMAND}" in
    create-entity)
      NEW_XML_PATH=$(createEntity "$(stepField 2)" "$(stepField 3-)")
      ;;
    add-claim)
      NEW_XML_PATH=$(addClaim "$(stepField 2)" "$(stepField 3)" "$(stepField 4)" "$(stepField 5-)")
      ;;
    add-claim-no-value)
      NEW_XML_PATH=$(addClaim "$(stepField 2)" "$(stepField 3)" "$(stepField 4)" '{"snaktype": "novalue"}')
      ;;
    add-qualifier)
      NEW_XML_PATH=$(addQualifier "$(stepField 2)" "$(stepField 3)" "$(stepField 4)" "$(stepField 5)")
      ;;
    *)
      echo "Unexpected \"${WB_COMMAND}\" command." >&2
      exit 1
      ;;
  esac

  # Check returned string is the path of a non-empty file
  if [[ ! -s "${NEW_XML_PATH}" ]]; then
    echo "Step \"${NEXT_STEP}\" did not produce a usable WDEF file." >&2
    exit 1
  fi

  # Generate new canonical version
  CANONICALIZED_XML_PATH="$(mktemp)"
  xmlstarlet tr "${CANONICALIZE_WDEF_XSLT_PATH}" "${NEW_XML_PATH}" > "${CANONICALIZED_XML_PATH}"
  # The removal below was already disabled in the original script, so the
  # intermediate file is left in place.
  # rm "${NEW_XML_PATH}"

  # Prepare next step
  GENERATION=$((GENERATION + 1))
  WDEF_PATH="${IMPORT_HISTORY_DIR}${GENERATION}.xml"
  mv "${CANONICALIZED_XML_PATH}" "${WDEF_PATH}"
  if ! NEXT_STEP=$(nextStep "${WDEF_PATH}"); then
    echo "Unable to compute the import plan for ${WDEF_PATH}." >&2
    exit 1
  fi
done