]> Pierre Choffet | Git repositories - wdef_tools.git/blob - scripts/query.sh
Add script to compare with Wikidata’s RDF
[wdef_tools.git] / scripts / query.sh
1 #!/bin/bash
2
3 # query.sh - Set of Bash functions to work with wdef files.
4 # Copyright (C) 2022 Pierre Choffet
5 #
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of version 3 of the GNU General Public License as
8 # published by the Free Software Foundation.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
# Strict mode: abort on unhandled errors, on unset variables and on failures
# anywhere in a pipeline.
set -euo pipefail

# This file is a library of functions meant to be sourced. Re-declaring a
# readonly variable is an error (fatal under `set -e`), so the constants are
# only declared the first time the file is loaded.
if [[ -z "${_WDEF_QUERY_SH_LOADED:-}" ]]
then
  readonly _WDEF_QUERY_SH_LOADED=1

  # SPARQL endpoint every query is sent to.
  readonly WIKIDATA_SPARQL_ENDPOINT='https://query.wikidata.org/bigdata/namespace/wdq/sparql'

  # Root of the on-disk cache; can be overridden through the CACHE_DIR
  # environment variable.
  readonly CACHE_DIR=${CACHE_DIR:-"${HOME}/.cache/wdef_tools/"}
  # Directory holding query results, named after query checksums.
  readonly QUERY_CACHE_DIR="${CACHE_DIR}/queries/"
fi
24
# Send query and return a path to the result into cache dir
# The query is always sent. The cache file is named after the MD5 checksum of
# the query file and is only overwritten once the download has succeeded, so a
# failed request neither truncates a previous result nor reports success.
# Globals:
#   WIKIDATA_SPARQL_ENDPOINT (read), QUERY_CACHE_DIR (read)
# Parameter:
#   $1: Path to file containing query
# Output:
#   Path to the file containing result, in cache dir
# Returns:
#   0 on success, 1 if the query file is missing or empty, or if preparing or
#   sending the query failed
function query() {
  local -r query_path="${1:-}"

  if [[ ! -s "${query_path}" ]]
  then
    echo "Query not found at path ${query_path}. Exiting." >&2
    return 1
  fi

  # Declaration and assignment are split: `local x=$(cmd)` would hide the
  # exit status of cmd.
  local query_checksum
  query_checksum=$(md5sum "${query_path}" | cut -d ' ' -f 1) || return 1
  local -r result_path="${QUERY_CACHE_DIR}/${query_checksum}"

  # Prepare query: slurp the whole file as one raw string and URI-encode it
  local encoded_query
  encoded_query=$(jq -sRr @uri < "${query_path}") || return 1

  # Send query into a temporary file next to the final location
  mkdir -p "${QUERY_CACHE_DIR}" || return 1
  local download_path
  download_path=$(mktemp "${result_path}.XXXXXX") || return 1
  if ! wget -O "${download_path}" "${WIKIDATA_SPARQL_ENDPOINT}?query=${encoded_query}"
  then
    rm -f -- "${download_path}"
    echo "Query to ${WIKIDATA_SPARQL_ENDPOINT} failed. Exiting." >&2
    return 1
  fi

  # Cache result. The content is copied in place rather than renamed so that
  # an existing cache file keeps its inode: hard links pointing at it keep
  # seeing the refreshed result.
  if ! cat -- "${download_path}" > "${result_path}"
  then
    rm -f -- "${download_path}"
    return 1
  fi
  rm -f -- "${download_path}"

  echo "${result_path}"
}
51
# Build query from template and a set of variables, send it and return a path
# to the result into cache dir
# Every occurrence of %NAME% in the template is replaced by the value stored
# under key NAME in the given associative array. Keys and values are used
# literally. The result is linked at
# ${QUERY_CACHE_DIR}/<template checksum>/<variables checksum>.
# Globals:
#   QUERY_CACHE_DIR (read)
# Params:
#   $1: Path to file containing query template
#   $2: Name of an associative array with variables values, indexed on their
#       names. The name must not be the same as a local variable of this
#       function (the array is accessed through a nameref). The variables
#       checksum is computed from the `typeset -p` output of that array.
# Output:
#   Path to the file containing result, in cache dir
# Returns:
#   0 on success, 1 on invalid parameters, when no variable was substituted,
#   or when sending the query or linking the result failed
function queryVariables() {
  local -r template_path="${1:-}"

  # Check parameters
  if [[ ! -s "${template_path}" ]]
  then
    echo "Query template not found at path ${template_path}. Exiting." >&2
    return 1
  fi
  if [[ -z "${2:-}" ]]
  then
    echo "Missing query variables. Exiting." >&2
    return 1
  fi
  # Deliberately unusual name: a nameref called like the caller's array would
  # be a circular reference.
  local -nr __qv_variables="${2}"
  if [[ ${#__qv_variables[@]} -eq 0 ]]
  then
    echo "Missing query variables. Exiting." >&2
    return 1
  fi

  # Declarations are split from assignments so failures are not masked
  local template_checksum variables_checksum
  template_checksum=$(md5sum "${template_path}" | cut -d ' ' -f 1) || return 1
  variables_checksum=$(typeset -p "${2}" | md5sum - | cut -d ' ' -f 1) || return 1

  # Build query
  local template query var_name
  template=$(< "${template_path}") || return 1
  query="${template}"
  for var_name in "${!__qv_variables[@]}"
  do
    # Key and value are quoted: the key must not act as a glob pattern, and
    # with Bash >= 5.2 (patsub_replacement) an unquoted '&' in the value would
    # expand to the matched text.
    query=${query//\%"${var_name}"\%/"${__qv_variables[${var_name}]}"}
  done

  # Check at least one variable has been substituted (file name collision
  # otherwise). Strings are compared directly so the check does not depend on
  # how many newlines end the template file.
  if [[ "${query}" == "${template}" ]]
  then
    echo "No variable substituted. Exiting." >&2
    return 1
  fi

  # Save query into a temporary file
  local query_path
  query_path=$(mktemp) || return 1
  printf '%s\n' "${query}" > "${query_path}"

  # Send query, then remove the temporary file whatever the outcome
  local query_result_path
  if ! query_result_path=$(query "${query_path}")
  then
    rm -f -- "${query_path}"
    return 1
  fi
  rm -f -- "${query_path}"

  # Generate query result cache path, create dir if needed
  local -r query_cache_path="${QUERY_CACHE_DIR}/${template_checksum}/${variables_checksum}"
  mkdir -p "${QUERY_CACHE_DIR}/${template_checksum}" || return 1

  # Link result. An existing link is replaced unless it already is the very
  # same file as the result, so it can never point at stale data.
  if [[ ! "${query_cache_path}" -ef "${query_result_path}" ]]
  then
    ln -f -- "${query_result_path}" "${query_cache_path}" || return 1
  fi

  # Return result path
  echo "${query_cache_path}"
}