]> Pierre Choffet | Git repositories - wdef_tools.git/blob - xslts/merge_rdf.xslt
Add script to compare with Wikidata’s RDF
[wdef_tools.git] / xslts / merge_rdf.xslt
1 <?xml version="1.0" encoding="UTF-8"?>
2 <!-- merge_rdf.xslt - Merge Wikidata element properties from its RDF.
3 Copyright (C) 2020, 2021, 2022, 2023 Pierre Choffet
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of version 3 of the GNU General Public License as
7 published by the Free Software Foundation.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
16 -->
17
18 <!-- LIMITATIONS:
- If WD already has a P31 statement, our own P31 value is not used, so that subclasses are not added
20 -->
21 <xsl:stylesheet version="1.0" exclude-result-prefixes=""
22 xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
23 xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
24 xmlns:schema="http://schema.org/"
25 xmlns:wdef="https://purl.choffet.net/wdef"
26 xmlns:wdt="http://www.wikidata.org/prop/direct/"
27 xmlns:wikibase="http://wikiba.se/ontology#"
28 xmlns:xml="http://www.w3.org/XML/1998/namespace"
29 xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" encoding="utf-8" indent="yes" />
<xsl:strip-space elements="*" />

<!-- Requested action. The root template terminates unless this equals 'reduce'.
     The default must be a quoted XPath string literal: an unquoted select="reduce"
     is a location path selecting child elements named "reduce", which yields an
     empty node-set instead of the string. -->
<xsl:param name="action" select="'reduce'" />
<!-- URI of the Wikidata RDF/XML document; handed to document() below. -->
<xsl:param name="rdf-path" />

<!-- Parsed Wikidata RDF document. -->
<xsl:variable name="wd-doc" select="document($rdf-path)" />
<!-- Index of rdf:Description elements by their rdf:about URI.
     Not referenced by any template visible in this stylesheet. -->
<xsl:key name="wd-description" match="rdf:RDF/rdf:Description" use="@rdf:about" />

<!-- Last '/'-separated segment of the rdf:about URI of the first rdf:Description
     in the RDF document (presumably the entity id, e.g. a Q-number; confirm). -->
<xsl:variable name="element-id">
  <xsl:call-template name="substring-after-last">
    <xsl:with-param name="string" select="$wd-doc/rdf:RDF/rdf:Description[1]/@rdf:about" />
    <xsl:with-param name="delimiter" select="'/'" />
  </xsl:call-template>
</xsl:variable>
<xsl:variable name="wd-resource-prefix" select="'http://www.wikidata.org/entity/'" />
<!-- The rdf:Description whose rdf:about is the entity URI built from $element-id. -->
<xsl:variable name="wd-resource" select="$wd-doc/rdf:RDF/rdf:Description[@rdf:about = concat($wd-resource-prefix, $element-id)]" />
47
48
<!-- Identity rule: anything not handled by a more specific template is copied
     as is, and its attributes and children are processed recursively. -->
<xsl:template match="node()|@*">
  <xsl:copy>
    <xsl:apply-templates select="node()|@*" />
  </xsl:copy>
</xsl:template>
54
<!-- Entry point: abort on an unsupported $action, otherwise process the document. -->
<xsl:template match="/">
  <xsl:choose>
    <xsl:when test="$action != 'reduce'">
      <xsl:message terminate="yes">"reduce" is the only available action for now.</xsl:message>
    </xsl:when>
    <xsl:otherwise>
      <xsl:apply-templates />
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
62
<!-- WDEF label: kept when it belongs to another element, or when the Wikidata
     resource has no schema:name in the same language; dropped otherwise. -->
<xsl:template match="/wdef:knowledge/wdef:element/wdef:label">
  <xsl:variable name="label-lang" select="@xml:lang" />

  <xsl:choose>
    <xsl:when test="../@wdef:id != $element-id">
      <xsl:copy-of select="." />
    </xsl:when>
    <xsl:when test="not($wd-resource/schema:name[@xml:lang = $label-lang])">
      <xsl:copy-of select="." />
    </xsl:when>
  </xsl:choose>
</xsl:template>
69
<!-- WDEF description: dropped only when it belongs to the merged element and
     the Wikidata resource already has a schema:description in that language. -->
<xsl:template match="/wdef:knowledge/wdef:element/wdef:description">
  <xsl:variable name="wd-same-language" select="$wd-resource/schema:description[@xml:lang = current()/@xml:lang]" />

  <xsl:if test="not($wd-same-language) or ../@wdef:id != $element-id">
    <xsl:copy-of select="." />
  </xsl:if>
</xsl:template>
76
<!-- WDEF property: copied (with its content processed by the other templates)
     when it belongs to another element, or when at least one of its children
     has no Wikidata equivalent. The equivalence check is only run for the
     merged element. -->
<xsl:template match="/wdef:knowledge/wdef:element/wdef:property">
  <xsl:variable name="keep-property">
    <xsl:choose>
      <xsl:when test="../@wdef:id != $element-id">
        <xsl:text>yes</xsl:text>
      </xsl:when>
      <xsl:otherwise>
        <xsl:variable name="fully-covered">
          <xsl:call-template name="all-values-have-wd-equivalent" />
        </xsl:variable>
        <xsl:if test="$fully-covered = 'no'">
          <xsl:text>yes</xsl:text>
        </xsl:if>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>

  <xsl:if test="$keep-property = 'yes'">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()" />
    </xsl:copy>
  </xsl:if>
</xsl:template>
96
<!-- WDEF value: values of other elements are deep-copied untouched. For the
     merged element, a value is emitted only when it has no Wikidata equivalent,
     and its content goes through the templates (so qualifiers get filtered). -->
<xsl:template match="/wdef:knowledge/wdef:element/wdef:property/wdef:value">
  <xsl:variable name="verdict">
    <xsl:choose>
      <xsl:when test="../../@wdef:id != $element-id">
        <xsl:text>foreign</xsl:text>
      </xsl:when>
      <xsl:otherwise>
        <xsl:call-template name="value-has-wd-equivalent" />
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>

  <xsl:choose>
    <xsl:when test="$verdict = 'foreign'">
      <xsl:copy-of select="." />
    </xsl:when>
    <xsl:when test="$verdict = 'no'">
      <xsl:copy>
        <xsl:apply-templates select="@*|node()" />
      </xsl:copy>
    </xsl:when>
  </xsl:choose>
</xsl:template>
115
<!-- WDEF novalue marker: kept when it belongs to another element, or when the
     Wikidata resource has no rdf:type pointing at the "novalue" class of the
     same property. -->
<xsl:template match="/wdef:knowledge/wdef:element/wdef:property/wdef:novalue">
  <!-- The property id is bound outside the predicate: inside rdf:type[...] the
       path ../@wdef:pid would be resolved against the RDF node rather than
       against this wdef:novalue, and would therefore always be empty. -->
  <xsl:variable name="novalue-class" select="concat('http://www.wikidata.org/prop/novalue/', ../@wdef:pid)" />

  <xsl:if test="../../@wdef:id != $element-id or not($wd-resource/rdf:type[@rdf:resource = $novalue-class])">
    <xsl:copy-of select="." />
  </xsl:if>
</xsl:template>
121
<!-- "Unknown value" markers are not supported: stop the transformation. -->
<xsl:template match="/wdef:knowledge/wdef:element/wdef:property/wdef:somevalue">
  <xsl:message terminate="yes">Cannot deal with wdef:somevalue for now</xsl:message>
</xsl:template>
125
<!-- Qualifier: deep-copied unless qualifier-has-wd-equivalent answers 'yes'. -->
<xsl:template match="wdef:qualifier">
  <xsl:variable name="wd-equivalent">
    <xsl:call-template name="qualifier-has-wd-equivalent" />
  </xsl:variable>

  <xsl:choose>
    <xsl:when test="$wd-equivalent = 'yes'" />
    <xsl:otherwise>
      <xsl:copy-of select="." />
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
135
<!-- To be called in a property context.
     Runs value-has-wd-equivalent on every child element of the current node and
     emits 'no' as soon as one of the answers is 'no', 'yes' otherwise. -->
<xsl:template name="all-values-have-wd-equivalent">
  <xsl:variable name="answers">
    <xsl:for-each select="*">
      <xsl:call-template name="value-has-wd-equivalent" />
    </xsl:for-each>
  </xsl:variable>

  <xsl:choose>
    <xsl:when test="not(contains($answers, 'no'))">
      <xsl:text>yes</xsl:text>
    </xsl:when>
    <xsl:otherwise>
      <xsl:text>no</xsl:text>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
153
<!-- To be called in a property context.
     Runs value-has-wd-equivalent on every child element of the current node and
     emits 'yes' when at least one answer is 'yes', 'no' otherwise. -->
<xsl:template name="any-value-has-wd-equivalent">
  <xsl:variable name="answers">
    <xsl:for-each select="*">
      <xsl:call-template name="value-has-wd-equivalent" />
    </xsl:for-each>
  </xsl:variable>

  <xsl:choose>
    <xsl:when test="not(contains($answers, 'yes'))">
      <xsl:text>no</xsl:text>
    </xsl:when>
    <xsl:otherwise>
      <xsl:text>yes</xsl:text>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
171
<!-- To be called in a value context (the current node is a child of a
     wdef:property; the property id is read from the parent's wdef:pid).
     Emits the text 'yes' when the Wikidata RDF already holds an equivalent of
     the current value, 'no' otherwise. The first matching kind of content wins:
     literal, ref-element, translation, quantity, time, then qualifier.
     Terminates the transformation on the unsupported cases signalled below.
     Element names of the RDF document are matched through name(), i.e. on the
     prefixed names as written in that document (wdt:, psv:). -->
<xsl:template name="value-has-wd-equivalent">
  <xsl:variable name="PID" select="../@wdef:pid" />

  <xsl:choose>
    <!-- Literal: WD must have a wdt:PID element with the same text. -->
    <xsl:when test="wdef:literal">
      <xsl:choose>
        <xsl:when test="$wd-resource/*[name(.) = concat('wdt:', $PID) and text() = current()/wdef:literal/text()]">
          <xsl:text>yes</xsl:text>
        </xsl:when>
        <xsl:otherwise>
          <xsl:text>no</xsl:text>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:when>
    <!-- Reference to another element: WD must point at the same entity. -->
    <xsl:when test="wdef:ref-element">
      <xsl:choose>
        <!-- If WD already has any P31 statement, ours is treated as already
             present, so that subclasses are not added. -->
        <xsl:when test="($PID = 'P31' and $wd-resource/*[name(.) = 'wdt:P31']) or ($wd-resource/*[name(.) = concat('wdt:', $PID) and @rdf:resource = concat($wd-resource-prefix, current()/wdef:ref-element)])">
          <xsl:text>yes</xsl:text>
        </xsl:when>
        <xsl:otherwise>
          <xsl:text>no</xsl:text>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:when>
    <!-- Translation: compared with the WD value in the same language, using
         the relaxed comparison of the strings-similar template. -->
    <xsl:when test="wdef:translation">
      <xsl:choose>
        <xsl:when test="$wd-resource/*[name(.) = concat('wdt:', $PID) and @xml:lang = current()/wdef:translation/@xml:lang]">
          <xsl:variable name="string1" select="$wd-resource/*[name(.) = concat('wdt:', $PID) and @xml:lang = current()/wdef:translation/@xml:lang]/text()"/>
          <xsl:variable name="string2" select="current()/wdef:translation/text()"/>
          <xsl:variable name="similar-translation">
            <!-- xsl:call-template only accepts a name attribute; named
                 templates are not selected by mode. -->
            <xsl:call-template name="strings-similar">
              <xsl:with-param name="string1" select="$string1"/>
              <xsl:with-param name="string2" select="$string2"/>
            </xsl:call-template>
          </xsl:variable>

          <!-- A differing translation in the same language is still added,
               but reported with a non-fatal warning. -->
          <xsl:if test="$similar-translation = 'no'">
            <xsl:message terminate="no">
              <xsl:text>WARNING: </xsl:text>
              <xsl:value-of select="$element-id"/>
              <xsl:text>-</xsl:text>
              <xsl:value-of select="$PID"/>
              <xsl:text>: Add translation (</xsl:text>
              <xsl:value-of select="$string2"/>
              <xsl:text>) while another different one (</xsl:text>
              <xsl:value-of select="$string1"/>
              <xsl:text>) already exists in this language.</xsl:text>
            </xsl:message>
          </xsl:if>
          <xsl:value-of select="$similar-translation"/>
        </xsl:when>
        <xsl:otherwise>
          <xsl:text>no</xsl:text>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:when>
    <!-- Quantity: compared with the value nodes referenced by psv:PID. -->
    <xsl:when test="wdef:quantity">
      <!-- Descriptions whose rdf:about is the rdf:resource of any psv:PID element. -->
      <xsl:variable name="wd-quantity-description" select="$wd-doc/rdf:RDF/rdf:Description[@rdf:about = $wd-doc/rdf:RDF/rdf:Description/*[name(.) = concat('psv:', $PID)]/@rdf:resource]" />

      <xsl:choose>
        <xsl:when test="not($wd-quantity-description)">
          <xsl:text>no</xsl:text>
        </xsl:when>
        <xsl:otherwise>
          <!-- WDEF amount with an explicit sign: '+' is prepended when the
               amount starts with neither '-' nor '+'. -->
          <xsl:variable name="quantity-wdef-format">
            <xsl:if test="substring(wdef:quantity, 1, 1) != '-' and substring(wdef:quantity, 1, 1) != '+'">
              <xsl:text>+</xsl:text>
            </xsl:if>
            <xsl:value-of select="wdef:quantity" />
          </xsl:variable>

          <xsl:choose>
            <!-- Both the amount and the unit entity must match. -->
            <xsl:when test="$wd-quantity-description/wikibase:quantityAmount = $quantity-wdef-format and $wd-quantity-description/wikibase:quantityUnit[@rdf:resource = concat($wd-resource-prefix, current()/wdef:quantity/@wdef:unit)]">
              <xsl:text>yes</xsl:text>
            </xsl:when>
            <xsl:otherwise>
              <xsl:text>no</xsl:text>
            </xsl:otherwise>
          </xsl:choose>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:when>
    <!-- Time: compared with the value nodes referenced by psv:PID. -->
    <xsl:when test="wdef:time">
      <!-- Descriptions whose rdf:about is the rdf:resource of any psv:PID element. -->
      <xsl:variable name="wd-date-description" select="$wd-doc/rdf:RDF/rdf:Description[@rdf:about = $wd-doc/rdf:RDF/rdf:Description/*[name(.) = concat('psv:', $PID)]/@rdf:resource]" />

      <xsl:choose>
        <xsl:when test="not($wd-date-description)">
          <xsl:text>no</xsl:text>
        </xsl:when>
        <xsl:otherwise>
          <!-- WD time value cut to its precision: 4, 7 or 10 leading
               characters for precision 9, 10 or 11. -->
          <xsl:variable name="date-wd-format">
            <xsl:choose>
              <xsl:when test="$wd-date-description/wikibase:timePrecision = 9">
                <xsl:value-of select="substring($wd-date-description/wikibase:timeValue, 1, 4)" />
              </xsl:when>
              <xsl:when test="$wd-date-description/wikibase:timePrecision = 10">
                <xsl:value-of select="substring($wd-date-description/wikibase:timeValue, 1, 7)" />
              </xsl:when>
              <xsl:when test="$wd-date-description/wikibase:timePrecision = 11">
                <xsl:value-of select="substring($wd-date-description/wikibase:timeValue, 1, 10)" />
              </xsl:when>
              <xsl:otherwise>
                <xsl:message terminate="yes">Can only deal with precision between 9 and 11</xsl:message>
              </xsl:otherwise>
            </xsl:choose>
          </xsl:variable>
          <!-- WDEF time cut the same way, but starting at the second
               character (the first one is skipped). -->
          <xsl:variable name="date-wdef-format">
            <xsl:choose>
              <xsl:when test="wdef:time/@wdef:precision = 9">
                <xsl:value-of select="substring(wdef:time, 2, 4)" />
              </xsl:when>
              <xsl:when test="wdef:time/@wdef:precision = 10">
                <xsl:value-of select="substring(wdef:time, 2, 7)" />
              </xsl:when>
              <xsl:when test="wdef:time/@wdef:precision = 11">
                <xsl:value-of select="substring(wdef:time, 2, 10)" />
              </xsl:when>
              <xsl:otherwise>
                <xsl:message terminate="yes">Can only deal with precision between 9 and 11</xsl:message>
              </xsl:otherwise>
            </xsl:choose>
          </xsl:variable>
          <!-- 'yes' when the truncated WD date is at least as long as ours. -->
          <xsl:variable name="wd-time-at-least-precise">
            <xsl:choose>
              <xsl:when test="string-length($date-wd-format) &gt;= string-length($date-wdef-format)">
                <xsl:text>yes</xsl:text>
              </xsl:when>
              <xsl:otherwise>
                <xsl:text>no</xsl:text>
              </xsl:otherwise>
            </xsl:choose>
          </xsl:variable>
          <!-- 'yes' when, in addition, the WD date starts with our date. -->
          <xsl:variable name="wd-time-compatible">
            <xsl:choose>
              <xsl:when test="$wd-time-at-least-precise = 'yes' and substring($date-wd-format, 1, string-length($date-wdef-format)) = $date-wdef-format">
                <xsl:text>yes</xsl:text>
              </xsl:when>
              <xsl:otherwise>
                <xsl:text>no</xsl:text>
              </xsl:otherwise>
            </xsl:choose>
          </xsl:variable>

          <!-- Only the calendar model Q1985727 is accepted. -->
          <xsl:if test="not($wd-date-description/wikibase:timeCalendarModel[@rdf:resource = concat($wd-resource-prefix, 'Q1985727')])">
            <xsl:message terminate="yes">Can only deal with gregorian calendar for now</xsl:message>
          </xsl:if>

          <xsl:choose>
            <!-- Return true if wd is the same time, at least as precise as wdef -->
            <xsl:when test="$wd-time-at-least-precise = 'yes' and $wd-time-compatible = 'yes'">
              <xsl:text>yes</xsl:text>
            </xsl:when>
            <xsl:otherwise>
              <!-- Both flags are either 'yes' or 'no', so this test always
                   holds here and the transformation stops; the 'no' below
                   is never emitted. -->
              <xsl:if test="$wd-time-at-least-precise = 'no' or $wd-time-compatible = 'no'">
                <xsl:message terminate="yes">WD has time data but incompatible or less precise. We cannot deal with that for now.</xsl:message>
              </xsl:if>
              <xsl:text>no</xsl:text>
            </xsl:otherwise>
          </xsl:choose>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:when>
    <!-- Only qualifiers: the answer is the one for all of them together. -->
    <xsl:when test="wdef:qualifier">
      <xsl:call-template name="all-qualifiers-have-wd-equivalent" />
    </xsl:when>
    <!-- Several children of unsupported kinds: stop. -->
    <xsl:when test="count(*) > 1">
      <xsl:text>no</xsl:text>
      <xsl:message terminate="yes">cannot deal with more than one value for now</xsl:message>
    </xsl:when>
    <!-- Anything else is considered absent from WD. -->
    <xsl:otherwise>
      <xsl:text>no</xsl:text>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
349
<!-- To be called in a qualifier context.
     Emits 'yes' when the RDF document has a description whose rdf:about equals
     the parent's wdef:id and which carries a pq:PID element pointing at the
     qualifier's referenced entity; emits 'no' otherwise, and always 'no' when
     the parent's wdef:id starts with '?'. -->
<xsl:template name="qualifier-has-wd-equivalent">
  <!-- WARNING: Not extensively tested for now -->
  <!-- Values of the current qualifier are bound to variables first: inside a
       predicate on the RDF document, relative paths such as ../@wdef:id are
       resolved against the RDF node being tested, not against this qualifier,
       which made the former single-expression test unable to ever match. -->
  <!-- NOTE(review): assumes the parent's wdef:id is written in the same form as
       the rdf:about of the matching RDF description; confirm against real data. -->
  <xsl:variable name="owner-id" select="../@wdef:id" />
  <xsl:variable name="qualifier-name" select="concat('pq:', wdef:property/@wdef:pid)" />
  <xsl:variable name="qualifier-target" select="concat($wd-resource-prefix, wdef:property/wdef:value/wdef:ref-element)" />
  <xsl:variable name="wd-owner" select="$wd-doc/rdf:RDF/rdf:Description[@rdf:about = $owner-id]" />

  <xsl:choose>
    <xsl:when test="substring($owner-id, 1, 1) = '?' or not($wd-owner/*[name(.) = $qualifier-name and @rdf:resource = $qualifier-target])">
      <xsl:text>no</xsl:text>
    </xsl:when>
    <xsl:otherwise>
      <xsl:text>yes</xsl:text>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
361
<!-- Runs qualifier-has-wd-equivalent on every wdef:qualifier child of the
     current node; emits 'no' when any answer is 'no', 'yes' otherwise. -->
<xsl:template name="all-qualifiers-have-wd-equivalent">
  <xsl:variable name="answers">
    <xsl:for-each select="wdef:qualifier">
      <xsl:call-template name="qualifier-has-wd-equivalent" />
    </xsl:for-each>
  </xsl:variable>

  <xsl:choose>
    <xsl:when test="not(contains($answers, 'no'))">
      <xsl:text>yes</xsl:text>
    </xsl:when>
    <xsl:otherwise>
      <xsl:text>no</xsl:text>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
378
<!-- Relaxed string comparison.
     Parameters: string1, string2 - the two strings to compare.
     Emits 'yes' when both strings are equal after being passed through
     translate() with the tables below and then stripped of spaces, 'no'
     otherwise. Characters of translate-from that have no counterpart in
     translate-to are deleted by translate().
     A named template has no match pattern and therefore must not carry a mode
     attribute; it is invoked by name only. -->
<xsl:template name="strings-similar">
  <xsl:param name="string1"/>
  <xsl:param name="string2"/>

  <xsl:variable name="translate-from">ABCDEFGHIJKLMNOPQRSTUVWXYZÀàÂâÇçÉÈéèêÎîÔôÛûÙù  :-_’',?!()</xsl:variable>
  <xsl:variable name="translate-to" >abcdefghijklmnopqrstuvwxyzaaaacceeeeeiioouuuu </xsl:variable>

  <xsl:choose>
    <xsl:when test="translate(translate($string1, $translate-from, $translate-to), ' ', '') = translate(translate($string2, $translate-from, $translate-to), ' ', '')">
      <xsl:text>yes</xsl:text>
    </xsl:when>
    <xsl:otherwise>
      <xsl:text>no</xsl:text>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
395
<!-- Emits the part of $string that follows the last occurrence of $delimiter,
     or $string itself when the delimiter does not occur in it.
     Parameters: string - text to search; delimiter - separator to look for.
     An empty delimiter yields $string unchanged: contains() is true for an
     empty search string and substring-after() then returns its input as is,
     so recursing on it would never end. -->
<xsl:template name="substring-after-last">
  <xsl:param name="string" />
  <xsl:param name="delimiter" />

  <xsl:choose>
    <xsl:when test="string-length($delimiter) &gt; 0 and contains($string, $delimiter)">
      <!-- Drop everything up to the first delimiter and look again. -->
      <xsl:call-template name="substring-after-last">
        <xsl:with-param name="string" select="substring-after($string, $delimiter)" />
        <xsl:with-param name="delimiter" select="$delimiter" />
      </xsl:call-template>
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="$string" />
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
412 </xsl:stylesheet>