]> Pierre Choffet | Git repositories - wdef_tools.git/commitdiff
Add tool to detect elements with duplicate label/description
authorPierre Choffet <peuc@wanadoo.fr>
Thu, 19 Mar 2026 15:09:23 +0000 (11:09 -0400)
committerPierre Choffet <peuc@wanadoo.fr>
Thu, 19 Mar 2026 15:09:23 +0000 (11:09 -0400)
README
xslts/report_label_description_duplicates.xslt [new file with mode: 0644]

diff --git a/README b/README
index 2217d9f05b8c31d408f8cbc668ceaaa6031d7186..6bf2fb6f2a282e242944e0f23e1352473f94d7be 100644 (file)
--- a/README
+++ b/README
@@ -36,3 +36,6 @@ Description of the provided tools:
 
   - xslts/replace_id.xslt
     Change the wdef:id of an element and its references.
+  
+  - xslts/report_label_description_duplicates.xslt
+    Detect elements sharing the same label and description in a given language (Wikidata forbids this).
diff --git a/xslts/report_label_description_duplicates.xslt b/xslts/report_label_description_duplicates.xslt
new file mode 100644 (file)
index 0000000..7bdbb6e
--- /dev/null
@@ -0,0 +1,68 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xsl:stylesheet version="1.0" xmlns:wdef="https://purl.choffet.net/wdef"
+                xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+<!-- report_label_description_duplicates.xslt - Report duplicate label/description, forbidden in Wikidata
+     Copyright (C) 2026  Pierre Choffet
+
+     This program is free software: you can redistribute it and/or modify
+     it under the terms of version 3 of the GNU General Public License as
+     published by the Free Software Foundation.
+
+     This program is distributed in the hope that it will be useful,
+     but WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+     GNU General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with this program.  If not, see <http://www.gnu.org/licenses/>.
+     -->
+       <xsl:output method="text" encoding="utf-8" />
+       <!-- Index each top-level element under the text of every one of its labels/descriptions, whatever the language -->
+       <xsl:key name="element-from-label" match="/wdef:knowledge/wdef:element" use="wdef:label" />
+       <xsl:key name="element-from-description" match="/wdef:knowledge/wdef:element" use="wdef:description" />
+       <!-- Override the built-in rule for text nodes so that they are not copied to the output -->
+       <xsl:template match="text()" />
+       
+       <!-- For every language of a top-level element, report through xsl:message when several elements
+            share both its label and its description in that language (forbidden in Wikidata). -->
+       <xsl:template match="/wdef:knowledge/wdef:element[wdef:label]">
+               <xsl:variable name="element" select="." />
+               <xsl:variable name="languages" select="wdef:label/@interface-lang|wdef:description/@interface-lang" />
+               
+               <xsl:for-each select="$languages">
+                       <xsl:variable name="language" select="current()" />
+                       
+                       <!-- Deduplicate languages first -->
+                       <xsl:if test="generate-id($languages[. = $language][1]) = generate-id()">
+                               <xsl:variable name="label" select="$element/wdef:label[@interface-lang = $language]" />
+                               <xsl:variable name="description" select="$element/wdef:description[@interface-lang = $language]" />
+                               
+                               <!-- If we have no label and description in the same language, no requirement exists in Wikibase -->
+                               <xsl:if test="$label and $description">
+                                       <!-- The keys match labels/descriptions of any language, so keep only the elements whose text in the current language is the same -->
+                                       <xsl:variable name="same-label" select="key('element-from-label', $label)[wdef:label[@interface-lang = $language] = $label]" />
+                                       <xsl:variable name="same-description" select="key('element-from-description', $description)[wdef:description[@interface-lang = $language] = $description]" />
+                                       <!-- Elements present in both node-sets share the label AND the description -->
+                                       <xsl:variable name="duplicates" select="$same-label[count(. | $same-description) = count($same-description)]" />
+                                       
+                                       <!-- To prevent duplicate reports, only the first duplicate in document order reports the whole group -->
+                                       <xsl:if test="count($duplicates) &gt; 1 and generate-id($duplicates[1]) = generate-id($element)">
+                                               <xsl:message>
+                                                       <xsl:value-of select="$element/@wdef:id" />
+                                                       <xsl:text>: </xsl:text>
+                                                       <xsl:text>In language "</xsl:text>
+                                                       <xsl:value-of select="$language" />
+                                                       <xsl:text>", the label "</xsl:text>
+                                                       <xsl:value-of select="$label" />
+                                                       <xsl:text>" with description "</xsl:text>
+                                                       <xsl:value-of select="$description" />
+                                                       <xsl:text>" has </xsl:text>
+                                                       <xsl:value-of select="count($duplicates)" />
+                                                       <xsl:text> occurrences.</xsl:text>
+                                               </xsl:message>
+                                       </xsl:if>
+                               </xsl:if>
+                       </xsl:if>
+               </xsl:for-each>
+       </xsl:template>
+</xsl:stylesheet>