]> Pierre Choffet | Git repositories - banq_wikidata.git/commitdiff
Fix buggy titles fix step
authorPierre Choffet <peuc@wanadoo.fr>
Wed, 3 Jun 2026 20:27:58 +0000 (16:27 -0400)
committerPierre Choffet <peuc@wanadoo.fr>
Wed, 3 Jun 2026 20:27:58 +0000 (16:27 -0400)
README
banq2wd.sh
merge_fixes_bibliographic.xslt [new file with mode: 0644]

diff --git a/README b/README
index d53424bf265529b4adab2b5d46f5305f21ac73eb..e98eb67bb8859e1f1c4c7020890b0fd4737fa1f3 100644 (file)
--- a/README
+++ b/README
@@ -118,7 +118,14 @@ pour les titres d’œuvres en vigueur sur Wikipédia.
 Une fois l’édition terminée, les correctifs sur les titres peuvent être
 réintégrés dans le fichier records_fix.xml via la commande suivante :
 
-  $ ./banq2wd.sh --merge-fixes records_fix.xml /tmp/titres.tsv
+  $ ./banq2wd.sh --merge-fixes records_fix.xml 2-filtered.xml > records_fix_new.xml 2>records_fix.log
+
+Un journal du traitement est créé dans records_fix.log ainsi qu’un fichier
+records_fix_new.xml intégrant les nouvelles modifications. Ce dernier peut,
+pour validation, être comparé au fichier original « records_fix.xml » et doit le
+remplacer si le résultat est correct :
+
+  $ mv records_fix_new.xml records_fix.xml
 
   
 3. L’import vers Wikidata requiert que les données soient dans un format dont la
@@ -185,7 +192,7 @@ seule la structure est purgée des entrées inutiles :
 pour l’opération est bien valide. Cette validation se fait au moyen d’un schéma
 XSD fourni dans le dépôt wdef_schemas :
 
-  $ xmlstarlet val -e -s wdef_schemas/wdef.xsd 6-wdef-canonicalized.xml 2> /tmp/validation.log
+  $ xmlstarlet val -e -s wdef_schemas/wdef.xsd 6-wdef-canonicalized.xml 2>/tmp/validation.log
 
   
 9. Cette dernière étape couvre l’import effectif vers Wikidata. L’opération se
index 7877fad7708db2c2b402349124d9817cff0cdab8..c6439b7c07f3ea0c467251db8c1fa59014d91580 100755 (executable)
@@ -264,9 +264,8 @@ mergeFixes() {
                tr_parameters=(-s "leader=${leader}" -s "original-title=${source_title//\"/${doublequote_workaround}}" -s "title-lang=${title_lang}" -s "fixed-title=${fixed_title//\"/${doublequote_workaround}}" -s "original-subtitle=${source_subtitle//\"/${doublequote_workaround}}" -s "subtitle-lang=${subtitle_lang}" -s "fixed-subtitle=${fixed_subtitle//\"/${doublequote_workaround}}")
                
                local fixes_path_tmp="$(mktemp)"
-               xmlstarlet tr merge_fixes.xslt "${tr_parameters[@]}" "${old_fixes_path_tmp}" >| "${fixes_path_tmp}"
+               xmlstarlet tr merge_fixes_bibliographic.xslt "${tr_parameters[@]}" "${old_fixes_path_tmp}" >| "${fixes_path_tmp}"
 
-               echo ${index} >&2
                rm "${old_fixes_path_tmp}"
                old_fixes_path_tmp="${fixes_path_tmp}"
                shift
diff --git a/merge_fixes_bibliographic.xslt b/merge_fixes_bibliographic.xslt
new file mode 100644 (file)
index 0000000..bcfc8e4
--- /dev/null
@@ -0,0 +1,191 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xsl:stylesheet version="1.0" xmlns:marc="http://www.loc.gov/MARC21/slim"
+                xmlns:xml="http://www.w3.org/XML/1998/namespace"
+                xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+  <!-- merge_fixes_bibliographic.xslt - Merge title fixes for bibliographic entries.
+    Copyright (C) 2025  Pierre Choffet
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of version 3 of the GNU General Public License as
+    published by the Free Software Foundation.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+    -->
+       <!-- Serialise the result as indented UTF-8 XML. -->
+       <xsl:output indent="yes" method="xml" encoding="utf-8" />
+       
+       <!-- Stylesheet parameters.
+            fixes-path        : declared but not referenced anywhere in this stylesheet.
+            leader            : compared with record/@id; also used as the id of a newly created record.
+            original-title    : written to the "source" attribute of the title fix.
+            title-lang        : written to the "xml:lang" attribute of the title fix.
+            fixed-title       : written as the text content of the title fix.
+            original-subtitle, subtitle-lang, fixed-subtitle : same roles for the subtitle fix.
+            The caller in banq2wd.sh replaces every double quote in the title and subtitle
+            values with a placeholder before passing them (presumably the same token as
+            the variable below; confirm against the shell script). -->
+       <xsl:param name="fixes-path" />
+       <xsl:param name="leader" />
+       <xsl:param name="original-title" />
+       <xsl:param name="title-lang" />
+       <xsl:param name="fixed-title" />
+       <xsl:param name="original-subtitle" />
+       <xsl:param name="subtitle-lang" />
+       <xsl:param name="fixed-subtitle" />
+       
+       <!-- Placeholder token that stands for a double quote inside the parameter values. -->
+       <xsl:variable name="doublequote-workaround">%DOUBLEQUOTE-WORKAROUND%</xsl:variable>
+       <!-- The four variables below hold the corresponding parameter with every placeholder
+            turned back into a double quote. They are built from template output, so each
+            one is a result tree fragment, not a plain string: in a boolean test a result
+            tree fragment is always true, even when its string value is empty. -->
+       <xsl:variable name="original-title-clean">
+               <xsl:call-template name="string-replace-all">
+                       <xsl:with-param name="text" select="$original-title" />
+                       <xsl:with-param name="replace" select="$doublequote-workaround" />
+                       <xsl:with-param name="by" select="'&quot;'" />
+               </xsl:call-template>
+       </xsl:variable>
+       <xsl:variable name="fixed-title-clean">
+               <xsl:call-template name="string-replace-all">
+                       <xsl:with-param name="text" select="$fixed-title" />
+                       <xsl:with-param name="replace" select="$doublequote-workaround" />
+                       <xsl:with-param name="by" select="'&quot;'" />
+               </xsl:call-template>
+       </xsl:variable>
+       <xsl:variable name="original-subtitle-clean">
+               <xsl:call-template name="string-replace-all">
+                       <xsl:with-param name="text" select="$original-subtitle" />
+                       <xsl:with-param name="replace" select="$doublequote-workaround" />
+                       <xsl:with-param name="by" select="'&quot;'" />
+               </xsl:call-template>
+       </xsl:variable>
+       <xsl:variable name="fixed-subtitle-clean">
+               <xsl:call-template name="string-replace-all">
+                       <xsl:with-param name="text" select="$fixed-subtitle" />
+                       <xsl:with-param name="replace" select="$doublequote-workaround" />
+                       <xsl:with-param name="by" select="'&quot;'" />
+               </xsl:call-template>
+       </xsl:variable>
+       
+       <!-- Index of the records of the fix document by their id attribute. -->
+       <xsl:key name="record-by-id" match="/records/record" use="@id" />
+       
+       <!-- Identity rule: any attribute or node without a more specific template is
+            copied as is, and its attributes and children are processed in turn. -->
+       <xsl:template match="@*|node()">
+               <xsl:copy>
+                       <xsl:apply-templates select="@*|node()" />
+               </xsl:copy>
+       </xsl:template>
+       
+       <!-- Root element of the fix document.
+            When a record whose id equals $leader already exists, the tree is copied
+            through the other templates so that this record can be rewritten in place.
+            Otherwise every existing record is copied verbatim and a new record
+            carrying the supplied title and subtitle fixes is appended.
+            A title (or subtitle) fix is written only when its original text, its
+            language and its fixed text are all non-empty. -->
+       <xsl:template match="/records">
+               <xsl:choose>
+                       <xsl:when test="key('record-by-id', $leader)">
+                               <!-- Fix document has an entry with the given leader -->
+                               <xsl:copy>
+                                       <xsl:apply-templates select="node()|@*" />
+                               </xsl:copy>
+                       </xsl:when>
+                       <xsl:otherwise>
+                               <!-- No entry with the given leader in fix document -->
+                               <!-- xsl:copy always copies the current node and takes no select attribute in XSLT 1.0 -->
+                               <xsl:copy>
+                                       <!-- Keep the attributes of the root element, as the other branch does -->
+                                       <xsl:copy-of select="@*" />
+                                       <xsl:copy-of select="record" />
+                                       <xsl:element name="record">
+                                               <xsl:attribute name="id">
+                                                       <xsl:value-of select="$leader" />
+                                               </xsl:attribute>
+                                               
+                                               <!-- Add title -->
+                                               <!-- TODO: Deal with "[Sans titre]" -->
+                                               <!-- The *-clean variables are result tree fragments, which are always
+                                                    true in a boolean test: compare their string value instead -->
+                                               <xsl:if test="string($original-title-clean) != '' and string($title-lang) != '' and string($fixed-title-clean) != ''">
+                                                       <xsl:call-template name="write-title" />
+                                               </xsl:if>
+                                               
+                                               <!-- Add subtitle -->
+                                               <xsl:if test="string($original-subtitle-clean) != '' and string($subtitle-lang) != '' and string($fixed-subtitle-clean) != ''">
+                                                       <xsl:element name="fix">
+                                                               <xsl:attribute name="type">
+                                                                       <xsl:text>subtitle</xsl:text>
+                                                               </xsl:attribute>
+                                                               <xsl:attribute name="source">
+                                                                       <xsl:value-of select="$original-subtitle-clean" />
+                                                               </xsl:attribute>
+                                                               <xsl:attribute name="xml:lang">
+                                                                       <xsl:value-of select="$subtitle-lang" />
+                                                               </xsl:attribute>
+                                                               <xsl:value-of select="$fixed-subtitle-clean" />
+                                                       </xsl:element>
+                                               </xsl:if>
+                                       </xsl:element>
+                               </xsl:copy>
+                       </xsl:otherwise>
+               </xsl:choose>
+       </xsl:template>
+       
+       <!-- One record of the fix document.
+            A record is copied unchanged when its id differs from $leader (or it has
+            no id at all), or when no complete title fix (original text, language and
+            fixed text) was supplied.
+            Otherwise the record is rebuilt: titles in another language are kept, with
+            a warning for each; the title in $title-lang is replaced by the new one;
+            fixes of any other type are kept as they are.
+            NOTE(review): the subtitle parameters are not merged into an existing
+            record here; confirm whether they should be. -->
+       <xsl:template match="/records/record">
+               <!-- The *-clean variables are result tree fragments, always true in a
+                    boolean test, so their string value is compared instead -->
+               <xsl:variable name="has-new-title" select="string($original-title-clean) != '' and string($title-lang) != '' and string($fixed-title-clean) != ''" />
+               <xsl:choose>
+                       <!-- not(@id = $leader) is also true for a record without id,
+                            whereas @id != $leader would be false for it -->
+                       <xsl:when test="not(@id = $leader) or not($has-new-title)">
+                               <xsl:copy-of select="." />
+                       </xsl:when>
+                       <xsl:otherwise>
+                               <!-- xsl:copy always copies the current node and takes no select attribute in XSLT 1.0 -->
+                               <xsl:copy>
+                                       <xsl:copy-of select="@*" />
+                                       <!-- Keep any title in another language but warn for them -->
+                                       <xsl:copy-of select="fix[@type='title' and @xml:lang != $title-lang]" />
+                                       <xsl:for-each select="fix[@type='title' and @xml:lang != $title-lang]">
+                                               <xsl:message terminate="no">
+                                                       <xsl:text>L’entrée avec le leader </xsl:text>
+                                                       <xsl:value-of select="../@id" />
+                                                       <xsl:text> a un titre dans la langue « </xsl:text>
+                                                       <xsl:value-of select="@xml:lang" />
+                                                       <xsl:text> » qui a été conservé en plus du nouveau titre en « </xsl:text>
+                                                       <xsl:value-of select="$title-lang" />
+                                                       <xsl:text> ».</xsl:text>
+                                               </xsl:message>
+                                       </xsl:for-each>
+                                       <!-- Add new title -->
+                                       <xsl:call-template name="write-title" />
+                                       <!-- Keep the fixes that are not titles (subtitles for instance)
+                                            instead of silently dropping them -->
+                                       <xsl:copy-of select="fix[not(@type='title')]" />
+                               </xsl:copy>
+                       </xsl:otherwise>
+               </xsl:choose>
+       </xsl:template>
+       
+       <!-- Emits one fix element of type "title": the source attribute holds the
+            original title, xml:lang holds $title-lang and the text content is the
+            fixed title. Takes no parameter; reads the global variables only. -->
+       <xsl:template name="write-title">
+               <xsl:element name="fix">
+                       <xsl:attribute name="type">title</xsl:attribute>
+                       <xsl:attribute name="source"><xsl:value-of select="$original-title-clean" /></xsl:attribute>
+                       <xsl:attribute name="xml:lang"><xsl:value-of select="$title-lang" /></xsl:attribute>
+                       <xsl:value-of select="$fixed-title-clean" />
+               </xsl:element>
+       </xsl:template>
+       <!-- Mode "title" rule for a fix element: a title fix in $title-lang is replaced
+            by the new title, any other fix is copied untouched.
+            No apply-templates in mode "title" appears in this stylesheet. -->
+       <xsl:template match="fix" mode="title">
+               <xsl:choose>
+                       <!-- Other type or other language: keep as is -->
+                       <xsl:when test="not(@type='title' and @xml:lang=$title-lang)">
+                               <xsl:copy-of select="." />
+                       </xsl:when>
+                       <!-- Same type and language: substitute the new title -->
+                       <xsl:otherwise>
+                               <xsl:call-template name="write-title" />
+                       </xsl:otherwise>
+               </xsl:choose>
+       </xsl:template>
+       
+       <!-- Replaces every occurrence of a substring (XSLT 1.0 has no replace function).
+            text    : string to process.
+            replace : substring to look for.
+            by      : replacement string.
+            Outputs $text unchanged when $text or $replace is empty, or when $replace
+            does not occur in $text. -->
+       <xsl:template name="string-replace-all">
+               <xsl:param name="text" />
+               <xsl:param name="replace" />
+               <xsl:param name="by" />
+               <!-- True when there is nothing to search in, or nothing to search for -->
+               <xsl:variable name="nothing-to-do" select="$text = '' or $replace = '' or not($replace)" />
+               <xsl:choose>
+                       <xsl:when test="not($nothing-to-do) and contains($text, $replace)">
+                               <!-- Head up to the first match, then the replacement -->
+                               <xsl:value-of select="concat(substring-before($text, $replace), $by)" />
+                               <!-- Recurse on what follows the first match -->
+                               <xsl:call-template name="string-replace-all">
+                                       <xsl:with-param name="text" select="substring-after($text, $replace)" />
+                                       <xsl:with-param name="replace" select="$replace" />
+                                       <xsl:with-param name="by" select="$by" />
+                               </xsl:call-template>
+                       </xsl:when>
+                       <xsl:otherwise>
+                               <xsl:value-of select="$text" />
+                       </xsl:otherwise>
+               </xsl:choose>
+       </xsl:template>
+</xsl:stylesheet>