--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<xsl:stylesheet version="1.0" xmlns:marc="http://www.loc.gov/MARC21/slim"
+ xmlns:xml="http://www.w3.org/XML/1998/namespace"
+ xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+ <!-- filter_marcxml.xslt - Filter out unwanted records.
+ Copyright (C) 2025 Pierre Choffet
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of version 3 of the GNU General Public License as
+ published by the Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ -->
+
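+ <!--
+ Remove the following bibliographic records:
+ - those having a 655$a value marked keep="no" in filters.xml
+ - those whose 007/008 do not identify them as a print or a catalogue
+
+ Remove the following authority records:
+ - those without a 100, 110 or 111 heading
+ - those holding a geographic name (151)
+ - those never referenced from a bibliographic record that is kept
+ -->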
+
+
+ <xsl:output indent="yes" method="xml" encoding="utf-8" />
+ <xsl:strip-space elements="*" />
+
+ <xsl:variable name="filters-doc" select="document('filters.xml')" />
+
+ <!-- Index of the person access points (100, 600, 700, 800) found in
+      bibliographic records, i.e. records whose leader character 7 is not "z",
+      and whose 007 starts with "k" or whose 008 has "c" as 25th character.
+      Lookup value: subfields a, b, c, d and q concatenated in that order,
+      without separator. -->
+ <!-- TBD: requiring 007 to start with "k" discards many prints that are
+      otherwise valid. A reliable way to recognise prints still has to be
+      verified. -->
+ <xsl:key
+   name="references-to-person"
+   match="/marc:collection/marc:record[substring(marc:leader, 7, 1) != 'z' and (substring(marc:controlfield[@tag='007'], 1, 1) = 'k' or substring(marc:controlfield[@tag='008'], 25, 1) = 'c')]/marc:datafield[@tag='100' or @tag='600' or @tag='700' or @tag='800']"
+   use="concat(marc:subfield[@code='a'], marc:subfield[@code='b'], marc:subfield[@code='c'], marc:subfield[@code='d'], marc:subfield[@code='q'])" />
+ <!-- Same index for the corporate access points (110, 610, 710, 810).
+      Lookup value: subfields a, b, c and d concatenated in that order. -->
+ <!-- TBD: the remark above about the 007 test applies here as well. -->
+ <xsl:key
+   name="references-to-corporate"
+   match="/marc:collection/marc:record[substring(marc:leader, 7, 1) != 'z' and (substring(marc:controlfield[@tag='007'], 1, 1) = 'k' or substring(marc:controlfield[@tag='008'], 25, 1) = 'c')]/marc:datafield[@tag='110' or @tag='610' or @tag='710' or @tag='810']"
+   use="concat(marc:subfield[@code='a'], marc:subfield[@code='b'], marc:subfield[@code='c'], marc:subfield[@code='d'])" />
+ <!-- Same index for the meeting access points (111, 611, 711, 811).
+      Lookup value: subfields a, b, n, d and c concatenated in that order. -->
+ <!-- TBD: the remark above about the 007 test applies here as well. -->
+ <xsl:key
+   name="references-to-meeting"
+   match="/marc:collection/marc:record[substring(marc:leader, 7, 1) != 'z' and (substring(marc:controlfield[@tag='007'], 1, 1) = 'k' or substring(marc:controlfield[@tag='008'], 25, 1) = 'c')]/marc:datafield[@tag='111' or @tag='611' or @tag='711' or @tag='811']"
+   use="concat(marc:subfield[@code='a'], marc:subfield[@code='b'], marc:subfield[@code='n'], marc:subfield[@code='d'], marc:subfield[@code='c'])" />
+
+ <!-- Index of the rule elements declared under field 655, subfield a of a
+      filters document, looked up by their value attribute. -->
+ <xsl:key
+   name="filter-state-655a"
+   match="/filters/field[@tag='655']/subfield[@code='a']/rule"
+   use="@value" />
+
+ <!-- Index of the records of the collection, looked up by the content of
+      their controlfield 001. -->
+ <xsl:key
+   name="record-control-number"
+   match="/marc:collection/marc:record"
+   use="marc:controlfield[@tag='001']" />
+
+ <!--<xsl:key name="authority-person" match="/marc:collection/marc:record[substring(marc:leader, 7, 1) = 'z' and marc:datafield[@tag='100']]" use="concat(marc:datafield[@tag='100']/marc:subfield[@code='a'], marc:datafield[@tag='100']/marc:subfield[@code='b'], marc:datafield[@tag='100']/marc:subfield[@code='c'], marc:datafield[@tag='100']/marc:subfield[@code='d'], marc:datafield[@tag='100']/marc:subfield[@code='q'])" />
+ <xsl:key name="authority-corporate" match="/marc:collection/marc:record[substring(marc:leader, 7, 1) = 'z' and marc:datafield[@tag='110']]" use="concat(marc:datafield[@tag='110']/marc:subfield[@code='a'], marc:datafield[@tag='110']/marc:subfield[@code='b'], marc:datafield[@tag='110']/marc:subfield[@code='c'], marc:datafield[@tag='110']/marc:subfield[@code='d'])" />
+ <xsl:key name="authority-meeting" match="/marc:collection/marc:record[substring(marc:leader, 7, 1) = 'z' and marc:datafield[@tag='111']]" use="concat(marc:datafield[@tag='111']/marc:subfield[@code='a'], marc:datafield[@tag='111']/marc:subfield[@code='b'], marc:datafield[@tag='111']/marc:subfield[@code='n'], marc:datafield[@tag='111']/marc:subfield[@code='d'], marc:datafield[@tag='111']/marc:subfield[@code='c'])" />-->
+
+ <!-- Default rule: any node or attribute that no more specific template
+      handles is copied, and its attributes then its children are processed
+      in turn. -->
+ <xsl:template match="node()|@*">
+   <xsl:copy>
+     <xsl:apply-templates select="@*" />
+     <xsl:apply-templates select="node()" />
+   </xsl:copy>
+ </xsl:template>
+
+ <!-- Bibliographic records (leader character 7 other than "z"): the record
+      is copied unchanged when keep-bibliographic-record answers "yes" and
+      produces no output otherwise. -->
+ <xsl:template match="/marc:collection/marc:record[substring(marc:leader, 7, 1) != 'z']">
+   <!-- "yes" or "no", as computed for the current record -->
+   <xsl:variable name="verdict">
+     <xsl:call-template name="keep-bibliographic-record" />
+   </xsl:variable>
+
+   <xsl:if test="string($verdict) = 'yes'">
+     <xsl:copy-of select="." />
+   </xsl:if>
+ </xsl:template>
+
+ <!-- Decides whether the current bibliographic record is kept.
+      Context node: a marc:record.
+      Output: the text "yes" when both keep-655a and keep-work-type answer
+      "yes", the text "no" otherwise. -->
+ <xsl:template name="keep-bibliographic-record">
+   <!-- Answers of the individual checks, concatenated (e.g. "yesno") -->
+   <xsl:variable name="answers">
+     <!-- 655$a values against the filter rules -->
+     <xsl:call-template name="keep-655a" />
+     <!-- type of work identified through 007/008 -->
+     <xsl:call-template name="keep-work-type" />
+   </xsl:variable>
+
+   <!-- A single "no" among the answers rejects the record -->
+   <xsl:choose>
+     <xsl:when test="not(contains($answers, 'no'))">
+       <xsl:text>yes</xsl:text>
+     </xsl:when>
+     <xsl:otherwise>
+       <xsl:text>no</xsl:text>
+     </xsl:otherwise>
+   </xsl:choose>
+ </xsl:template>
+
+ <!-- Authority records (leader character 7 is "z") without a 151 field.
+      A record that is-authority-valid rejects is reported and dropped.
+      A valid record is copied unchanged only when the has-valid-references
+      template matching its heading (100, 110 or 111) answers "yes". -->
+ <xsl:template match="/marc:collection/marc:record[substring(marc:leader, 7, 1) = 'z' and not(marc:datafield[@tag='151'])]">
+   <xsl:variable name="record-id" select="marc:controlfield[@tag='001']" />
+   <xsl:variable name="validity">
+     <xsl:call-template name="is-authority-valid" />
+   </xsl:variable>
+
+   <xsl:choose>
+     <xsl:when test="$validity = 'no'">
+       <xsl:message terminate="no">
+         <xsl:text>Invalide : </xsl:text>
+         <xsl:value-of select="$record-id" />
+       </xsl:message>
+     </xsl:when>
+     <xsl:when test="$validity = 'yes'">
+       <!-- "yes" when at least one kept bibliographic record points here -->
+       <xsl:variable name="referenced">
+         <xsl:choose>
+           <xsl:when test="marc:datafield[@tag='100']">
+             <xsl:call-template name="has-valid-references-person" />
+           </xsl:when>
+           <xsl:when test="marc:datafield[@tag='110']">
+             <xsl:call-template name="has-valid-references-corporate" />
+           </xsl:when>
+           <xsl:when test="marc:datafield[@tag='111']">
+             <xsl:call-template name="has-valid-references-meeting" />
+           </xsl:when>
+           <xsl:otherwise>
+             <!-- No known heading: stop the whole transformation -->
+             <xsl:message terminate="yes">
+               <xsl:text>Impossible de déterminer le type de notice d’autorité pour </xsl:text>
+               <xsl:value-of select="$record-id" />
+               <xsl:text>.</xsl:text>
+             </xsl:message>
+           </xsl:otherwise>
+         </xsl:choose>
+       </xsl:variable>
+
+       <xsl:if test="$referenced = 'yes'">
+         <xsl:copy-of select="." />
+       </xsl:if>
+     </xsl:when>
+   </xsl:choose>
+ </xsl:template>
+
+ <!-- Authority records holding a geographic name (field 151) are never
+      output: no Wikidata item is planned for them and they will be merged
+      manually. The dropped name is only reported in a message. -->
+ <xsl:template match="/marc:collection/marc:record[substring(marc:leader, 7, 1) = 'z' and marc:datafield[@tag='151']]">
+   <xsl:variable name="place-name" select="marc:datafield[@tag='151']/marc:subfield[@code='a']" />
+
+   <xsl:message terminate="no">
+     <xsl:text>Notice d’autorité contenant un nom géographique filtrée : </xsl:text>
+     <xsl:value-of select="$place-name" />
+     <xsl:text>.</xsl:text>
+   </xsl:message>
+ </xsl:template>
+
+ <!-- Checks the 655$a values of the current record against the filter rules.
+      Context node: a marc:record.
+      Output: the text "no" when at least one 655$a value has a rule whose
+      keep attribute contains "no", the text "yes" otherwise (including when
+      the record has no 655$a at all).
+      Stops the transformation when a 655$a value yields no keep state, i.e.
+      when no rule is found for it in the filters document. -->
+ <xsl:template name="keep-655a">
+   <!-- Read the control number now: inside the loop below the context node
+        is the 655$a subfield, from which controlfield 001 cannot be reached
+        as a child, so the error message would show an empty identifier. -->
+   <xsl:variable name="record-id" select="marc:controlfield[@tag='001']" />
+
+   <!-- Keep states of all 655$a values, concatenated (e.g. "yesyesno") -->
+   <xsl:variable name="filters">
+     <xsl:for-each select="marc:datafield[@tag='655']/marc:subfield[@code='a']">
+       <xsl:variable name="value-655a" select="." />
+       <xsl:variable name="filter">
+         <!-- key() searches the document of the context node, hence the
+              switch to the filters document -->
+         <xsl:for-each select="$filters-doc">
+           <xsl:value-of select="key('filter-state-655a', $value-655a)/@keep" />
+         </xsl:for-each>
+       </xsl:variable>
+
+       <xsl:if test="$filter = ''">
+         <xsl:message terminate="yes">
+           <xsl:text>Valeur inconnue trouvée en 655 $a pour l’entrée </xsl:text>
+           <xsl:value-of select="$record-id" />
+           <xsl:text> : </xsl:text>
+           <xsl:value-of select="$value-655a" />
+         </xsl:message>
+       </xsl:if>
+
+       <xsl:value-of select="$filter" />
+     </xsl:for-each>
+   </xsl:variable>
+
+   <xsl:choose>
+     <xsl:when test="contains($filters, 'no')">
+       <xsl:text>no</xsl:text>
+     </xsl:when>
+     <xsl:otherwise>
+       <xsl:text>yes</xsl:text>
+     </xsl:otherwise>
+   </xsl:choose>
+ </xsl:template>
+
+ <!-- Checks that the current record identifies a type of work to be kept.
+      Context node: a marc:record.
+      Output: the text "yes" when 007 starts with "k" or when the 25th
+      character of 008 is "c"; otherwise a message naming the record is
+      emitted and the text "no" is output. -->
+ <xsl:template name="keep-work-type">
+   <xsl:variable name="field-007-first-char" select="substring(marc:controlfield[@tag='007'], 1, 1)" />
+   <xsl:variable name="field-008-char-25" select="substring(marc:controlfield[@tag='008'], 25, 1)" />
+
+   <xsl:choose>
+     <!-- TBD: requiring 007 to start with "k" discards many prints that are
+          otherwise valid. A reliable way to recognise prints still has to
+          be verified. -->
+     <xsl:when test="not($field-007-first-char = 'k' or $field-008-char-25 = 'c')">
+       <xsl:message>
+         <xsl:value-of select="marc:controlfield[@tag='001']" />
+         <xsl:text> : l’entrée n’identifie pas le type d’œuvre comme étant un catalogue ou une estampe.</xsl:text>
+       </xsl:message>
+       <xsl:text>no</xsl:text>
+     </xsl:when>
+     <xsl:otherwise>
+       <xsl:text>yes</xsl:text>
+     </xsl:otherwise>
+   </xsl:choose>
+ </xsl:template>
+
+ <!-- Builds the lookup string ("foreign key") of an authority record.
+      Context node: a marc:record; any other context stops the
+      transformation. Bibliographic records are not supported yet and stop
+      the transformation as well.
+      Output: the subfields of the first heading field found, concatenated
+      without separator:
+        100: a, b, c, d, q
+        110: a, b, c, d
+        111: a, b, n, d, c
+      These lists and their order are the ones used by the
+      references-to-person, references-to-corporate and references-to-meeting
+      keys. A lookup can only succeed when both sides build the string the
+      same way; the former 110 list (extra n) and 111 list (a, c, d, e, n)
+      did not.
+      Nothing is output for an authority record without 100, 110 or 111. -->
+ <xsl:template name="get-fk">
+   <xsl:if test="not(local-name(.) = 'record')">
+     <xsl:message terminate="yes">Not in a record</xsl:message>
+   </xsl:if>
+
+   <xsl:choose>
+     <xsl:when test="substring(marc:leader, 7, 1) != 'z'">
+       <xsl:message terminate="yes">Not implemented yet</xsl:message>
+     </xsl:when>
+     <!-- From here on the record is an authority record: the branch above
+          caught every other case. -->
+     <xsl:when test="marc:datafield[@tag='100']">
+       <xsl:for-each select="marc:datafield[@tag='100'][1]">
+         <xsl:value-of select="concat(marc:subfield[@code='a'], marc:subfield[@code='b'], marc:subfield[@code='c'], marc:subfield[@code='d'], marc:subfield[@code='q'])" />
+       </xsl:for-each>
+     </xsl:when>
+     <xsl:when test="marc:datafield[@tag='110']">
+       <xsl:for-each select="marc:datafield[@tag='110'][1]">
+         <xsl:value-of select="concat(marc:subfield[@code='a'], marc:subfield[@code='b'], marc:subfield[@code='c'], marc:subfield[@code='d'])" />
+       </xsl:for-each>
+     </xsl:when>
+     <xsl:when test="marc:datafield[@tag='111']">
+       <xsl:for-each select="marc:datafield[@tag='111'][1]">
+         <xsl:value-of select="concat(marc:subfield[@code='a'], marc:subfield[@code='b'], marc:subfield[@code='n'], marc:subfield[@code='d'], marc:subfield[@code='c'])" />
+       </xsl:for-each>
+     </xsl:when>
+   </xsl:choose>
+ </xsl:template>
+
+ <!-- Tells whether the current authority record holds enough data.
+      Context node: an authority marc:record; a record whose leader
+      character 7 is not "z" stops the transformation.
+      Output: the text "yes" when the record has a 100, 110 or 111 field,
+      the text "no" otherwise. -->
+ <xsl:template name="is-authority-valid">
+   <xsl:if test="not(substring(marc:leader, 7, 1) = 'z')">
+     <xsl:message terminate="yes">Le contexte d’appel doit être une notice d’autorité.</xsl:message>
+   </xsl:if>
+
+   <xsl:choose>
+     <xsl:when test="marc:datafield[@tag='100' or @tag='110' or @tag='111']">
+       <xsl:text>yes</xsl:text>
+     </xsl:when>
+     <xsl:otherwise>
+       <xsl:text>no</xsl:text>
+     </xsl:otherwise>
+   </xsl:choose>
+ </xsl:template>
+
+ <!-- Tells whether the current person authority is referenced from at least
+      one bibliographic record that is kept.
+      Context node: an authority marc:record with a 100 heading.
+      Output: the text "yes" when keep-bibliographic-record answers "yes"
+      for at least one record holding a matching person access point;
+      otherwise a message naming the authority is emitted and the text "no"
+      is output. -->
+ <xsl:template name="has-valid-references-person">
+   <xsl:variable name="authority-fk">
+     <xsl:call-template name="get-fk" />
+   </xsl:variable>
+
+   <xsl:variable name="valid-reference">
+     <!-- The key returns access points (datafields); their parent is the
+          referencing record itself. Going to the parent directly, rather
+          than looking the record up again through its 001, also covers
+          records without 001 and cannot pick up another record that merely
+          shares the same control number. A record referencing the authority
+          several times is visited once. -->
+     <xsl:for-each select="key('references-to-person', $authority-fk)/..">
+       <xsl:variable name="keep">
+         <xsl:call-template name="keep-bibliographic-record" />
+       </xsl:variable>
+
+       <xsl:if test="$keep='yes'">
+         <xsl:text>yes</xsl:text>
+       </xsl:if>
+     </xsl:for-each>
+   </xsl:variable>
+
+   <xsl:choose>
+     <xsl:when test="$valid-reference = ''">
+       <xsl:message>
+         <xsl:text>Pas de référence valide vers </xsl:text>
+         <xsl:value-of select="marc:controlfield[@tag='001']" />
+       </xsl:message>
+       <xsl:text>no</xsl:text>
+     </xsl:when>
+     <xsl:otherwise>
+       <xsl:text>yes</xsl:text>
+     </xsl:otherwise>
+   </xsl:choose>
+ </xsl:template>
+ <!-- Tells whether the current corporate authority is referenced from at
+      least one bibliographic record that is kept.
+      Context node: an authority marc:record with a 110 heading.
+      Output: the text "yes" when keep-bibliographic-record answers "yes"
+      for at least one record holding a matching corporate access point;
+      otherwise a message naming the authority is emitted, as the person
+      variant does, and the text "no" is output. -->
+ <xsl:template name="has-valid-references-corporate">
+   <xsl:variable name="authority-fk">
+     <xsl:call-template name="get-fk" />
+   </xsl:variable>
+
+   <xsl:variable name="valid-reference">
+     <!-- The key returns access points (datafields); their parent is the
+          referencing record itself. Going to the parent directly, rather
+          than looking the record up again through its 001, also covers
+          records without 001 and cannot pick up another record that merely
+          shares the same control number. A record referencing the authority
+          several times is visited once. -->
+     <xsl:for-each select="key('references-to-corporate', $authority-fk)/..">
+       <xsl:variable name="keep">
+         <xsl:call-template name="keep-bibliographic-record" />
+       </xsl:variable>
+
+       <xsl:if test="$keep='yes'">
+         <xsl:text>yes</xsl:text>
+       </xsl:if>
+     </xsl:for-each>
+   </xsl:variable>
+
+   <xsl:choose>
+     <xsl:when test="$valid-reference = ''">
+       <xsl:message>
+         <xsl:text>Pas de référence valide vers </xsl:text>
+         <xsl:value-of select="marc:controlfield[@tag='001']" />
+       </xsl:message>
+       <xsl:text>no</xsl:text>
+     </xsl:when>
+     <xsl:otherwise>
+       <xsl:text>yes</xsl:text>
+     </xsl:otherwise>
+   </xsl:choose>
+ </xsl:template>
+ <!-- Tells whether the current meeting authority is referenced from at
+      least one bibliographic record that is kept.
+      Context node: an authority marc:record with a 111 heading.
+      Output: the text "yes" when keep-bibliographic-record answers "yes"
+      for at least one record holding a matching meeting access point;
+      otherwise a message naming the authority is emitted, as the person
+      variant does, and the text "no" is output. -->
+ <xsl:template name="has-valid-references-meeting">
+   <xsl:variable name="authority-fk">
+     <xsl:call-template name="get-fk" />
+   </xsl:variable>
+
+   <xsl:variable name="valid-reference">
+     <!-- The key returns access points (datafields); their parent is the
+          referencing record itself. Going to the parent directly, rather
+          than looking the record up again through its 001, also covers
+          records without 001 and cannot pick up another record that merely
+          shares the same control number. A record referencing the authority
+          several times is visited once. -->
+     <xsl:for-each select="key('references-to-meeting', $authority-fk)/..">
+       <xsl:variable name="keep">
+         <xsl:call-template name="keep-bibliographic-record" />
+       </xsl:variable>
+
+       <xsl:if test="$keep='yes'">
+         <xsl:text>yes</xsl:text>
+       </xsl:if>
+     </xsl:for-each>
+   </xsl:variable>
+
+   <xsl:choose>
+     <xsl:when test="$valid-reference = ''">
+       <xsl:message>
+         <xsl:text>Pas de référence valide vers </xsl:text>
+         <xsl:value-of select="marc:controlfield[@tag='001']" />
+       </xsl:message>
+       <xsl:text>no</xsl:text>
+     </xsl:when>
+     <xsl:otherwise>
+       <xsl:text>yes</xsl:text>
+     </xsl:otherwise>
+   </xsl:choose>
+ </xsl:template>
+</xsl:stylesheet>
--- /dev/null
+<?xml version="1.0" encoding="utf-8" ?>
+<!-- Filtering rules, grouped by MARC field tag and subfield code. Each rule
+     pairs one exact value with a keep state, "yes" or "no".
+     NOTE(review): presumably this is the filters.xml document loaded by
+     filter_marcxml.xslt (confirm the file name). That stylesheet looks
+     rules up by their value attribute for 655$a, rejects a bibliographic
+     record as soon as one of its 655$a values has keep="no", and stops
+     with an error when a 655$a value has no rule here, so every value
+     present in the data needs an entry. -->
+<filters>
+ <field tag="655">
+ <subfield code="a">
+ <rule value="Affiches" keep="no" />
+ <rule value="Affiches de spectacle" keep="no" />
+ <rule value="Affiches de théâtre" keep="no" />
+ <rule value="Anecdotes" keep="yes" />
+ <rule value="Aquatintes" keep="yes" />
+ <rule value="Biographies" keep="yes" />
+ <rule value="Calendriers" keep="yes" />
+ <rule value="Caricatures" keep="yes" />
+ <rule value="Cartes de vœux" keep="yes" />
+ <rule value="Cartes d'invitation" keep="yes" />
+ <rule value="Cartes de cigarettes" keep="yes" />
+ <rule value="Cartes postales" keep="yes" />
+ <rule value="Catalogues" keep="yes" />
+ <rule value="Catalogues d'exposition" keep="yes" />
+ <rule value="Chromolithographies" keep="yes" />
+ <rule value="Collages (Art)" keep="yes" />
+ <rule value="Collagraphies" keep="yes" />
+ <rule value="Critiques d'art" keep="yes" />
+ <rule value="Dessins" keep="no" />
+ <rule value="Dessins humoristiques" keep="yes" />
+ <rule value="Eaux-fortes en relief" keep="yes" />
+ <rule value="Eaux-fortes (Gravures)" keep="yes" />
+ <rule value="Empreintes naturelles" keep="yes" />
+ <rule value="Estampes" keep="yes" />
+ <rule value="Estampes au pochoir" keep="yes" />
+ <rule value="Estampes de reproduction" keep="yes" />
+ <rule value="Estampes d'interprétation" keep="yes" />
+ <rule value="Estampes inuites" keep="yes" />
+ <rule value="Estampes numériques" keep="yes" />
+ <rule value="Estampes originales" keep="yes" />
+ <rule value="Fac-similés" keep="yes" />
+ <rule value="Gaufrages (Estampes)" keep="yes" />
+ <rule value="Gravures" keep="yes" />
+ <rule value="Gravures à la manière noire" keep="yes" />
+ <rule value="Gravures au carborundum" keep="yes" />
+ <rule value="Gravures au sucre" keep="yes" />
+ <rule value="Gravures au trait" keep="yes" />
+ <rule value="Gravures au vernis mou" keep="yes" />
+ <rule value="Gravures en manière de crayon" keep="yes" />
+ <rule value="Gravures en pointillé" keep="yes" />
+ <rule value="Gravures sur bois" keep="yes" />
+ <rule value="Gravures sur pierre" keep="yes" />
+ <rule value="Iconographie documentaire" keep="yes" />
+ <rule value="Impressions en relief" keep="yes" />
+ <rule value="Installations (Art)" keep="yes" />
+ <rule value="Interviews" keep="yes" />
+ <rule value="Linogravures" keep="yes" />
+ <rule value="Lithographies" keep="yes" />
+ <rule value="Lithographies offsets" keep="yes" />
+ <rule value="Littérature" keep="yes" />
+ <rule value="Livres d'artistes" keep="yes" />
+ <rule value="Livres de photographies" keep="yes" />
+ <rule value="Livres numériques" keep="yes" />
+ <rule value="Matrices (Gravure)" keep="no" />
+ <rule value="Matériel de promotion" keep="yes" />
+ <rule value="Monotypes (Estampes)" keep="yes" />
+ <rule value="Œuvres d'art" keep="yes" />
+ <rule value="Ouvrages illustrés" keep="yes" />
+ <rule value="Photographies" keep="yes" />
+ <rule value="Photogravures" keep="yes" />
+ <rule value="Photolithographies" keep="yes" />
+ <rule value="Poésie" keep="yes" />
+ <rule value="Pointes-sèches" keep="yes" />
+ <rule value="Portraits" keep="yes" />
+ <rule value="Prospectus" keep="yes" />
+ <rule value="Publications en série" keep="no" />
+ <rule value="Publications officielles" keep="yes" />
+ <rule value="Reproductions d'œuvres d'art" keep="no" />
+ <rule value="Sérigraphies" keep="yes" />
+ <rule value="Tailles-douces" keep="yes" />
+ <rule value="Technique mixte (Art)" keep="yes" />
+ <rule value="Vues d'optique" keep="yes" />
+ </subfield>
+ </field>
+</filters>