Pierre Choffet | Git repositories - wmo_to_wikidata.git/commitdiff
Fix Wigos identifier regex and stations cleaner
author Pierre Choffet <peuc@wanadoo.fr>
Thu, 23 Dec 2021 21:18:19 +0000 (16:18 -0500)
committer Pierre Choffet <peuc@wanadoo.fr>
Thu, 23 Dec 2021 21:49:16 +0000 (16:49 -0500)
schemas/stations.xsd
xslts/stations_clean.xslt

index 6a4013b6957b5f7eb054f35371cf0bbcc918437c..b66c7c6ced4326db495c19ff634e0e86b2ed96d2 100644 (file)
        <!-- Types -->
        <!--
             String type for WIGOS identifiers, constrained by one regular
             expression on top of xsd:normalizedString.

             The pattern on the '+' line accepts four hyphen-separated parts:
               1. a number from 0 to 14;
               2. and 3. a number from 0 to 65534, written without leading zeros;
               4. a run of 1 to 16 letters or digits (upper or lower case).

             The pattern on the '-' line allowed only upper-case letters and
             digits in the fourth part and tolerated one extra hyphen-separated
             trailing part.

             NOTE(review): in XSD regular expressions \d matches any Unicode
             decimal digit, not only 0 to 9. Confirm that this is intended.
        -->
        <xsd:simpleType name="wigos-id">
                <xsd:restriction base="xsd:normalizedString">
-                       <xsd:pattern value="([0-9]|1[0-4])-(([0-9]|[1-9][0-9]{1,3}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-4])-){2}[0-9A-Z]{1,16}(-[0-9A-Z]+)?" />
+                       <xsd:pattern value="(\d|1[0-4])(-(\d|[1-9]\d{1,3}|[1-5]\d{4}|6[0-4]\d{3}|65[0-4]\d{2}|655[0-2]\d|6553[0-4])){2}-([0-9A-Za-z]{1,16})" />
                </xsd:restriction>
        </xsd:simpleType>
 </xsd:schema>
index 5971d2ef548eb41b1e5f801a8351ec3188f3b6e6..35e35daabcce77d7f6ead77d8cc349d5b41e2bbd 100644 (file)
 
        <!--
             List known invalid WIGOS IDs in original data.
             Each wigos-id child carries one rejected value in its invalid-value
             attribute. Lines marked '+' are the entries added by this commit.
        -->
        <xsl:variable name="wigos-ids">
+               <wigos-id invalid-value="41247" />
+               <wigos-id invalid-value="72388" />
                <wigos-id invalid-value="0-634-0000-0000" />
+               <wigos-id invalid-value="0-858-02560-COL" />
+               <wigos-id invalid-value="1-620-2001-0507" />
+               <wigos-id invalid-value="NaN-NaN-NaN-undefined" />
        </xsl:variable>
        
        <!--
             document('') refers to this stylesheet itself, so the default value
             of the parameter is the node-set of wigos-id elements declared in
             the wigos-ids variable. Templates test identifiers against the
             invalid-value attributes of those nodes.
        -->
        <xsl:param name="hardcoded-wigos-id" select="document('')/*/xsl:variable[@name='wigos-ids']/*"/>
        
        <!--
             Remove invalid WIGOS identifiers.
             A wigosStationIdentifiers element is copied, with its attributes and
             child nodes passed on to apply-templates, only when its
             wigosStationIdentifier child passes all three checks:
               1. translate() leaves an empty string once every allowed character
                  is stripped, i.e. the value holds only those characters;
               2. the value contains at least one hyphen;
               3. the value is not listed in $hardcoded-wigos-id.
             Otherwise the template emits nothing and the element is dropped.

             The '-' line is the pre-commit test, which allowed only digits and
             hyphens. The '+' line also allows ASCII letters of either case.
        -->
        <xsl:template match="wigosStationIdentifiers">
-               <xsl:if test="translate(wigosStationIdentifier, '0123456789-', '') = '' and contains(wigosStationIdentifier, '-') and not($hardcoded-wigos-id[@invalid-value = current()/wigosStationIdentifier])">
+               <xsl:if test="translate(wigosStationIdentifier, '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-', '') = '' and contains(wigosStationIdentifier, '-') and not($hardcoded-wigos-id[@invalid-value = current()/wigosStationIdentifier])">
                        <xsl:copy>
                                <xsl:apply-templates select="node()|@*" />
                        </xsl:copy>
                </xsl:if>
        </xsl:template> 
        <xsl:template match="wigosId">
-               <xsl:if test="translate(., '0123456789-', '') = '' and contains(., '-') and not($hardcoded-wigos-id[@invalid-value = current()])">
+               <xsl:if test="translate(., '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-', '') = '' and contains(., '-') and not($hardcoded-wigos-id[@invalid-value = current()])">
                        <xsl:copy>
                                <xsl:apply-templates select="node()|@*" />
                        </xsl:copy>