CVS commit: pkgsrc/textproc/p5-XML-LibXML

To: pkgsrc-changes%NetBSD.org@localhost
Subject: CVS commit: pkgsrc/textproc/p5-XML-LibXML
From: "Thomas Klausner" <wiz%netbsd.org@localhost>
Date: Mon, 11 May 2026 17:39:13 +0000

Module Name:    pkgsrc
Committed By:   wiz
Date:           Mon May 11 17:39:13 UTC 2026

Modified Files:
        pkgsrc/textproc/p5-XML-LibXML: Makefile distinfo
        pkgsrc/textproc/p5-XML-LibXML/patches: patch-dom.c
Added Files:
        pkgsrc/textproc/p5-XML-LibXML/patches: patch-LibXML.xs patch-MANIFEST
            patch-dom.h patch-t_48__security__oob__utf8__gh146.t

Log Message:
p5-XML-LibXML: add another upstream pull request

with a possible security fix

Bump PKGREVISION.


To generate a diff of this commit:
cvs rdiff -u -r1.106 -r1.107 pkgsrc/textproc/p5-XML-LibXML/Makefile
cvs rdiff -u -r1.56 -r1.57 pkgsrc/textproc/p5-XML-LibXML/distinfo
cvs rdiff -u -r0 -r1.1 pkgsrc/textproc/p5-XML-LibXML/patches/patch-LibXML.xs \
    pkgsrc/textproc/p5-XML-LibXML/patches/patch-MANIFEST \
    pkgsrc/textproc/p5-XML-LibXML/patches/patch-dom.h \
    pkgsrc/textproc/p5-XML-LibXML/patches/patch-t_48__security__oob__utf8__gh146.t
cvs rdiff -u -r1.1 -r1.2 pkgsrc/textproc/p5-XML-LibXML/patches/patch-dom.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: pkgsrc/textproc/p5-XML-LibXML/Makefile
diff -u pkgsrc/textproc/p5-XML-LibXML/Makefile:1.106 pkgsrc/textproc/p5-XML-LibXML/Makefile:1.107
--- pkgsrc/textproc/p5-XML-LibXML/Makefile:1.106        Mon May 11 06:24:02 2026
+++ pkgsrc/textproc/p5-XML-LibXML/Makefile      Mon May 11 17:39:13 2026
@@ -1,8 +1,8 @@
-# $NetBSD: Makefile,v 1.106 2026/05/11 06:24:02 wiz Exp $
+# $NetBSD: Makefile,v 1.107 2026/05/11 17:39:13 wiz Exp $
 
 DISTNAME=      XML-LibXML-2.0210
 PKGNAME=       p5-${DISTNAME}
-PKGREVISION=   9
+PKGREVISION=   10
 CATEGORIES=    textproc perl5
 MASTER_SITES=  ${MASTER_SITE_PERL_CPAN:=XML/}
 
@@ -23,6 +23,9 @@ CONFLICTS+=   p5-XML-LibXML-Common-[0-9]*
 # includes XML::LibXML::XPathContext module
 CONFLICTS+=    p5-XML-LibXML-XPathContext-[0-9]*
 
+# as of 2.0210
+# Failed 8/78 test programs. 11/2612 subtests failed.
+
 PERL5_PACKLIST=                auto/XML/LibXML/.packlist
 
 FILES_SUBST+=          PERL5_INSTALLVENDORLIB=${PERL5_INSTALLVENDORLIB:Q}

Index: pkgsrc/textproc/p5-XML-LibXML/distinfo
diff -u pkgsrc/textproc/p5-XML-LibXML/distinfo:1.56 pkgsrc/textproc/p5-XML-LibXML/distinfo:1.57
--- pkgsrc/textproc/p5-XML-LibXML/distinfo:1.56 Mon May 11 06:24:02 2026
+++ pkgsrc/textproc/p5-XML-LibXML/distinfo      Mon May 11 17:39:13 2026
@@ -1,7 +1,11 @@
-$NetBSD: distinfo,v 1.56 2026/05/11 06:24:02 wiz Exp $
+$NetBSD: distinfo,v 1.57 2026/05/11 17:39:13 wiz Exp $
 
 BLAKE2s (XML-LibXML-2.0210.tar.gz) = 93c95821f009eb1272ee2cb483c85e14318f3260ef78a4a7cc5265db86e1b0a6
 SHA512 (XML-LibXML-2.0210.tar.gz) = ae72b25ac6362152fa85ec9fed03fad694382bde29f459e1bd95b3ca4d1b0dffb76d2f8319bc6fbc6e291583696c3b95b41a23cc2bb509ce6f3fd7d74666fd77
 Size (XML-LibXML-2.0210.tar.gz) = 466316 bytes
-SHA1 (patch-dom.c) = b54099c9fe7c879b8d74ddf1cb3ba18d6fb296b4
+SHA1 (patch-LibXML.xs) = b264148c7a3e0407017b773698f6d0a513e8b2f9
+SHA1 (patch-MANIFEST) = a93d88f8acb10c994efa1a209a446f7682692c83
+SHA1 (patch-dom.c) = d22ff372ed0da741f160de897fe797719173aa7f
+SHA1 (patch-dom.h) = 525cf1b057662cdc29440617f867c1c4bb2c7960
 SHA1 (patch-t_06elements.t) = 67c124556766e2afa0c9e364efc68d6815344963
+SHA1 (patch-t_48__security__oob__utf8__gh146.t) = b1b9f0462da2d77008cd3ea8d8aa7866612caa5a

Index: pkgsrc/textproc/p5-XML-LibXML/patches/patch-dom.c
diff -u pkgsrc/textproc/p5-XML-LibXML/patches/patch-dom.c:1.1 pkgsrc/textproc/p5-XML-LibXML/patches/patch-dom.c:1.2
--- pkgsrc/textproc/p5-XML-LibXML/patches/patch-dom.c:1.1       Mon May 11 06:24:02 2026
+++ pkgsrc/textproc/p5-XML-LibXML/patches/patch-dom.c   Mon May 11 17:39:13 2026
@@ -1,4 +1,7 @@
-$NetBSD: patch-dom.c,v 1.1 2026/05/11 06:24:02 wiz Exp $
+$NetBSD: patch-dom.c,v 1.2 2026/05/11 17:39:13 wiz Exp $
+
+fix: validate UTF-8 continuation bytes in domParseChar
+https://github.com/cpan-authors/XML-LibXML/pull/149
 
 From 15652bd905a6c9dda59a81b14d4766adbbae2ea8 Mon Sep 17 00:00:00 2001
 From: Toddr Bot <toddbot%rinaldo.us@localhost>
@@ -27,7 +30,35 @@ Co-Authored-By: Claude Opus 4.6 <noreply
 
 --- dom.c.orig 2017-10-23 08:52:55.000000000 +0000
 +++ dom.c
-@@ -292,6 +292,13 @@ domParseChar( xmlChar *cur, int *len )
+@@ -239,7 +239,7 @@ domReconcileNs(xmlNodePtr tree)
+  * NAME domParseChar
+  * TYPE function
+  * SYNOPSIS
+- *   int utf8char = domParseChar( curchar, &len );
++ *   int utf8char = domParseChar( curchar, &len, remaining );
+  *
+  * The current char value, if using UTF-8 this may actually span
+  * multiple bytes in the given string. This function parses an utf8
+@@ -260,12 +260,14 @@ domReconcileNs(xmlNodePtr tree)
+  *
+  * Returns the current char value and its length
+  *
+- * NOTE: If the character passed to this function is not a UTF
+- * character, the return value will be 0 and the length of the
+- * character is -1!
++ * NOTE: If the character passed to this function is not a valid UTF-8
++ * character (truncated sequence, invalid continuation byte, or
++ * codepoint not allowed by IS_CHAR), the return value will be 0 and
++ * the length will be set to 1 so callers can safely advance past the
++ * bad byte.
+  */
+ int
+-domParseChar( xmlChar *cur, int *len )
++domParseChar( xmlChar *cur, int *len, int remaining )
+ {
+     unsigned char c;
+         unsigned int val;
+@@ -292,6 +294,13 @@ domParseChar( xmlChar *cur, int *len )
          if ((c & 0xe0) == 0xe0) {
              if ((c & 0xf0) == 0xf0) {
                  /* 4-byte code */
@@ -41,7 +72,7 @@ Co-Authored-By: Claude Opus 4.6 <noreply
                  *len = 4;
                  val = (cur[0] & 0x7) << 18;
                  val |= (cur[1] & 0x3f) << 12;
-@@ -299,6 +306,12 @@ domParseChar( xmlChar *cur, int *len )
+@@ -299,6 +308,12 @@ domParseChar( xmlChar *cur, int *len )
                  val |= cur[3] & 0x3f;
              } else {
                  /* 3-byte code */
@@ -54,7 +85,7 @@ Co-Authored-By: Claude Opus 4.6 <noreply
                  *len = 3;
                  val = (cur[0] & 0xf) << 12;
                  val |= (cur[1] & 0x3f) << 6;
-@@ -306,6 +319,11 @@ domParseChar( xmlChar *cur, int *len )
+@@ -306,6 +321,11 @@ domParseChar( xmlChar *cur, int *len )
              }
              } else {
              /* 2-byte code */

Added files:

Index: pkgsrc/textproc/p5-XML-LibXML/patches/patch-LibXML.xs
diff -u /dev/null pkgsrc/textproc/p5-XML-LibXML/patches/patch-LibXML.xs:1.1
--- /dev/null   Mon May 11 17:39:13 2026
+++ pkgsrc/textproc/p5-XML-LibXML/patches/patch-LibXML.xs       Mon May 11 17:39:13 2026
@@ -0,0 +1,46 @@
+$NetBSD: patch-LibXML.xs,v 1.1 2026/05/11 17:39:13 wiz Exp $
+
+fix: validate UTF-8 continuation bytes in domParseChar
+https://github.com/cpan-authors/XML-LibXML/pull/149
+
+--- LibXML.xs.orig     2023-11-29 06:05:01.000000000 +0000
++++ LibXML.xs
+@@ -1001,24 +1001,28 @@ LibXML_test_node_name( xmlChar * name )
+     xmlChar * cur = name;
+     int tc  = 0;
+     int len = 0;
++    int remaining;
+ 
+     if ( cur == NULL || *cur == 0 ) {
+         /* warn("name is empty" ); */
+         return(0);
+     }
+ 
+-    tc = domParseChar( cur, &len );
++    remaining = xmlStrlen(name);
+ 
++    tc = domParseChar( cur, &len, remaining );
++
+     if ( !( IS_LETTER( tc ) || (tc == '_') || (tc == ':')) ) {
+         /* warn( "is not a letter\n" ); */
+         return(0);
+     }
+ 
+     tc  =  0;
++    remaining -= len;
+     cur += len;
+ 
+     while (*cur != 0 ) {
+-        tc = domParseChar( cur, &len );
++        tc = domParseChar( cur, &len, remaining );
+ 
+         if (!(IS_LETTER(tc) || IS_DIGIT(tc) || (tc == '_') ||
+              (tc == '-') || (tc == ':') || (tc == '.') ||
+@@ -1027,6 +1031,7 @@ LibXML_test_node_name( xmlChar * name )
+             return(0);
+         }
+         tc = 0;
++        remaining -= len;
+         cur += len;
+     }
+ 
Index: pkgsrc/textproc/p5-XML-LibXML/patches/patch-MANIFEST
diff -u /dev/null pkgsrc/textproc/p5-XML-LibXML/patches/patch-MANIFEST:1.1
--- /dev/null   Mon May 11 17:39:13 2026
+++ pkgsrc/textproc/p5-XML-LibXML/patches/patch-MANIFEST        Mon May 11 17:39:13 2026
@@ -0,0 +1,15 @@
+$NetBSD: patch-MANIFEST,v 1.1 2026/05/11 17:39:13 wiz Exp $
+
+fix: validate UTF-8 continuation bytes in domParseChar
+https://github.com/cpan-authors/XML-LibXML/pull/149
+
+--- MANIFEST.orig      2024-01-24 15:17:45.000000000 +0000
++++ MANIFEST
+@@ -169,6 +169,7 @@ t/48_rt93429_recover_2_in_html_parsing.t
+ t/48_rt123379_setNamespace.t
+ t/48_rt55000.t
+ t/48_rt93429_recover_2_in_html_parsing.t
++t/48_security_oob_utf8_gh146.t
+ t/48importing_nodes_IDs_rt_69520.t
+ t/49_load_html.t
+ t/49callbacks_returning_undef.t
Index: pkgsrc/textproc/p5-XML-LibXML/patches/patch-dom.h
diff -u /dev/null pkgsrc/textproc/p5-XML-LibXML/patches/patch-dom.h:1.1
--- /dev/null   Mon May 11 17:39:13 2026
+++ pkgsrc/textproc/p5-XML-LibXML/patches/patch-dom.h   Mon May 11 17:39:13 2026
@@ -0,0 +1,35 @@
+$NetBSD: patch-dom.h,v 1.1 2026/05/11 17:39:13 wiz Exp $
+
+fix: validate UTF-8 continuation bytes in domParseChar
+https://github.com/cpan-authors/XML-LibXML/pull/149
+
+--- dom.h.orig 2016-05-30 09:01:59.000000000 +0000
++++ dom.h
+@@ -58,7 +58,7 @@ domReconcileNs(xmlNodePtr tree);
+  * NAME domParseChar
+  * TYPE function
+  * SYNOPSIS
+- *   int utf8char = domParseChar( curchar, &len );
++ *   int utf8char = domParseChar( curchar, &len, remaining );
+  *
+  * The current char value, if using UTF-8 this may actually span
+  * multiple bytes in the given string. This function parses an utf8
+@@ -79,12 +79,14 @@ domReconcileNs(xmlNodePtr tree);
+  *
+  * Returns the current char value and its length
+  *
+- * NOTE: If the character passed to this function is not a UTF
+- * character, the return value will be 0 and the length of the
+- * character is -1!
++ * NOTE: If the character passed to this function is not a valid UTF-8
++ * character (truncated sequence, invalid continuation byte, or
++ * codepoint not allowed by IS_CHAR), the return value will be 0 and
++ * the length will be set to 1 so callers can safely advance past the
++ * bad byte.
+  */
+ int
+-domParseChar( xmlChar *characters, int *len );
++domParseChar( xmlChar *characters, int *len, int remaining );
+ 
+ xmlNodePtr
+ domReadWellBalancedString( xmlDocPtr doc, xmlChar* string, int repair );
Index: pkgsrc/textproc/p5-XML-LibXML/patches/patch-t_48__security__oob__utf8__gh146.t
diff -u /dev/null pkgsrc/textproc/p5-XML-LibXML/patches/patch-t_48__security__oob__utf8__gh146.t:1.1
--- /dev/null   Mon May 11 17:39:13 2026
+++ pkgsrc/textproc/p5-XML-LibXML/patches/patch-t_48__security__oob__utf8__gh146.t      Mon May 11 17:39:13 2026
@@ -0,0 +1,118 @@
+$NetBSD: patch-t_48__security__oob__utf8__gh146.t,v 1.1 2026/05/11 17:39:13 wiz Exp $
+
+fix: validate UTF-8 continuation bytes in domParseChar
+https://github.com/cpan-authors/XML-LibXML/pull/149
+
+--- t/48_security_oob_utf8_gh146.t.orig        2026-05-11 17:36:06.144804837 +0000
++++ t/48_security_oob_utf8_gh146.t
+@@ -0,0 +1,110 @@
++# Security regression test for GitHub issue #146:
++# Out-of-bounds heap read in domParseChar on truncated UTF-8 sequences.
++#
++# domParseChar() read continuation bytes for multi-byte UTF-8 sequences
++# without verifying they exist or are valid. A truncated sequence (e.g.,
++# "a\xF0") caused reads past the NUL terminator into uninitialized heap
++# memory. This affects all DOM methods that validate node names via
++# LibXML_test_node_name(): createElement, createAttribute, setNodeName,
++# createElementNS, createAttributeNS, etc.
++#
++# Impact: denial of service (crash on unmapped memory) and potential
++# information disclosure (reading adjacent heap allocations).
++#
++# Before the fix, these inputs triggered undefined behavior — the
++# function read continuation bytes blindly, producing a garbage
++# codepoint and advancing the pointer past the buffer into heap memory.
++# After the fix, domParseChar rejects invalid/truncated sequences by
++# returning 0 with *len = 1, and the caller rejects the name.
++
++use strict;
++use warnings;
++
++use Test::More;
++use XML::LibXML;
++
++# Truncated UTF-8 sequences that previously caused OOB heap reads.
++# Each entry: [ bytes, description ]
++#
++# The leading "a" is a valid ASCII char so domParseChar succeeds on the
++# first character, then LibXML_test_node_name loops and hits the
++# truncated sequence on the second call — this is what triggered the
++# OOB read: len was set to 2/3/4 but the actual bytes weren't there.
++my @truncated_sequences = (
++    [ "a\xC0",             "truncated 2-byte (leader only)" ],
++    [ "a\xC2",             "truncated 2-byte (valid leader, missing continuation)" ],
++    [ "a\xE0",             "truncated 3-byte (leader only)" ],
++    [ "a\xE0\x80",         "truncated 3-byte (leader + 1 continuation)" ],
++    [ "a\xF0",             "truncated 4-byte (leader only)" ],
++    [ "a\xF0\x80",         "truncated 4-byte (leader + 1 continuation)" ],
++    [ "a\xF0\x80\x80",     "truncated 4-byte (leader + 2 continuations)" ],
++);
++
++# Invalid continuation bytes — the leader is valid but the continuations
++# are not 10xxxxxx. Before the fix, these were read without validation,
++# producing a garbage codepoint and advancing the pointer incorrectly.
++my @invalid_continuations = (
++    [ "a\xC2\x41",         "2-byte with ASCII continuation" ],
++    [ "a\xE0\x41\x80",     "3-byte with ASCII in first continuation" ],
++    [ "a\xE0\x80\x41",     "3-byte with ASCII in second continuation" ],
++    [ "a\xF0\x41\x80\x80", "4-byte with ASCII in first continuation" ],
++    [ "a\xF0\x80\x41\x80", "4-byte with ASCII in second continuation" ],
++    [ "a\xF0\x80\x80\x41", "4-byte with ASCII in third continuation" ],
++);
++
++my @all_bad = (@truncated_sequences, @invalid_continuations);
++
++# Methods that croak on invalid names
++# TEST:$bad_count=13
++# TEST:$croak_methods=3
++my @croak_methods = qw( createElement setNodeName createElementNS );
++
++# Methods that return undef on invalid names (no exception)
++# TEST:$undef_methods=2
++my @undef_methods = qw( createAttribute createAttributeNS );
++
++plan tests => scalar(@all_bad) * (scalar(@croak_methods) + scalar(@undef_methods));
++
++my $doc  = XML::LibXML::Document->new();
++my $nsURI = "http://example.com/ns";
++
++for my $case (@all_bad) {
++    my ($bytes, $desc) = @$case;
++
++    # Methods that die on bad names
++    for my $method (@croak_methods) {
++        my $died = 0;
++        eval {
++            if ($method eq 'createElement') {
++                $doc->createElement($bytes);
++            }
++            elsif ($method eq 'setNodeName') {
++                my $node = $doc->createElement("tmp");
++                $node->setNodeName($bytes);
++            }
++            elsif ($method eq 'createElementNS') {
++                $doc->createElementNS($nsURI, $bytes);
++            }
++        };
++        $died = 1 if $@;
++
++        # TEST*$bad_count*$croak_methods
++        ok($died, "$method dies on $desc");
++    }
++
++    # Methods that return undef on bad names
++    for my $method (@undef_methods) {
++        my $result;
++        eval {
++            if ($method eq 'createAttribute') {
++                $result = $doc->createAttribute($bytes, "value");
++            }
++            elsif ($method eq 'createAttributeNS') {
++                $result = $doc->createAttributeNS($nsURI, $bytes, "value");
++            }
++        };
++
++        # TEST*$bad_count*$undef_methods
++        ok(!defined $result, "$method returns undef on $desc");
++    }
++}

Prev by Date: CVS commit: pkgsrc/doc
Next by Date: CVS commit: pkgsrc/doc
Previous by Thread: CVS commit: pkgsrc/doc
Next by Thread: CVS commit: pkgsrc/doc
Indexes:

Home | Main Index | Thread Index | Old Index