Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/tests/usr.bin/grep tests/grep: demonstrate word search bug in usr.bin/grep



details:   https://anonhg.NetBSD.org/src/rev/2a72024bbb7f
branches:  trunk
changeset: 1023267:2a72024bbb7f
user:      rillig <rillig%NetBSD.org@localhost>
date:      Mon Aug 30 23:10:58 2021 +0000

description:
tests/grep: demonstrate word search bug in usr.bin/grep

By default, MKBSDGREP is "no", therefore GNU Grep is installed as
/usr/bin/grep instead of src/usr.bin/grep, and this bug does not occur
in practice.

diffstat:

 tests/usr.bin/grep/t_grep.sh |  86 ++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 84 insertions(+), 2 deletions(-)

diffs (111 lines):

diff -r 2f73f15bec45 -r 2a72024bbb7f tests/usr.bin/grep/t_grep.sh
--- a/tests/usr.bin/grep/t_grep.sh      Mon Aug 30 22:57:33 2021 +0000
+++ b/tests/usr.bin/grep/t_grep.sh      Mon Aug 30 23:10:58 2021 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: t_grep.sh,v 1.4 2021/08/30 22:17:32 rillig Exp $
+# $NetBSD: t_grep.sh,v 1.5 2021/08/30 23:10:58 rillig Exp $
 #
 # Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
 # All rights reserved.
@@ -104,11 +104,91 @@
        atf_check -o file:"input" \
            env LC_ALL=C grep -w "array" "input"
 
-       # XXX: In an UTF-8 locale, '[' seems to be a word character.
+       # XXX: In an UTF-8 locale, GNU Grep treats '[' as a word character.
        atf_check -s exit:1 -o empty \
            env LC_ALL="C.UTF-8" grep -w "array" "input"
 }
 
+atf_test_case word_in_line
+word_in_line_head()
+{
+       atf_set "descr" "Checks word search at the beginning of a line"
+}
+word_in_line_body()
+{
+       # See usr.bin/grep/util.c, "Check for whole word match", which
+       # looks suspiciously wrong.  And indeed, NetBSD grep does not
+       # survive this test.  GNU Grep does.
+
+       echo "begin middle end" > "input"
+
+       # A word at the beginning of a line is found.
+       atf_check -o file:"input" \
+           env LC_ALL=C grep -w "begin" "input"
+
+       # A word in the middle of a line is found.
+       atf_check -o file:"input" \
+           env LC_ALL=C grep -w "middle" "input"
+
+       # A word at the end of a line is found.
+       atf_check -o file:"input" \
+           env LC_ALL=C grep -w "end" "input"
+
+       # A subword at the beginning of a line is not found.
+       atf_check -s exit:1 -o empty \
+           env LC_ALL=C grep -w "be" "input"
+
+       # A subword in the middle of a line is not found.
+       atf_check -s exit:1 -o empty \
+           env LC_ALL=C grep -w "mid" "input"
+       atf_check -s exit:1 -o empty \
+           env LC_ALL=C grep -w "dle" "input"
+
+       # A subword at the end of a line is not found.
+       atf_check -s exit:1 -o empty \
+           env LC_ALL=C grep -w "nd" "input"
+}
+
+atf_test_case word_in_line_utf8
+word_in_line_utf8_head()
+{
+       atf_set "descr" "Checks word search at the beginning of a line"
+}
+word_in_line_utf8_body()
+{
+       # See usr.bin/grep/util.c, "Check for whole word match", which
+       # looks suspiciously wrong.  And indeed, NetBSD grep does not
+       # survive this test.  GNU Grep does.
+
+       echo "begin middle end" > "input"
+
+       # A word at the beginning of a line is found.
+       atf_check -o file:"input" \
+           env LC_ALL="C.UTF-8" grep -w "begin" "input"
+
+       # A word in the middle of a line is found.
+       atf_check -o file:"input" \
+           env LC_ALL="C.UTF-8" grep -w "middle" "input"
+
+       # A word at the end of a line is found.
+       atf_check -o file:"input" \
+           env LC_ALL="C.UTF-8" grep -w "end" "input"
+
+       # A subword at the beginning of a line is not found.
+       atf_check -s exit:1 -o empty \
+           env LC_ALL="C.UTF-8" grep -w "be" "input"
+
+       # A subword in the middle of a line is not found.
+       atf_check -s exit:1 -o empty \
+           env LC_ALL="C.UTF-8" grep -w "mid" "input"
+       atf_check -s exit:1 -o empty \
+           env LC_ALL="C.UTF-8" grep -w "dle" "input"
+
+       # A subword at the end of a line is not found.
+       atf_check -s exit:1 -o empty \
+           env LC_ALL="C.UTF-8" grep -w "nd" "input"
+}
+
 atf_test_case begin_end
 begin_end_head()
 {
@@ -255,6 +335,8 @@
        atf_add_test_case recurse_symlink
        atf_add_test_case word_regexps
        atf_add_test_case word_locale
+       atf_add_test_case word_in_line
+       atf_add_test_case word_in_line_utf8
        atf_add_test_case begin_end
        atf_add_test_case ignore_case
        atf_add_test_case invert



Home | Main Index | Thread Index | Old Index