Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/distrib/notes/common Detect more ad clauses, pick more dupli...



details:   https://anonhg.NetBSD.org/src/rev/d81208f1b021
branches:  trunk
changeset: 465733:d81208f1b021
user:      uwe <uwe%NetBSD.org@localhost>
date:      Tue Dec 03 01:24:45 2019 +0000

description:
Detect more ad clauses, pick more duplicates.

If there are notices that differ in the final dot, prefer the version
with the dot.  Sort "by the" entries after all other "by Name" entries.
With the corresponding re-ordering of legal.common, the output of
this script is getting useful again.

diffstat:

 distrib/notes/common/extract-contrib-string.pl |  79 ++++++++++---------------
 1 files changed, 33 insertions(+), 46 deletions(-)

diffs (136 lines):

diff -r 808e66737599 -r d81208f1b021 distrib/notes/common/extract-contrib-string.pl
--- a/distrib/notes/common/extract-contrib-string.pl    Tue Dec 03 01:19:33 2019 +0000
+++ b/distrib/notes/common/extract-contrib-string.pl    Tue Dec 03 01:24:45 2019 +0000
@@ -50,7 +50,7 @@
 
 
 $ack_line1="[aA]ll( commercial)?( marketing or)? advertising materials mentioning( features)?";
-$ack_line2="display the following( acknowledge?ment)?";
+$ack_line2="display the( following)?( acknowledge?ment)?";
 $ack_endline=
       '(\d\.\s*(Neither the name'
     .         '|The name of the company nor the name'  # Wasn't my idea
@@ -59,11 +59,15 @@
     .         '|The names? (of )?.* nor the names? of'
     .         '|The names? (of )?.* or any of it\'?s members'
     .         '|Redistributions of any form whatsoever'
-    .         '|The names .*"OpenSSL Toolkit.*" and .*"OpenSSL Project.*" must not be used))'
+    .         '|The names .*"OpenSSL Toolkit.*" and .*"OpenSSL Project.*" must not be used'
+    .         "|Urbana-Champaign Independent Media Center's name"
+    . '))'
+    .'|(^Neither the name)'
     .'|(THIS SOFTWARE IS PROVIDED)'
     .'|(The word \'cryptographic\' can be left out if)'
     .'|(may be used to endorse)'
     .'|(@end cartouche)'
+    .'|(</para>)'
     .'|(Redistribution and use in source and binary forms)'
     .'|(may not be used to endorse)'
     .'|(\.IP 4)'
@@ -177,6 +181,11 @@
                        $msg =~ s/''.*$//o;
                }
 
+               # XXX: pcap &c - add to known_bad_clause_3_wording but
+               # that code seems to have problems.  Easier to add a
+               # hack here, shouldn't affect good clause 3.
+               $msg =~ s/''\s+Neither the name.*$//;
+
                # *roff
                while ($msg =~ /^\.\\"\s*/) {
                        $msg =~ s/^\.\\"\s*//o;
@@ -259,52 +268,27 @@
                        print "$msg";
                        print "\n\n";
                    }
-                   
-                   # Figure out if there's a version w/ or w/o trailing dot
-                   # 
-                   if ($msg =~ /\.$/) {
-                       # check if there's a version of the same msg
-                       # w/o a trailing dot
-                       $msg2=$msg;
-                       $msg2=~s,\.$,,;
-                       if ($copyrights{"$msg2"}) {
-                           # already there - skip
-                           print "already there, w/o dot - skipping!\n"
-                               if $debug;
-                           next msg;
+
+                   my $key = lc($msg); # ignore difference in case
+                   $key =~ s/\n/ /g;   # ignore difference in line breaks
+                   $key =~ s/\.$//g;   # drop the final dot
+
+                   # push organizations ("by the") to the end of the
+                   # sorting order
+                   $key =~ s/(developed by) the/$1 ~the/;
+
+                   if (defined $copyrights{$key}) {
+                       if ($copyrights{$key} !~ /\.$/ && $msg =~ /\.$/) {
+                           print "already there, without dot - overriding!\n"
+                               if 1 || $debug;
                        }
-                       
-                       # ... maybe with other case?
-                       $lc_msg2=lc($msg2);
-                       if ($lc_copyrights{$lc_msg2}) {
-                           print "already there, in different case - skipping\n"
-                               if $debug;
-                           next msg;
-                       }
-                   } else {
-                       # check if there's a version of the same msg
-                       # with a trailing dot
-                       $msg2=$msg;
-                       $msg2.=".";
-                       if ($copyrights{"$msg2"}) {
-                           # already there - skip
-                           print "already there, w/ dot - skipping!\n"
-                               if $debug;
-                           next msg;
-                       }
-                       
-                       # ... maybe with other case?
-                       $lc_msg2=lc($msg2);
-                       if ($lc_copyrights{$lc_msg2}) {
-                           print "already there, in different case - skipping\n"
-                               if $debug;
+                       else {
                            next msg;
                        }
                    }
 
-                   $copyrights{$msg} = 1;
-                   $lc_copyrights{lc($msg)} = 1;
-               }                
+                   $copyrights{$key} = $msg;
+               }
 
            } else {
                print "?> $_" if $debug;
@@ -322,19 +306,22 @@
 
 if ($html) {
     print "<ul>\n";
-    foreach $msg (sort keys %copyrights) {
+    foreach $key (sort keys %copyrights) {
+       my $msg = $copyrights{$key};
        print "<li>$msg</li>\n";
     }
     print "</ul>\n";
 } elsif ($xml) {
-    foreach $msg (sort keys %copyrights) {
+    foreach $key (sort keys %copyrights) {
+       my $msg = $copyrights{$key};
        print "<listitem>$msg</listitem>\n";
     }
 } else {
     print "------------------------------------------------------------\n";
 
     $firsttime=1;
-    foreach $msg (sort keys %copyrights) {
+    foreach $key (sort keys %copyrights) {
+       my $msg = $copyrights{$key};
        if ($firsttime) {
            $firsttime=0;
        } else {



Home | Main Index | Thread Index | Old Index