Subject: Switching from old-style getopt to new-style one
To: None <tech-userlevel@netbsd.org>
From: Thomas Klausner <wiz@danbala.ifoer.tuwien.ac.at>
List: tech-userlevel
Date: 11/02/2000 03:52:28
--Fba/0zbH8Xs+Fj9o
Content-Type: text/plain; charset=us-ascii

Hi!

Since getopt_long in NetBSD's base system seems to be working without
problems now, I'd like to use its code to replace the current getopt
function, too. The hooks are already there, but they are only enabled
when REPLACE_GETOPT is defined.

One of the major improvements this would bring is that in the
old-style getopt, with an option string of "a:b", a call like
	program -a file file2 -b file3
would not treat '-b' as an option; the program would be left with
	file2 -b file3
as remaining arguments that it has to parse itself. The new-style
getopt by default handles this command line as if it were
	program -a file -b file2 file3
and modifies the argv pointers so that the program gets
	file2 file3
as remaining arguments.

The old-style behaviour is still available: just use a '+' as the first
character of the option string, or pass '--' on the command line,
which stops option parsing at that position.

Most of the programs in the base system that use getopt will
automatically use the new functionality. There are problems with some
existing format strings though: Some programs have (mostly obsolete)
'-' options in their format string -- but '-' as the first character
has a different meaning for the new-style getopt, for compatibility
with GNU getopt: it means that each non-option argument should be
returned to the caller of getopt as the argument of an option with
character code 1 ('\1').  Fortunately, there's an easy workaround:
move the '-' away from the first position, e.g. to the last.

Another issue is programs that require a specific ordering of their
arguments, e.g. the absolutely getopt-incompatible find (which
nevertheless uses getopt for some of its options) -- in this case, a
'+' has to be added at the beginning of the option string.

I went once through the output of `grep -r getopt .../cvs/src/*` and
tried to find all the cases where either of these problems occurs; if
I missed any, please yell.

Attached is a patch that changes libc to use the new-style getopt and
fixes all of the base system tools (if I didn't miss anything), and
also modifies cvs to use NetBSD's getopt instead of its own, as a
bonus.

Comments welcome; I'd like to commit this soon.

Bye,
 Thomas

P.S.: Please CC me on replies; I'm not on this list.

-- 
Thomas Klausner - wiz@danbala.tuwien.ac.at
I think...I think it's in my basement. Let me go upstairs and check.
 -- M.C. Escher (1898-1972)

--Fba/0zbH8Xs+Fj9o
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=getopt-diff

Index: lib/libc/stdlib/Makefile.inc
===================================================================
RCS file: /cvsroot/basesrc/lib/libc/stdlib/Makefile.inc,v
retrieving revision 1.47
diff -u -r1.47 Makefile.inc
--- lib/libc/stdlib/Makefile.inc	2000/04/02 20:27:58	1.47
+++ lib/libc/stdlib/Makefile.inc	2000/11/02 02:19:44
@@ -7,7 +7,7 @@
 SRCS+=	_rand48.c _strtoll.c _strtoull.c \
 	a64l.c abort.c atexit.c atof.c atoi.c atol.c atoll.c \
 	bsearch.c calloc.c drand48.c erand48.c exit.c \
-	getenv.c getopt.c getopt_long.c getsubopt.c heapsort.c jrand48.c \
+	getenv.c getopt_long.c getsubopt.c heapsort.c jrand48.c \
 	l64a.c lldiv.c lcong48.c lrand48.c malloc.c merge.c mrand48.c \
 	multibyte.c nrand48.c putenv.c qabs.c qdiv.c qsort.c radixsort.c \
 	rand.c rand_r.c random.c seed48.c setenv.c srand48.c strtod.c \
Index: lib/libc/stdlib/getopt_long.c
===================================================================
RCS file: /cvsroot/basesrc/lib/libc/stdlib/getopt_long.c,v
retrieving revision 1.8
diff -u -r1.8 getopt_long.c
--- lib/libc/stdlib/getopt_long.c	2000/08/25 17:05:49	1.8
+++ lib/libc/stdlib/getopt_long.c	2000/11/02 02:19:44
@@ -50,7 +50,6 @@
 #include <string.h>
 #include <getopt.h>
 
-#ifdef REPLACE_GETOPT
 #ifdef __weak_alias
 __weak_alias(getopt,_getopt)
 #endif
@@ -59,7 +58,6 @@
 int	optopt = '?';		/* character checked for validity */
 int	optreset;		/* reset getopt */
 char    *optarg;		/* argument associated with option */
-#endif
 
 #ifdef __weak_alias
 __weak_alias(getopt_long,_getopt_long)
@@ -69,7 +67,8 @@
 #define IGNORE_FIRST	(*options == '-' || *options == '+')
 #define PRINT_ERROR	((opterr) && ((*options != ':') \
 				      || (IGNORE_FIRST && options[1] != ':')))
-#define IS_POSIXLY_CORRECT (getenv("POSIXLY_CORRECT") != NULL)
+#define IS_POSIXLY_CORRECT (*options == '+' \
+			    || getenv("POSIXLY_CORRECT") != NULL)
 #define PERMUTE         (!IS_POSIXLY_CORRECT && !IGNORE_FIRST)
 /* XXX: GNU ignores PC if *options == '-' */
 #define IN_ORDER        (!IS_POSIXLY_CORRECT && *options == '-')
@@ -306,12 +305,10 @@
 	return optchar;
 }
 
-#ifdef REPLACE_GETOPT
 /*
  * getopt --
  *	Parse argc/argv argument vector.
  *
- * [eventually this will replace the real getopt]
  */
 int
 getopt(nargc, nargv, options)
@@ -337,7 +334,6 @@
 	}
 	return retval;
 }
-#endif
 
 /*
  * getopt_long --
Index: usr.bin/find/main.c
===================================================================
RCS file: /cvsroot/basesrc/usr.bin/find/main.c,v
retrieving revision 1.17
diff -u -r1.17 main.c
--- usr.bin/find/main.c	2000/08/04 09:01:05	1.17
+++ usr.bin/find/main.c	2000/11/02 02:19:45
@@ -89,7 +89,7 @@
 	p = start = alloca(argc * sizeof (char *));
 
 	ftsoptions = FTS_NOSTAT | FTS_PHYSICAL;
-	while ((ch = getopt(argc, argv, "HLPXdf:hsx")) != -1)
+	while ((ch = getopt(argc, argv, "+HLPXdf:hsx")) != -1)
 		switch (ch) {
 		case 'H':
 			ftsoptions &= ~FTS_LOGICAL;
Index: usr.bin/man/man.c
===================================================================
RCS file: /cvsroot/basesrc/usr.bin/man/man.c,v
retrieving revision 1.25
diff -u -r1.25 man.c
--- usr.bin/man/man.c	2000/06/12 14:53:48	1.25
+++ usr.bin/man/man.c	2000/11/02 02:19:45
@@ -100,7 +100,7 @@
 
 	f_cat = f_how = 0;
 	sectionname = pathsearch = conffile = p_add = p_path = NULL;
-	while ((ch = getopt(argc, argv, "-aC:cfhkM:m:P:s:S:w")) != -1)
+	while ((ch = getopt(argc, argv, "aC:cfhkM:m:P:s:S:w-")) != -1)
 		switch (ch) {
 		case 'a':
 			f_all = 1;
Index: gnu/usr.bin/cvs/libcvs/Makefile
===================================================================
RCS file: /cvsroot/gnusrc/gnu/usr.bin/cvs/libcvs/Makefile,v
retrieving revision 1.2
diff -u -r1.2 Makefile
--- gnu/usr.bin/cvs/libcvs/Makefile	2000/09/04 22:14:31	1.2
+++ gnu/usr.bin/cvs/libcvs/Makefile	2000/11/02 02:19:45
@@ -6,7 +6,7 @@
 DIST=	${IDIST}/lib
 .PATH:	${DIST}
 
-SRCS=	argmatch.c dup2.c getdate.y getline.c getopt.c getopt1.c \
+SRCS=	argmatch.c dup2.c getdate.y getline.c \
 	md5.c regex.c savecwd.c sighandle.c stripslash.c xgetwd.c \
 	yesno.c
 
Index: gnu/dist/cvs/diff/diff.c
===================================================================
RCS file: /cvsroot/gnusrc/gnu/dist/cvs/diff/diff.c,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 diff.c
--- gnu/dist/cvs/diff/diff.c	2000/09/04 21:42:48	1.1.1.1
+++ gnu/dist/cvs/diff/diff.c	2000/11/02 02:19:47
@@ -21,7 +21,7 @@
 #define GDIFF_MAIN
 #include "diff.h"
 #include <signal.h>
-#include "getopt.h"
+#include <getopt.h>
 #include "fnmatch.h"
 
 #ifndef DEFAULT_WIDTH
Index: gnu/dist/cvs/diff/diff3.c
===================================================================
RCS file: /cvsroot/gnusrc/gnu/dist/cvs/diff/diff3.c,v
retrieving revision 1.1.1.2
diff -u -r1.1.1.2 diff3.c
--- gnu/dist/cvs/diff/diff3.c	2000/10/26 13:51:06	1.1.1.2
+++ gnu/dist/cvs/diff/diff3.c	2000/11/02 02:19:49
@@ -19,7 +19,7 @@
 #include "system.h"
 #include <stdio.h>
 #include <setjmp.h>
-#include "getopt.h"
+#include <getopt.h>
 #include "diffrun.h"
 
 /* diff3.c has a real initialize_main function. */
Index: gnu/dist/cvs/src/cvs.h
===================================================================
RCS file: /cvsroot/gnusrc/gnu/dist/cvs/src/cvs.h,v
retrieving revision 1.4
diff -u -r1.4 cvs.h
--- gnu/dist/cvs/src/cvs.h	2000/10/26 14:59:08	1.4
+++ gnu/dist/cvs/src/cvs.h	2000/11/02 02:19:50
@@ -92,7 +92,7 @@
 #endif /* MY_NDBM */
 
 #include "regex.h"
-#include "getopt.h"
+#include <getopt.h>
 #include "wait.h"
 
 #include "rcs.h"
Index: usr.bin/calendar/calendar.c
===================================================================
RCS file: /cvsroot/basesrc/usr.bin/calendar/calendar.c,v
retrieving revision 1.22
diff -u -r1.22 calendar.c
--- usr.bin/calendar/calendar.c	2000/04/14 06:07:14	1.22
+++ usr.bin/calendar/calendar.c	2000/11/02 02:19:51
@@ -101,7 +101,7 @@
 	int ch;
 	const char *caldir;
 
-	while ((ch = getopt(argc, argv, "-ad:f:l:w:")) != -1)
+	while ((ch = getopt(argc, argv, "ad:f:l:w:-")) != -1)
 		switch (ch) {
 		case '-':		/* backward contemptible */
 		case 'a':
Index: usr.bin/env/env.c
===================================================================
RCS file: /cvsroot/basesrc/usr.bin/env/env.c,v
retrieving revision 1.13
diff -u -r1.13 env.c
--- usr.bin/env/env.c	2000/04/14 06:11:07	1.13
+++ usr.bin/env/env.c	2000/11/02 02:19:51
@@ -65,7 +65,7 @@
 
 	setlocale(LC_ALL, "");
 
-	while ((ch = getopt(argc, argv, "-i")) != -1)
+	while ((ch = getopt(argc, argv, "i-")) != -1)
 		switch((char)ch) {
 		case '-':			/* obsolete */
 		case 'i':
Index: usr.bin/split/split.c
===================================================================
RCS file: /cvsroot/basesrc/usr.bin/split/split.c,v
retrieving revision 1.8
diff -u -r1.8 split.c
--- usr.bin/split/split.c	1999/11/02 10:55:47	1.8
+++ usr.bin/split/split.c	2000/11/02 02:19:51
@@ -79,7 +79,7 @@
 	int ch;
 	char *ep, *p;
 
-	while ((ch = getopt(argc, argv, "-0123456789b:l:")) != -1)
+	while ((ch = getopt(argc, argv, "0123456789b:l:-")) != -1)
 		switch (ch) {
 		case '0': case '1': case '2': case '3': case '4':
 		case '5': case '6': case '7': case '8': case '9':
Index: usr.bin/su/su.c
===================================================================
RCS file: /cvsroot/basesrc/usr.bin/su/su.c,v
retrieving revision 1.44
diff -u -r1.44 su.c
--- usr.bin/su/su.c	2000/09/09 18:13:05	1.44
+++ usr.bin/su/su.c	2000/11/02 02:19:52
@@ -90,12 +90,12 @@
 
 #if defined(KERBEROS) || defined(KERBEROS5)
 
-#define	ARGSTRX	"-Kflm"
+#define	ARGSTRX	"Kflm-"
 
 int use_kerberos = 1;
 
 #else
-#define	ARGSTRX	"-flm"
+#define	ARGSTRX	"flm-"
 #endif
 
 #ifndef	SUGROUP
Index: usr.bin/tset/tset.c
===================================================================
RCS file: /cvsroot/basesrc/usr.bin/tset/tset.c,v
retrieving revision 1.12
diff -u -r1.12 tset.c
--- usr.bin/tset/tset.c	2000/10/11 14:46:21	1.12
+++ usr.bin/tset/tset.c	2000/11/02 02:19:52
@@ -103,7 +103,7 @@
 
 	obsolete(argv);
 	noinit = noset = quiet = Sflag = sflag = showterm = extended = 0;
-	while ((ch = getopt(argc, argv, "-a:d:e:EIi:k:m:np:QSrs")) != -1) {
+	while ((ch = getopt(argc, argv, "a:d:e:EIi:k:m:np:QSrs-")) != -1) {
 		switch (ch) {
 		case '-':		/* display term only */
 			noset = 1;
Index: usr.bin/uniq/uniq.c
===================================================================
RCS file: /cvsroot/basesrc/usr.bin/uniq/uniq.c,v
retrieving revision 1.9
diff -u -r1.9 uniq.c
--- usr.bin/uniq/uniq.c	1998/12/19 23:23:49	1.9
+++ usr.bin/uniq/uniq.c	2000/11/02 02:19:52
@@ -81,7 +81,7 @@
 
 	ifp = ofp = NULL;
 	obsolete(argv);
-	while ((ch = getopt(argc, argv, "-cdf:s:u")) != -1)
+	while ((ch = getopt(argc, argv, "cdf:s:u-")) != -1)
 		switch (ch) {
 		case '-':
 			--optind;
Index: usr.bin/xstr/xstr.c
===================================================================
RCS file: /cvsroot/basesrc/usr.bin/xstr/xstr.c,v
retrieving revision 1.11
diff -u -r1.11 xstr.c
--- usr.bin/xstr/xstr.c	1999/11/09 15:06:38	1.11
+++ usr.bin/xstr/xstr.c	2000/11/02 02:19:52
@@ -108,7 +108,7 @@
 {
 	int c;
 
-	while ((c = getopt(argc, argv, "-cvl:")) != -1)
+	while ((c = getopt(argc, argv, "cvl:-")) != -1)
 		switch (c) {
 		case '-':
 			readstd++;

--Fba/0zbH8Xs+Fj9o--