tech-userlevel archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

split(1): add '-c' to continue creating files



Hello,

Currently, split(1) will clobber any existing output
files:

$ split file; ls
xaa xab xac xad
$ split second-file; ls
xaa xab xac xad xae xaf

I would often like it to pick up where it left off
and continue creating files in the sequence.  Right
now, there is no good way for me to produce the desired
output of "xaa xab xac xad xae xaf xag xah xai xaj".

The attached diff adds a flag "-c" (mnemonic "create,
don't overwrite" or "continue where you left off"):

$ split file; ls
xaa xab xac xad
$ split -c second-file; ls
xaa xab xac xad xae xaf xag xah xai xaj

Any objections?

-Jan
Index: split.1
===================================================================
RCS file: /cvsroot/src/usr.bin/split/split.1,v
retrieving revision 1.16
diff -u -p -r1.16 split.1
--- split.1	30 Jan 2023 15:22:02 -0000	1.16
+++ split.1	12 Feb 2023 20:57:09 -0000
@@ -29,7 +29,7 @@
 .\"
 .\"	@(#)split.1	8.3 (Berkeley) 4/16/94
 .\"
-.Dd January 28, 2023
+.Dd February 12, 2023
 .Dt SPLIT 1
 .Os
 .Sh NAME
@@ -37,6 +37,7 @@
 .Nd split a file into pieces
 .Sh SYNOPSIS
 .Nm
+.Op Fl c
 .Op Fl a Ar suffix_length
 .Oo
 .Fl b Ar byte_count Ns Oo Li k|m Oc |
@@ -78,6 +79,9 @@ If
 is appended to the number, the file is split into
 .Ar byte_count
 megabyte pieces.
+.It Fl c
+Continue creating files and do not overwrite existing
+output files.
 .It Fl l
 Create smaller files
 .Ar line_count
@@ -111,6 +115,16 @@ If the
 argument is not specified,
 .Ql x
 is used.
+.Pp
+By default,
+.Nm
+will overwrite any existing output files.
+If the
+.Fl c
+flag is specified,
+.Nm
+will instead continue to generate output file names
+until it finds one that does not already exist.
 .Sh STANDARDS
 The
 .Nm
Index: split.c
===================================================================
RCS file: /cvsroot/src/usr.bin/split/split.c,v
retrieving revision 1.30
diff -u -p -r1.30 split.c
--- split.c	12 Feb 2023 20:43:21 -0000	1.30
+++ split.c	12 Feb 2023 20:57:09 -0000
@@ -56,6 +56,7 @@ __RCSID("$NetBSD: split.c,v 1.30 2023/02
 
 #define DEFLINE	1000		/* Default num lines per file. */
 
+static int clobber = 1;         /* Whether to overwrite existing output files. */
 static int file_open;		/* If a file is open. */
 static int ifd = STDIN_FILENO, ofd = -1; /* Input/output file descriptors. */
 static char *fname;		/* File name prefix. */
@@ -79,7 +80,7 @@ main(int argc, char *argv[])
 	off_t numlines = 0;	/* Line count to split on. */
 	off_t chunks = 0;	/* Number of chunks to split into. */
 
-	while ((ch = getopt(argc, argv, "0123456789b:l:a:n:")) != -1)
+	while ((ch = getopt(argc, argv, "0123456789b:cl:a:n:")) != -1)
 		switch (ch) {
 		case '0': case '1': case '2': case '3': case '4':
 		case '5': case '6': case '7': case '8': case '9':
@@ -115,6 +116,9 @@ main(int argc, char *argv[])
 			else if (*ep == 'm')
 				bytecnt *= 1024 * 1024;
 			break;
+		case 'c':               /* Continue, don't overwrite output files. */
+			clobber = 0;
+			break;
 		case 'l':		/* Line count. */
 			if (numlines != 0)
 				usage();
@@ -324,6 +328,7 @@ newfile(void)
 	} else if (close(ofd) != 0)
 		err(1, "%s", fname);
 
+again:
 	quot = fnum;
 
 	/* If '-a' is not specified, then we automatically expand the
@@ -364,6 +369,11 @@ newfile(void)
 	if (quot > 0)
 		errx(1, "too many files.");
 	++fnum;
+
+	if (!clobber && (access(fname, F_OK) == 0)) {
+		goto again;
+	}
+
 	if ((ofd = open(fname, O_WRONLY | O_CREAT | O_TRUNC, DEFFILEMODE)) < 0)
 		err(1, "%s", fname);
 }


Home | Main Index | Thread Index | Old Index