Subject: Re: split into n chunks
To: None <tech-userlevel@netbsd.org>
From: Jan Schaumann <jschauma@netmeister.org>
List: tech-userlevel
Date: 05/29/2007 21:20:46
--IDYEmSnFhs3mNXr+
Content-Type: multipart/mixed; boundary="JgQwtEuHJzHdouWu"
Content-Disposition: inline


--JgQwtEuHJzHdouWu
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable

Jachym Holecek <freza@NetBSD.org> wrote:
=20
> # Jan Schaumann 2007-05-28:
> > I've had the need to split a file into N chunks.  Now I could do the
> > math myself by looking at the file size and then specifying "-b" with
> > the appropriate number, but I figured it might be useful to let
> > split(1) do this for me.
> >=20
> > The attached diff adds the "-n chunk_count" flag to split(1).
=20
> Considering
>=20
>   st_size =3D 100
>   chunks =3D 6
>=20
> yields
>=20
>   bcnt =3D 100/6 =3D 16
>   bcnt =3D 16 + 100%6 =3D 16 + 4 =3D 20
>=20
> thus five files would be created -- not what the user asked for.

Ah, yes.  Here's another patch.

-Jan

--=20
Ancient Principle of WYGIWYGAINGW:
What You Get Is What You're Given, And It's No Good Whining.
--Terry Pratchett et al. (The Science of Discworld)

--JgQwtEuHJzHdouWu
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=diff
Content-Transfer-Encoding: quoted-printable

Index: split.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
RCS file: /cvsroot/src/usr.bin/split/split.c,v
retrieving revision 1.21
diff -b -u -r1.21 split.c
--- split.c	5 Jan 2004 23:23:37 -0000	1.21
+++ split.c	30 May 2007 04:18:48 -0000
@@ -62,8 +62,9 @@
=20
 int  main(int, char **);
 static void newfile(void);
-static void split1(off_t);
+static void split1(off_t, int);
 static void split2(off_t);
+static void split3(off_t);
 static void usage(void) __attribute__((__noreturn__));
 static size_t bigwrite(int, void const *, size_t);
=20
@@ -75,8 +76,9 @@
 	char const *base;
 	off_t bytecnt =3D 0;	/* Byte count to split on. */
 	off_t numlines =3D 0;	/* Line count to split on. */
+	off_t chunks =3D 0;	/* Number of chunks to split into. */
=20
-	while ((ch =3D getopt(argc, argv, "0123456789b:l:a:")) !=3D -1)
+	while ((ch =3D getopt(argc, argv, "0123456789b:l:a:n:")) !=3D -1)
 		switch (ch) {
 		case '0': case '1': case '2': case '3': case '4':
 		case '5': case '6': case '7': case '8': case '9':
@@ -119,6 +121,12 @@
 			    *ep !=3D '\0')
 				errx(1, "%s: illegal suffix length.", optarg);
 			break;
+		case 'n':		/* Chunks. */
+			if (!isdigit((unsigned char)optarg[0]) ||
+			    (chunks =3D (size_t)strtoul(optarg, &ep, 10)) =3D=3D 0 ||
+			    *ep !=3D '\0')
+				errx(1, "%s: illegal number of chunks.", optarg);
+			break;
 		default:
 			usage();
 		}
@@ -143,11 +151,16 @@
=20
 	if (numlines =3D=3D 0)
 		numlines =3D DEFLINE;
-	else if (bytecnt)
+	else if (bytecnt || chunks)
+		usage();
+
+	if (bytecnt && chunks)
 		usage();
=20
 	if (bytecnt)
-		split1(bytecnt);
+		split1(bytecnt, 0);
+	else if (chunks)
+		split3(chunks);
 	else
 		split2(numlines);
=20
@@ -159,12 +172,15 @@
  *	Split the input by bytes.
  */
 static void
-split1(off_t bytecnt)
+split1(off_t bytecnt, int maxcnt)
 {
 	off_t bcnt;
 	ssize_t dist, len;
 	char *C;
 	char bfr[MAXBSIZE];
+	int nfiles;
+
+	nfiles =3D 0;
=20
 	for (bcnt =3D 0;;)
 		switch (len =3D read(ifd, bfr, MAXBSIZE)) {
@@ -176,9 +192,12 @@
 			/* NOTREACHED */
 		default:
 			if (!file_open) {
+				if (!maxcnt || (nfiles < maxcnt)) {
 				newfile();
+					nfiles++;
 				file_open =3D 1;
 			}
+			}
 			if (bcnt + len >=3D bytecnt) {
 				/* LINTED: bytecnt - bcnt <=3D len */
 				dist =3D bytecnt - bcnt;
@@ -188,14 +207,20 @@
 				for (C =3D bfr + dist; len >=3D bytecnt;
 				    /* LINTED: bytecnt <=3D len */
 				    len -=3D bytecnt, C +=3D bytecnt) {
+					if (!maxcnt || (nfiles < maxcnt)) {
 					newfile();
+						nfiles++;
+					}
 					/* LINTED: as above */
 					if (bigwrite(ofd,
 					    C, bytecnt) !=3D bytecnt)
 						err(1, "write");
 				}
 				if (len) {
+					if (!maxcnt || (nfiles < maxcnt)) {
 					newfile();
+						nfiles++;
+					}
 					/* LINTED: len >=3D 0 */
 					if (bigwrite(ofd, C, len) !=3D len)
 						err(1, "write");
@@ -258,6 +283,29 @@
 }
=20
 /*
+ * split3 --
+ *	Split the input into specified number of chunks
+ */
+static void
+split3(off_t chunks)
+{
+	struct stat sb;
+
+	if (fstat(ifd, &sb) =3D=3D -1) {
+		err(1, "stat");
+		/* NOTREACHED */
+	}
+
+	if (chunks > sb.st_size) {
+		errx(1, "can't split into more than %d files",
+				(int)sb.st_size);
+		/* NOTREACHED */
+	}
+
+	split1(sb.st_size/chunks, chunks);
+}
+
+/*
  * newfile --
  *	Open a new output file.
  */
@@ -308,7 +356,7 @@
 usage(void)
 {
 	(void)fprintf(stderr,
-"usage: %s [-b byte_count] [-l line_count] [-a suffix_length] "
+"usage: %s [-b byte_count] [-l line_count] [-n chunk_count] [-a suffix_len=
gth] "
 "[file [prefix]]\n", getprogname());
 	exit(1);
 }

--JgQwtEuHJzHdouWu--

--IDYEmSnFhs3mNXr+
Content-Type: application/pgp-signature
Content-Disposition: inline

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.6 (NetBSD)

iD8DBQFGXPuefFtkr68iakwRAt6ZAKDXxNeprN6DQ7ywSWF4J1x4pVB1OwCeInIL
GU6JYj/Prr3jyC3Lp5DKebk=
=wjuN
-----END PGP SIGNATURE-----

--IDYEmSnFhs3mNXr+--