Subject: Re: make: suppressing inter-word space in variable expansions
To: None <tech-toolchain@netbsd.org>
From: Simon J. Gerraty <sjg@crufty.net>
List: tech-toolchain
Date: 07/14/2003 00:18:07
Ok after some pondering I decided :ts made sense.

:tu	 to upper
:tl	 to lower
:ts[c]	 set word separator to 'c', no 'c' means no separation.

With the following makefile:

LIST=one two
LIST+= three
LIST+= four

all:
	@echo LIST='${LIST}'
	@echo LIST:ts,='${LIST:ts,}'
	@echo LIST:ts/:tu='${LIST:ts/:tu}'
	@echo LIST:tu:ts/='${LIST:tu:ts/}'
	@echo LIST:ts:='${LIST:ts:}'
	@echo LIST:ts='${LIST:ts}'
	@echo LIST:ts:S/two/2/='${LIST:ts:S/two/2/}'
	@echo LIST:S/two/2/:ts='${LIST:S/two/2/:ts}'
	@echo LIST:ts/:S/two/2/='${LIST:ts/:S/two/2/}'
	@echo "LIST:ts\n='${LIST:ts\n}'"
	@echo "LIST:ts\t='${LIST:ts\t}'"
	@echo "LIST:ts\012:tu='${LIST:ts\012:tu}'"
	@echo "LIST:tx='${LIST:tx}'"
	@echo "LIST:ts\a:tu='${LIST:ts\a:tu}'"

We get:

LIST=one two three four
LIST:ts,=one,two,three,four
LIST:ts/:tu=ONE/TWO/THREE/FOUR
LIST:tu:ts/=ONE/TWO/THREE/FOUR
LIST:ts:=one:two:three:four
LIST:ts=onetwothreefour
LIST:ts:S/two/2/=one2threefour
LIST:S/two/2/:ts=one2threefour
LIST:ts/:S/two/2/=one/2/three/four
LIST:ts\n='one
two
three
four'
LIST:ts\t='one  two     three   four'
LIST:ts\012:tu='ONE
TWO
THREE
FOUR'
make: Bad modifier `:tx' for LIST
LIST:tx='}'
make: Bad modifier `:ts\a' for LIST
LIST:ts\a:tu='\a:tu}'

Which is what you'd expect.  Aside from implementing :ts, the patch
below fixes a couple of bugs in Var_Parse - at least for :t[^uls].
newStr wasn't being initialized in some failure modes (e.g. :tx),
and neither was cp (at least for :t).

--sjg

Index: make.1
===================================================================
RCS file: /cvsroot/src/usr.bin/make/make.1,v
retrieving revision 1.80
diff -u -p -r1.80 make.1
--- make.1	2003/06/26 18:21:45	1.80
+++ make.1	2003/07/14 07:02:40
@@ -625,6 +625,13 @@ Replaces each word in the variable with 
 Converts variable to lower-case letters.
 .It Cm tu
 Converts variable to upper-case letters.
+.It Cm ts Ar c
+Words in the variable are normally separated by a space on expansion.
+This modifier sets the separator to the character
+.Ar c .
+If
+.Ar c
+is omitted, then no separator is used.
 .Sm off
 .It Cm S No \&/ Ar old_string Xo
 .No \&/ Ar new_string
Index: var.c
===================================================================
RCS file: /cvsroot/src/usr.bin/make/var.c,v
retrieving revision 1.72
diff -u -p -r1.72 var.c
--- var.c	2003/05/22 18:20:10	1.72
+++ var.c	2003/07/14 07:02:49
@@ -165,6 +165,8 @@ typedef struct Var {
 #define VAR_MATCH_END	0x10	/* Match at end of word */
 #define VAR_NOSUBST	0x20	/* don't expand vars in VarGetPattern */
 
+static Byte varSpace = ' ';	/* word separator in expansions */
+
 /* Var_Set flags */
 #define VAR_NO_EXPORT	0x01	/* do not export */
 
@@ -654,8 +656,8 @@ VarHead(GNode *ctx, char *word, Boolean 
 
     slash = strrchr (word, '/');
     if (slash != (char *)NULL) {
-	if (addSpace) {
-	    Buf_AddByte (buf, (Byte)' ');
+	if (addSpace && varSpace) {
+	    Buf_AddByte (buf, varSpace);
 	}
 	*slash = '\0';
 	Buf_AddBytes (buf, strlen (word), (Byte *)word);
@@ -665,7 +667,7 @@ VarHead(GNode *ctx, char *word, Boolean 
 	/*
 	 * If no directory part, give . (q.v. the POSIX standard)
 	 */
-	if (addSpace) {
+	if (addSpace && varSpace) {
 	    Buf_AddBytes(buf, 2, (Byte *)" .");
 	} else {
 	    Buf_AddByte(buf, (Byte)'.');
@@ -701,8 +703,8 @@ VarTail(GNode *ctx, char *word, Boolean 
 {
     char *slash;
 
-    if (addSpace) {
-	Buf_AddByte (buf, (Byte)' ');
+    if (addSpace && varSpace) {
+	Buf_AddByte (buf, varSpace);
     }
 
     slash = strrchr (word, '/');
@@ -744,8 +746,8 @@ VarSuffix(GNode *ctx, char *word, Boolea
 
     dot = strrchr (word, '.');
     if (dot != (char *)NULL) {
-	if (addSpace) {
-	    Buf_AddByte (buf, (Byte)' ');
+	if (addSpace && varSpace) {
+	    Buf_AddByte (buf, varSpace);
 	}
 	*dot++ = '\0';
 	Buf_AddBytes (buf, strlen (dot), (Byte *)dot);
@@ -782,8 +784,8 @@ VarRoot(GNode *ctx, char *word, Boolean 
 {
     char *dot;
 
-    if (addSpace) {
-	Buf_AddByte (buf, (Byte)' ');
+    if (addSpace && varSpace) {
+	Buf_AddByte (buf, varSpace);
     }
 
     dot = strrchr (word, '.');
@@ -824,8 +826,8 @@ VarMatch(GNode *ctx, char *word, Boolean
 	 ClientData pattern)
 {
     if (Str_Match(word, (char *) pattern)) {
-	if (addSpace) {
-	    Buf_AddByte(buf, (Byte)' ');
+	if (addSpace && varSpace) {
+	    Buf_AddByte(buf, varSpace);
 	}
 	addSpace = TRUE;
 	Buf_AddBytes(buf, strlen(word), (Byte *)word);
@@ -866,8 +868,8 @@ VarSYSVMatch(GNode *ctx, char *word, Boo
     VarPattern 	  *pat = (VarPattern *) patp;
     char *varexp;
 
-    if (addSpace)
-	Buf_AddByte(buf, (Byte)' ');
+    if (addSpace && varSpace)
+	Buf_AddByte(buf, varSpace);
 
     addSpace = TRUE;
 
@@ -911,8 +913,8 @@ VarNoMatch(GNode *ctx, char *word, Boole
 	   ClientData pattern)
 {
     if (!Str_Match(word, (char *) pattern)) {
-	if (addSpace) {
-	    Buf_AddByte(buf, (Byte)' ');
+	if (addSpace && varSpace) {
+	    Buf_AddByte(buf, varSpace);
 	}
 	addSpace = TRUE;
 	Buf_AddBytes(buf, strlen(word), (Byte *)word);
@@ -970,8 +972,8 @@ VarSubstitute(GNode *ctx, char *word, Bo
 			 * if rhs is non-null.
 			 */
 			if (pattern->rightLen != 0) {
-			    if (addSpace) {
-				Buf_AddByte(buf, (Byte)' ');
+			    if (addSpace && varSpace) {
+				Buf_AddByte(buf, varSpace);
 			    }
 			    addSpace = TRUE;
 			    Buf_AddBytes(buf, pattern->rightLen,
@@ -988,8 +990,8 @@ VarSubstitute(GNode *ctx, char *word, Bo
 		     * Matches at start but need to copy in trailing characters
 		     */
 		    if ((pattern->rightLen + wordLen - pattern->leftLen) != 0){
-			if (addSpace) {
-			    Buf_AddByte(buf, (Byte)' ');
+			if (addSpace && varSpace) {
+			    Buf_AddByte(buf, varSpace);
 			}
 			addSpace = TRUE;
 		    }
@@ -1020,8 +1022,8 @@ VarSubstitute(GNode *ctx, char *word, Bo
 		 * by the right-hand-side.
 		 */
 		if (((cp - word) + pattern->rightLen) != 0) {
-		    if (addSpace) {
-			Buf_AddByte(buf, (Byte)' ');
+		    if (addSpace && varSpace) {
+			Buf_AddByte(buf, varSpace);
 		    }
 		    addSpace = TRUE;
 		}
@@ -1055,7 +1057,7 @@ VarSubstitute(GNode *ctx, char *word, Bo
 		cp = Str_FindSubstring(word, pattern->lhs);
 		if (cp != (char *)NULL) {
 		    if (addSpace && (((cp - word) + pattern->rightLen) != 0)){
-			Buf_AddByte(buf, (Byte)' ');
+			Buf_AddByte(buf, varSpace);
 			addSpace = FALSE;
 		    }
 		    Buf_AddBytes(buf, cp-word, (Byte *)word);
@@ -1074,8 +1076,8 @@ VarSubstitute(GNode *ctx, char *word, Bo
 		}
 	    }
 	    if (wordLen != 0) {
-		if (addSpace) {
-		    Buf_AddByte(buf, (Byte)' ');
+		if (addSpace && varSpace) {
+		    Buf_AddByte(buf, varSpace);
 		}
 		Buf_AddBytes(buf, wordLen, (Byte *)word);
 	    }
@@ -1089,8 +1091,8 @@ VarSubstitute(GNode *ctx, char *word, Bo
 	return (addSpace);
     }
  nosub:
-    if (addSpace) {
-	Buf_AddByte(buf, (Byte)' ');
+    if (addSpace && varSpace) {
+	Buf_AddByte(buf, varSpace);
     }
     Buf_AddBytes(buf, wordLen, (Byte *)word);
     return(TRUE);
@@ -1696,6 +1698,8 @@ Var_Parse(char *str, GNode *ctxt, Boolea
     dynamic = FALSE;
     start = str;
 
+    varSpace = ' ';			/* reset this */
+    
     if (str[1] != '(' && str[1] != '{') {
 	/*
 	 * If it's not bounded by braces of some sort, life is much simpler.
@@ -1961,6 +1965,10 @@ Var_Parse(char *str, GNode *ctxt, Boolea
      *		  :u		("uniq") Remove adjacent duplicate words.
      *		  :tu		Converts the variable contents to uppercase.
      *		  :tl		Converts the variable contents to lowercase.
+     *		  :ts[c]	Sets varSpace - the char used to
+     *				separate words to 'c'. If 'c' is
+     *				omitted then no separation is used.
+     *
      *		  :?<true-value>:<false-value>
      *				If the variable evaluates to true, return
      *				true value, else return the second value.
@@ -2017,6 +2025,7 @@ Var_Parse(char *str, GNode *ctxt, Boolea
 	    if (DEBUG(VAR)) {
 		printf("Applying :%c to \"%s\"\n", *tstr, str);
 	    }
+	    newStr = str;
 	    switch (*tstr) {
 	        case ':':
 		    
@@ -2237,14 +2246,66 @@ Var_Parse(char *str, GNode *ctxt, Boolea
 		}
 	        case 't':
 		{
+		    cp = tstr + 1;	/* make sure it is set */
 		    if (tstr[1] != endc && tstr[1] != ':') {
-		        if (tstr[2] == endc || tstr[2] == ':') {
+			if (tstr[1] == 's') {
+			    /*
+			     * Use the char (if any) at tstr[2]
+			     * as the word separator.
+			     */
+			    VarPattern pattern;
+
+			    if (tstr[3] == endc || tstr[3] == ':') {
+				varSpace = tstr[2];
+				cp = tstr + 3;
+			    } else if (tstr[2] == endc || tstr[2] == ':') {
+				varSpace = 0; /* no separator */
+				cp = tstr + 2;
+			    } else if (tstr[2] == '\\') {
+				switch (tstr[3]) {
+				case 'n':
+				    varSpace = '\n';
+				    cp = tstr + 4;
+				    break;
+				case 't':
+				    varSpace = '\t';
+				    cp = tstr + 4;
+				    break;
+				default:
+				    if (isdigit(tstr[3])) {
+					varSpace = strtoul(&tstr[3], &cp, 0);
+				    } else {
+					goto bad_modifier;
+				    }
+				    break;
+				}
+			    } else				
+				break;	/* not us */
+
+			    termc = *cp;
+
+			    /*
+			     * We cannot be certain that VarModify
+			     * will be used - even if there is a
+			     * subsequent modifier, so do a no-op
+			     * VarSubstitute now to force str to be
+			     * re-expanded without the spaces.
+			     */
+			    pattern.flags = VAR_SUB_ONE;
+			    pattern.lhs = pattern.rhs = "\032";
+			    pattern.leftLen = pattern.rightLen = 1;
+
+			    newStr = VarModify(ctxt, str, VarSubstitute,
+					       (ClientData)&pattern);
+			} else if (tstr[2] == endc || tstr[2] == ':') {
                             if (tstr[1] == 'u' || tstr[1] == 'l') {
                                 newStr = VarChangeCase (str, (tstr[1] == 'u'));
                                 cp = tstr + 2;
                                 termc = *cp;
-                            }
-		        }
+                            } else {
+				goto bad_modifier;
+			    }
+			}
 		    }
 		    break;
 		}
@@ -2456,6 +2517,7 @@ Var_Parse(char *str, GNode *ctxt, Boolea
 			*lengthPtr = cp - start + 1;
 			VarREError(error, &pattern.re, "RE substitution error");
 			free(pattern.replace);
+			varSpace = ' ';	/* reset this */
 			return (var_Error);
 		    }
 
@@ -2697,9 +2759,15 @@ Var_Parse(char *str, GNode *ctxt, Boolea
 	free((Address)v->name);
 	free((Address)v);
     }
+    varSpace = ' ';			/* reset this */
     return (str);
 
+ bad_modifier:
+    Error("Bad modifier `:%.*s' for %s", (int)strcspn(tstr, ":)}"), tstr,
+	  v->name);
+
 cleanup:
+    varSpace = ' ';			/* reset this */
     *lengthPtr = cp - start + 1;
     if (*freePtr)
 	free(str);