Subject: make: adding .KILL ?
To: David Laight <david@l8s.co.uk>
From: Simon J. Gerraty <sjg@crufty.net>
List: tech-toolchain
Date: 10/31/2006 14:41:57
Some time ago, David wrote:
>Given the changes I've made recently in order to get parallel makes to
>stop in a finite time when an error happens on one branch, I suspect
>that a merge in either direction will be painful.

Those changes work nicely btw, but there is still the issue that a job
with _long_ running sub-jobs won't notice the failure in the other branch
until its sub-jobs finish.  I see this in part of my build at work where
a big chunk of the tree is built using gmake.  All the bmake jobs stop
as soon as one of them fails, but the gmake bit chugs on and on...

So, I thought of adding a .KILL special target, meaning: if this job
fails, kill everyone left standing.  Which does the job nicely ;-)

The patch below is against bmake as of July, since with the recent churn
in make, I'm no longer able to build it by itself on my NetBSD boxes,
which also means I can't plan to update bmake.

It seems the MAKE_NATIVE define has grown to mean something like a
__NetBSD__ native build, which is not what it used to mean, and I'm
currently at a loss as to how to fix it.  I think the emalloc stuff is
the main issue.

Anyway, what do you think of the following idea (could likely use 
some tweaking):

--sjg

Index: job.c
===================================================================
RCS file: /cvs/src/bmake/job.c,v
retrieving revision 1.17
diff -u -p -r1.17 job.c
--- job.c	17 Apr 2006 16:58:40 -0000	1.17
+++ job.c	31 Oct 2006 22:05:05 -0000
@@ -914,7 +914,9 @@ JobClose(Job *job)
 static void
 JobFinish (Job *job, WAIT_T status)
 {
-    Boolean 	 done, return_job_token;
+    Boolean 	 done, return_job_token, killAll;
+
+    killAll = ((job->node->type & OP_KILL) != 0);
 
     if ((WIFEXITED(status) &&
 	  (((WEXITSTATUS(status) != 0) && !(job->flags & JOB_IGNERR)))) ||
@@ -1103,11 +1105,18 @@ JobFinish (Job *job, WAIT_T status)
     if (return_job_token)
 	Job_TokenReturn();
 
-    if (aborting == ABORT_ERROR && jobTokensRunning == 0) {
-	/*
-	 * If we are aborting and the job table is now empty, we finish.
-	 */
-	Finish(errors);
+    if (aborting == ABORT_ERROR) {
+	if (killAll) {
+	    Job_AbortAll(SIGTERM);
+	    jobTokensRunning = 0;
+	}
+	
+	if (jobTokensRunning == 0) {
+	    /*
+	     * If we are aborting and the job table is now empty, we finish.
+	     */
+	    Finish(errors);
+	}
     }
 }
 
@@ -2685,7 +2694,7 @@ Job_Wait(void)
  *-----------------------------------------------------------------------
  */
 void
-Job_AbortAll(void)
+Job_AbortAll(int sig)
 {
     LstNode	ln;	/* element in job table */
     Job		*job;	/* the job descriptor in that element */
@@ -2694,6 +2703,10 @@ Job_AbortAll(void)
 
     aborting = ABORT_ERROR;
 
+    if (DEBUG(JOB)) {
+	(void)fprintf(stdout, "Killing everyone!\n");
+    }
+
     if (jobTokensRunning) {
 
 	JobSigLock(&mask);
@@ -2701,16 +2714,27 @@ Job_AbortAll(void)
 	while ((ln = Lst_Next(jobs)) != NILLNODE) {
 	    job = (Job *)Lst_Datum(ln);
 
+	    if (DEBUG(JOB)) {
+		(void)fprintf(stdout, "\tkill %d\n", job->pid);
+	    }
 	    /*
 	     * kill the child process with increasingly drastic signals to make
 	     * darn sure it's dead.
 	     */
 	    KILL(job->pid, SIGINT);
+	    if (sig != SIGKILL) {
+		sleep(1);
+		KILL(job->pid, sig);
+		sleep(1);
+	    }
 	    KILL(job->pid, SIGKILL);
 	}
 	Lst_Close(jobs);
 	JobSigUnlock(&mask);
     }
+    if (DEBUG(JOB)) {
+	(void)fprintf(stdout, "Reaping the dead...\n");
+    }
 
     /*
      * Catch as many children as want to report in at first, then give up
Index: job.h
===================================================================
RCS file: /cvs/src/bmake/job.h,v
retrieving revision 1.1.1.10
diff -u -p -r1.1.1.10 job.h
--- job.h	17 Apr 2006 16:48:38 -0000	1.1.1.10
+++ job.h	31 Oct 2006 22:05:05 -0000
@@ -280,7 +280,7 @@ ReturnStatus Job_ParseShell(char *);
 int Job_Finish(void);
 void Job_End(void);
 void Job_Wait(void);
-void Job_AbortAll(void);
+void Job_AbortAll(int);
 void JobFlagForMigration(int);
 void Job_TokenReturn(void);
 Boolean Job_TokenWithdraw(void);
Index: main.c
===================================================================
RCS file: /cvs/src/bmake/main.c,v
retrieving revision 1.25
diff -u -p -r1.25 main.c
--- main.c	11 May 2006 19:11:40 -0000	1.25
+++ main.c	31 Oct 2006 22:05:05 -0000
@@ -1622,7 +1622,7 @@ void
 DieHorribly(void)
 {
 	if (jobsRunning)
-		Job_AbortAll();
+		Job_AbortAll(SIGKILL);
 	if (DEBUG(GRAPH2))
 		Targ_PrintGraph(2);
 	Trace_Log(MAKEERROR, 0);
Index: make.h
===================================================================
RCS file: /cvs/src/bmake/make.h,v
retrieving revision 1.15
diff -u -p -r1.15 make.h
--- make.h	20 Mar 2006 17:59:55 -0000	1.15
+++ make.h	31 Oct 2006 22:05:05 -0000
@@ -280,6 +280,8 @@ typedef struct GNode {
 				     * target' processing in parse.c */
 #define OP_PHONY	0x00010000  /* Not a file target; run always */
 #define OP_NOPATH	0x00020000  /* Don't search for file in the path */
+#define OP_KILL		0x00040000  /* Kill everyone on error */
+
 /* Attributes applied by PMake */
 #define OP_TRANSFORM	0x80000000  /* The node is a transformation rule */
 #define OP_MEMBER 	0x40000000  /* Target is a member of an archive */
Index: parse.c
===================================================================
RCS file: /cvs/src/bmake/parse.c,v
retrieving revision 1.19
diff -u -p -r1.19 parse.c
--- parse.c	17 Apr 2006 16:58:40 -0000	1.19
+++ parse.c	31 Oct 2006 22:05:05 -0000
@@ -249,6 +249,7 @@ static struct {
 { ".INTERRUPT",	  Interrupt,	0 },
 { ".INVISIBLE",	  Attribute,   	OP_INVISIBLE },
 { ".JOIN",  	  Attribute,   	OP_JOIN },
+{ ".KILL",	  Attribute,	OP_KILL },
 { ".LIBS",  	  Libs,	    	0 },
 { ".MADE",	  Attribute,	OP_MADE },
 { ".MAIN",	  Main,		0 },