tech-userlevel archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: Alternative to hash-bang



Alan Barrett <apb%cequrux.com@localhost> wrote:
 |On Fri, 18 Jul 2014, D'Arcy J.M. Cain wrote:
 |>Greg Troxel <gdt%ir.bbn.com@localhost> wrote:
 |>> Once --!! happens, then there will be requests to add all sorts of
 |>> things, and this seems like a mess.  Surely it's easy enough to teach
 |>
 |>As I said, '#', ';' and '--' seem to cover most situations.  Maybe '%'
 |>to a much smaller extent.  Can you think of many other possibilities?
 |
 |Are you proposing to make the kernel recognise several new
 |#!-equivalents?
 |
 |It would be easy, and flexible, and not especially ugly, to make 
 |the kernel scan for "#!" at some non-zero offset in the file.  Say 
 |at all offsets from 0 to 4, inclusive.  Then you could write up 

How about supporting the more-and-more common Unicode
Byte-Order-Mark for UTF-8 encoded shell scripts?  Even though
I personally don't like it, it does have its merits and it will
become more and more common, which is why I think supporting them
would be an enhancement, the sooner the better; anyway, better than:

  ?0[steffen@nhead tmp]$ cat t.sh
  #!/bin/sh
  echo this is a shell script with BOM
  ?0[steffen@nhead tmp]$ s-hex t.sh
  00000000  ef bb bf 23 21 2f 62 69  6e 2f 73 68 0a 65 63 68  |...#!/bin/sh.ech|
  00000010  6f 20 74 68 69 73 20 69  73 20 61 20 73 68 65 6c  |o this is a shel|
  00000020  6c 20 73 63 72 69 70 74  20 77 69 74 68 20 42 4f  |l script with BO|
  00000030  4d 0a                                             |M.|
  00000032
  ?0[steffen@nhead tmp]$ ./t.sh
  ./t.sh: #!/bin/sh: not found
  this is a shell script with BOM
  ?0[steffen@nhead tmp]$

I think the attached diffs should do it, but I don't know (repo
from end of June, not even compile tested; but I think it is good).

--steffen
--- sys-kern-exec_script.c.orig 2014-07-19 15:22:40.000000000 +0200
+++ sys-kern-exec_script.c      2014-07-19 15:20:56.000000000 +0200
@@ -56,19 +56,34 @@ __KERNEL_RCSID(0, "$NetBSD: exec_script.
 
 MODULE(MODULE_CLASS_EXEC, exec_script, NULL);
 
-static struct execsw exec_script_execsw = {
-       .es_hdrsz = SCRIPT_HDR_SIZE,
-       .es_makecmds = exec_script_makecmds,
-       .u = {
-               .elf_probe_func = NULL,
-       },
-       .es_emul = NULL,
-       .es_prio = EXECSW_PRIO_ANY,
-       .es_arglen = 0,
-       .es_copyargs = NULL,
-       .es_setregs = NULL,
-       .es_coredump = NULL,
-       .es_setup_stack = exec_setup_stack,
+static struct execsw exec_script_execsw[] = {
+       {
+               .es_hdrsz = SCRIPT_HDR_SIZE,
+               .es_makecmds = exec_script_makecmds,
+               .u = {
+                       .elf_probe_func = NULL,
+               },
+               .es_emul = NULL,
+               .es_prio = EXECSW_PRIO_ANY,
+               .es_arglen = 0,
+               .es_copyargs = NULL,
+               .es_setregs = NULL,
+               .es_coredump = NULL,
+               .es_setup_stack = exec_setup_stack,
+       }, {
+               .es_hdrsz = BOMSCRIPT_HDR_SIZE,
+               .es_makecmds = exec_script_makecmds,
+               .u = {
+                       .elf_probe_func = NULL,
+               },
+               .es_emul = NULL,
+               .es_prio = EXECSW_PRIO_ANY,
+               .es_arglen = 0,
+               .es_copyargs = NULL,
+               .es_setregs = NULL,
+               .es_coredump = NULL,
+               .es_setup_stack = exec_setup_stack,
+       }
 };
 
 static int
@@ -77,10 +92,12 @@ exec_script_modcmd(modcmd_t cmd, void *a
 
        switch (cmd) {
        case MODULE_CMD_INIT:
-               return exec_add(&exec_script_execsw, 1);
+               return exec_add(exec_script_execsw,
+                       __arraycount(exec_script_execsw));
 
        case MODULE_CMD_FINI:
-               return exec_remove(&exec_script_execsw, 1);
+               return exec_remove(exec_script_execsw,
+                       __arraycount(exec_script_execsw));
 
        case MODULE_CMD_AUTOUNLOAD:
                /*
@@ -114,11 +131,9 @@ int
 exec_script_makecmds(struct lwp *l, struct exec_package *epp)
 {
        int error, hdrlinelen, shellnamelen, shellarglen;
-       char *hdrstr = epp->ep_hdr;
-       char *cp, *shellname, *shellarg;
+       char *hdrstr = epp->ep_hdr, *cp_base, *cp, *shellname, *shellarg;
        size_t shellargp_len;
-       struct exec_fakearg *shellargp;
-       struct exec_fakearg *tmpsap;
+       struct exec_fakearg *shellargp, *tmpsap;
        struct pathbuf *shell_pathbuf;
        struct vnode *scriptvp;
 #ifdef SETUIDSCRIPTS
@@ -132,9 +147,18 @@ exec_script_makecmds(struct lwp *l, stru
         * if the magic isn't that of a shell script, or we've already
         * done shell script processing for this exec, punt on it.
         */
-       if ((epp->ep_flags & EXEC_INDIR) != 0 ||
-           epp->ep_hdrvalid < EXEC_SCRIPT_MAGICLEN ||
-           strncmp(hdrstr, EXEC_SCRIPT_MAGIC, EXEC_SCRIPT_MAGICLEN))
+       if ((epp->ep_flags & EXEC_INDIR) != 0)
+               return ENOEXEC;
+
+       if (epp->ep_hdrvalid >= EXEC_BOMSCRIPT_MAGICLEN &&
+           !strncmp(hdrstr, EXEC_BOMSCRIPT_MAGIC, EXEC_BOMSCRIPT_MAGICLEN)) {
+               cp_base = hdrstr + EXEC_BOMSCRIPT_MAGICLEN;
+               hdrlinelen = min(epp->ep_hdrvalid, BOMSCRIPT_HDR_SIZE);
+       } else if (epp->ep_hdrvalid >= EXEC_SCRIPT_MAGICLEN &&
+           !strncmp(hdrstr, EXEC_SCRIPT_MAGIC, EXEC_SCRIPT_MAGICLEN)) {
+               cp_base = hdrstr + EXEC_SCRIPT_MAGICLEN;
+               hdrlinelen = min(epp->ep_hdrvalid, SCRIPT_HDR_SIZE);
+       } else
                return ENOEXEC;
 
        /*
@@ -144,9 +168,7 @@ exec_script_makecmds(struct lwp *l, stru
         * (The latter requirement means that we have to check
         * for both spaces and tabs later on.)
         */
-       hdrlinelen = min(epp->ep_hdrvalid, SCRIPT_HDR_SIZE);
-       for (cp = hdrstr + EXEC_SCRIPT_MAGICLEN; cp < hdrstr + hdrlinelen;
-           cp++) {
+       for (cp = cp_base; cp < hdrstr + hdrlinelen; ++cp) {
                if (*cp == '\n') {
                        *cp = '\0';
                        break;
@@ -160,8 +182,7 @@ exec_script_makecmds(struct lwp *l, stru
        shellarglen = 0;
 
        /* strip spaces before the shell name */
-       for (cp = hdrstr + EXEC_SCRIPT_MAGICLEN; *cp == ' ' || *cp == '\t';
-           cp++)
+       for (cp = cp_base; *cp == ' ' || *cp == '\t'; ++cp)
                ;
 
        /* collect the shell name; remember it's length for later */
--- sys-sys-exec_script.h.orig  2014-07-19 15:22:19.000000000 +0200
+++ sys-sys-exec_script.h       2014-07-19 15:14:57.000000000 +0200
@@ -36,8 +36,13 @@
 #define        EXEC_SCRIPT_MAGIC       "#!"
 #define        EXEC_SCRIPT_MAGICLEN    2
 
+/* The same, but UTF-8 encoded with a leading Byte-Order-Mark (EF BB BF) */
+#define EXEC_BOMSCRIPT_MAGIC   "\357\273\277#!"
+#define        EXEC_BOMSCRIPT_MAGICLEN 5
+
 /* Extra 2 are for possible space between #! and shell name, and newline.  */
 #define SCRIPT_HDR_SIZE                (EXEC_SCRIPT_MAGICLEN + MAXINTERP + 2)
+#define BOMSCRIPT_HDR_SIZE     (EXEC_BOMSCRIPT_MAGICLEN + MAXINTERP + 2)
 
 #ifdef _KERNEL
 


Home | Main Index | Thread Index | Old Index