Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/fs/cd9660 when Joliet extension is in use, encode the Jo...



details:   https://anonhg.NetBSD.org/src/rev/9a92504b3bdb
branches:  trunk
changeset: 571392:9a92504b3bdb
user:      jdolecek <jdolecek%NetBSD.org@localhost>
date:      Sun Nov 21 21:49:08 2004 +0000

description:
When the Joliet extension is in use, encode the Joliet Unicode file names
into UTF-8, rather than filtering them to the ISO-8859-1 subset.

Provide the vfs.cd9660.utf8_joliet sysctl to switch back to the former
ISO-8859-1-only handling; the default is to UTF-8 encode.

diffstat:

 sys/fs/cd9660/cd9660_extern.h |   16 ++++-
 sys/fs/cd9660/cd9660_rrip.c   |    8 +-
 sys/fs/cd9660/cd9660_util.c   |  141 +++++++++++++++++++++++++++++------------
 sys/fs/cd9660/cd9660_vfsops.c |   17 ++--
 4 files changed, 128 insertions(+), 54 deletions(-)

diffs (truncated from 342 to 300 lines):

diff -r b6194ae809eb -r 9a92504b3bdb sys/fs/cd9660/cd9660_extern.h
--- a/sys/fs/cd9660/cd9660_extern.h     Sun Nov 21 21:07:15 2004 +0000
+++ b/sys/fs/cd9660/cd9660_extern.h     Sun Nov 21 21:49:08 2004 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: cd9660_extern.h,v 1.10 2004/05/20 06:34:26 atatat Exp $        */
+/*     $NetBSD: cd9660_extern.h,v 1.11 2004/11/21 21:49:08 jdolecek Exp $      */
 
 /*-
  * Copyright (c) 1994
@@ -43,6 +43,17 @@
 #include <sys/mallocvar.h>
 MALLOC_DECLARE(M_ISOFSMNT);
 
+/*
+ * Sysctl values for the cd9660 filesystem.
+ */
+#define CD9660_UTF8_JOLIET     1       /* UTF-8 encode Joliet file names */
+
+#define CD9660_NAMES { \
+        { 0, 0 }, \
+        { "utf8_joliet", CTLTYPE_INT }, \
+}
+
+
 /* CD-ROM Format type */
 enum ISO_FTYPE  { ISO_FTYPE_DEFAULT, ISO_FTYPE_9660, ISO_FTYPE_RRIP, ISO_FTYPE_ECMA };
 
@@ -82,6 +93,7 @@
 #define blksize(imp, ip, lbn)  ((imp)->logical_block_size)
 
 extern struct pool cd9660_node_pool;
+extern int cd9660_utf8_joliet;
 
 int cd9660_mount __P((struct mount *,
            const char *, void *, struct nameidata *, struct proc *));
@@ -110,7 +122,7 @@
 extern int (**cd9660_specop_p) __P((void *));
 extern int (**cd9660_fifoop_p) __P((void *));
 
-int isochar __P((const u_char *, const u_char *, int, u_char *));
+int isochar __P((const u_char *, const u_char *, int, u_int16_t *));
 int isofncmp __P((const u_char *, int, const u_char *, int, int));
 void isofntrans __P((u_char *, int, u_char *, u_short *, int, int, int, int));
 ino_t isodirino __P((struct iso_directory_record *, struct iso_mnt *));
diff -r b6194ae809eb -r 9a92504b3bdb sys/fs/cd9660/cd9660_rrip.c
--- a/sys/fs/cd9660/cd9660_rrip.c       Sun Nov 21 21:07:15 2004 +0000
+++ b/sys/fs/cd9660/cd9660_rrip.c       Sun Nov 21 21:49:08 2004 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: cd9660_rrip.c,v 1.4 2004/11/20 19:56:44 jdolecek Exp $ */
+/*     $NetBSD: cd9660_rrip.c,v 1.5 2004/11/21 21:49:08 jdolecek Exp $ */
 
 /*-
  * Copyright (c) 1993, 1994
@@ -37,7 +37,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cd9660_rrip.c,v 1.4 2004/11/20 19:56:44 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cd9660_rrip.c,v 1.5 2004/11/21 21:49:08 jdolecek Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -518,7 +518,7 @@
        ISO_SUSP_HEADER *pend;
        struct buf *bp = NULL;
        char *pwhead;
-       u_char c;
+       u_int16_t c;
        int result;
 
        /*
@@ -649,7 +649,7 @@
 {
        ISO_RRIP_ANALYZE analyze;
        const RRIP_TABLE *tab;
-       u_char c;
+       u_int16_t c;
 
        analyze.outbuf = outbuf;
        analyze.outlen = outlen;
diff -r b6194ae809eb -r 9a92504b3bdb sys/fs/cd9660/cd9660_util.c
--- a/sys/fs/cd9660/cd9660_util.c       Sun Nov 21 21:07:15 2004 +0000
+++ b/sys/fs/cd9660/cd9660_util.c       Sun Nov 21 21:49:08 2004 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: cd9660_util.c,v 1.2 2003/08/07 16:31:35 agc Exp $      */
+/*     $NetBSD: cd9660_util.c,v 1.3 2004/11/21 21:49:08 jdolecek Exp $ */
 
 /*-
  * Copyright (c) 1994
@@ -37,7 +37,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cd9660_util.c,v 1.2 2003/08/07 16:31:35 agc Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cd9660_util.c,v 1.3 2004/11/21 21:49:08 jdolecek Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -56,6 +56,13 @@
 #include <fs/cd9660/iso.h>
 #include <fs/cd9660/cd9660_extern.h>
 
+#include <fs/unicode.h>
+
+static u_int16_t wget(const u_char **, int);
+static int wput(u_char *, size_t, u_int16_t, int);
+
+int cd9660_utf8_joliet = 1;
+
 /*
  * Get one character out of an iso filename
  * Return number of bytes consumed
@@ -65,22 +72,24 @@
        const u_char *isofn;
        const u_char *isoend;
        int joliet_level;
-       u_char *c;
+       u_int16_t *c;
 {
-       *c = *isofn++;
-       if (joliet_level == 0 || isofn == isoend)
+       *c = isofn[0];
+       if (joliet_level == 0 || isofn + 1 == isoend) {
                /* (00) and (01) are one byte in Joliet, too */
                return 1;
+       }
 
-       /* No Unicode support yet :-( */
-       switch (*c) {
-       default:
-               *c = '?';
-               break;
-       case '\0':
-               *c = *isofn;
-               break;
+       if (cd9660_utf8_joliet) {
+               *c = (*c << 8) + isofn[1];
+       } else {
+               /* characters outside ISO-8859-1 subset replaced with '?' */
+               if (*c != 0)
+                       *c = '?';
+               else
+                       *c = isofn[1];
        }
+
        return 2;
 }
 
@@ -94,54 +103,57 @@
        int fnlen, isolen, joliet_level;
 {
        int i, j;
-       char c;
+       u_int16_t fc, ic;
        const u_char *isoend = isofn + isolen;
 
-       while (--fnlen >= 0) {
+       /* fn should always contain standard C string, and wget() needs it */
+       KASSERT(fn[fnlen] == 0);
+
+       while ((fc = wget(&fn, joliet_level)) && fc) {
                if (isofn == isoend)
-                       return *fn;
-               isofn += isochar(isofn, isoend, joliet_level, &c);
-               if (c == ';') {
-                       switch (*fn++) {
+                       return fc;
+               isofn += isochar(isofn, isoend, joliet_level, &ic);
+               if (ic == ';') {
+                       switch (fc) {
                        default:
-                               return *--fn;
+                               return fc;
                        case 0:
                                return 0;
                        case ';':
                                break;
                        }
+                       fn++;
                        for (i = 0; --fnlen >= 0; i = i * 10 + *fn++ - '0') {
                                if (*fn < '0' || *fn > '9') {
                                        return -1;
                                }
                        }
-                       for (j = 0; isofn != isoend; j = j * 10 + c - '0')
+                       for (j = 0; isofn != isoend; j = j * 10 + ic - '0')
                                isofn += isochar(isofn, isoend,
-                                                joliet_level, &c);
+                                                joliet_level, &ic);
                        return i - j;
                }
-               if (((u_char) c) != *fn) {
-                       if (c >= 'A' && c <= 'Z') {
-                               if (c + ('a' - 'A') != *fn) {
-                                       if (*fn >= 'a' && *fn <= 'z')
-                                               return *fn - ('a' - 'A') - c;
-                                       else
-                                               return *fn - c;
+               if (ic != fc) {
+                       if (ic >= 'A' && ic <= 'Z') {
+                               if (ic + ('a' - 'A') != fc) {
+                                       if (fc >= 'a' && fc <= 'z')
+                                               fc -= 'a' - 'A';
+
+                                       return (int) fc - (int) ic;
                                }
                        } else
-                               return *fn - c;
+                               return (int) fc - (int) ic;
                }
-               fn++;
        }
        if (isofn != isoend) {
-               isofn += isochar(isofn, isoend, joliet_level, &c);
-               switch (c) {
+               isofn += isochar(isofn, isoend, joliet_level, &ic);
+               switch (ic) {
                default:
                        return -1;
                case '.':
                        if (isofn != isoend) {
-                               isochar(isofn, isoend, joliet_level, &c);
-                               if (c == ';')
+                               isochar(isofn, isoend, joliet_level, &ic);
+                               if (ic == ';')
                                        return 0;
                        }
                        return -1;
@@ -167,24 +179,71 @@
 {
        int fnidx = 0;
        u_char *infnend = infn + infnlen;
-       
+       u_int16_t c;
+       int sz;
+
        if (assoc) {
                *outfn++ = ASSOCCHAR;
                fnidx++;
        }
-       for (; infn != infnend; fnidx++) {
-               char c;
 
+       for(; infn != infnend; fnidx += sz) {
                infn += isochar(infn, infnend, joliet_level, &c);
 
                if (casetrans && joliet_level == 0 && c >= 'A' && c <= 'Z')
-                       *outfn++ = c + ('a' - 'A');
+                       c = c + ('a' - 'A');
                else if (!original && c == ';') {
                        if (fnidx > 0 && outfn[-1] == '.')
                                fnidx--;
                        break;
-               } else
-                       *outfn++ = c;
+               }
+
+               sz = wput(outfn, MAXNAMLEN - fnidx, c, joliet_level);
+               if (sz == 0) {
+                       /* not enough space to write the character */
+                       if (fnidx < MAXNAMLEN) {
+                               *outfn = '?';
+                               fnidx++;
+                       }
+                       break;
+               }
+               outfn += sz;
        }
        *outfnlen = fnidx;
 }
+
+static u_int16_t
+wget(const u_char **str, int joliet_level)
+{
+       if (joliet_level > 0 && cd9660_utf8_joliet) {
+               /* decode UTF-8 sequence */
+               return wget_utf8((const char **) str);
+       } else {
+               /*
+                * Raw 8-bit characters without any conversion. For Joliet,
+                * this effectively assumes provided file name is using
+                * ISO-8859-1 subset.
+                */
+               u_int16_t c = *str[0];
+               (*str)++;
+
+               return c;
+       }
+}
+
+static int
+wput(u_char *s, size_t n, u_int16_t c, int joliet_level)
+{
+       if (joliet_level > 0 && cd9660_utf8_joliet) {
+               /* Store Joliet file name encoded into UTF-8 */
+               return wput_utf8((char *)s, n, c);
+       } else {
+               /*
+                * Store raw 8-bit characters without any conversion.
+                * For Joliet case, this filters the Unicode characters



Home | Main Index | Thread Index | Old Index