Subject: kern/16653: msdosfs mistakenly assumes CP437 as on-disk file name character set.
To: None <gnats-bugs@gnats.netbsd.org>
From: None <svs@ropnet.ru>
List: netbsd-bugs
Date: 05/03/2002 19:08:12
>Number:         16653
>Category:       kern
>Synopsis:       msdosfs mistakenly assumes CP437 as on-disk file name character set.
>Confidential:   no
>Severity:       non-critical
>Priority:       medium
>Responsible:    kern-bug-people
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Fri May 03 19:09:00 PDT 2002
>Closed-Date:
>Last-Modified:
>Originator:     Sergey Svishchev
>Release:        1.5ZC
>Organization:
>Environment:
>Description:
The built-in conversion tables, when applied to file names stored in a code page other than CP437 (e.g. CP866), make these files inaccessible.
>How-To-Repeat:
Mount a filesystem created under a localized (Russian) version of Windows.  Try to access files whose names contain Cyrillic characters.  The access fails.

>Fix:
Not a real fix (should probably import FreeBSD's msdosfs code), but a workaround:

Index: sys/msdosfs/msdosfs_conv.c
===================================================================
RCS file: /cvsroot/syssrc/sys/msdosfs/msdosfs_conv.c,v
retrieving revision 1.32
diff -u -r1.32 msdosfs_conv.c
--- msdosfs_conv.c	2002/01/08 20:44:13	1.32
+++ msdosfs_conv.c	2002/04/29 22:21:38
@@ -66,6 +66,16 @@
 #include <msdosfs/direntry.h>
 #include <msdosfs/denode.h>
 
+#ifndef MSDOSFS_NOCONV
+#define U2L(x) u2l[x]
+#define UNIX2DOS(x) unix2dos[x]
+#define DOS2UNIX(x) dos2unix[x]
+#else
+#define U2L(x) (x)
+#define UNIX2DOS(x) (x)
+#define DOS2UNIX(x) (x)
+#endif
+
 /*
  * Days in each month in a regular year.
  */
@@ -232,6 +242,7 @@
 	tsp->tv_nsec = (dh % 100) * 10000000;
 }
 
+#ifndef MSDOSFS_NOCONV
 static const u_char
 unix2dos[256] = {
 	0,    0,    0,    0,    0,    0,    0,    0,	/* 00-07 */
@@ -339,6 +350,7 @@
 	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* f0-f7 */
 	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* f8-ff */
 };
+#endif
 
 /*
  * DOS filenames are made of 2 parts, the name part and the extension part.
@@ -369,10 +381,10 @@
 	 * directory slot. Another dos quirk.
 	 */
 	if (*dn == SLOT_E5)
-		c = dos2unix[0xe5];
+		c = DOS2UNIX(0xe5);
 	else
-		c = dos2unix[*dn];
-	*un++ = lower ? u2l[c] : c;
+		c = DOS2UNIX(*dn);
+	*un++ = lower ? U2L(c) : c;
 
 	/*
 	 * Copy the rest into the unix filename string, ignoring
@@ -383,8 +395,8 @@
 		;
 
 	for (i = 1; i <= j; i++) {
-		c = dos2unix[dn[i]];
-		*un++ = lower ? u2l[c] : c;
+		c = DOS2UNIX(dn[i]);
+		*un++ = lower ? U2L(c) : c;
 		thislong++;
 	}
 	dn += 8;
@@ -397,8 +409,8 @@
 		*un++ = '.';
 		thislong++;
 		for (i = 0; i < 3 && *dn != ' '; i++) {
-			c = dos2unix[*dn++];
-			*un++ = lower ? u2l[c] : c;
+			c = DOS2UNIX(*dn++);
+			*un++ = lower ? U2L(c) : c;
 			thislong++;
 		}
 	}
@@ -493,7 +505,7 @@
 		else
 			l = unlen - (dp - un);
 		for (i = 0, j = 8; i < l && j < 11; i++, j++) {
-			if (dp[i] != (dn[j] = unix2dos[dp[i]])
+			if (dp[i] != (dn[j] = UNIX2DOS(dp[i]))
 			    && conv != 3)
 				conv = 2;
 			if (!dn[j]) {
@@ -518,7 +530,7 @@
 		if ((*un == ' ') && shortlen)
 			dn[j] = ' ';
 		else
-			dn[j] = unix2dos[*un];
+			dn[j] = UNIX2DOS(*un);
 		if ((*un != dn[j])
 		    && conv != 3)
 			conv = 2;
@@ -693,7 +705,7 @@
 				return chksum;
 			return -1;
 		}
-		if (u2l[*cp++] != u2l[*un++] || *cp++)
+		if (U2L(*cp++) != U2L(*un++) || *cp++)
 			return -1;
 	}
 	for (cp = wep->wePart2, i = sizeof(wep->wePart2)/2; --i >= 0;) {
@@ -702,7 +714,7 @@
 				return chksum;
 			return -1;
 		}
-		if (u2l[*cp++] != u2l[*un++] || *cp++)
+		if (U2L(*cp++) != U2L(*un++) || *cp++)
 			return -1;
 	}
 	for (cp = wep->wePart3, i = sizeof(wep->wePart3)/2; --i >= 0;) {
@@ -711,7 +723,7 @@
 				return chksum;
 			return -1;
 		}
-		if (u2l[*cp++] != u2l[*un++] || *cp++)
+		if (U2L(*cp++) != U2L(*un++) || *cp++)
 			return -1;
 	}
 	return chksum;

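To use the workaround, add "options MSDOSFS_NOCONV" to the kernel configuration and build a new kernel; with this option the conversion tables are compiled out and file name bytes are passed through unchanged.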
>Release-Note:
>Audit-Trail:
>Unformatted: