Subject: kern/34583: patch for write speed improvement for msdosfs
To: None <kern-bug-people@netbsd.org, gnats-admin@netbsd.org,>
From: Rhialto <rhialto@falu.nl>
List: netbsd-bugs
Date: 09/21/2006 23:35:00
>Number:         34583
>Category:       kern
>Synopsis:       patch for write speed improvement for msdosfs
>Confidential:   no
>Severity:       non-critical
>Priority:       medium
>Responsible:    kern-bug-people
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Thu Sep 21 23:35:00 +0000 2006
>Originator:     Rhialto
>Release:        NetBSD 3.0 and -current
>Organization:
	
>Environment:
System: NetBSD radl.falu.nl 3.0 NetBSD 3.0 (Radls Doordringend Onjuiste Akkoord) #0: Sat Jan 28 16:44:07 CET 2006 root@radl.falu.nl:/usr/src/sys/arch/amd64/compile/RADL amd64
Architecture: x86_64
Machine: amd64
Actually, I tested it on my i386 laptop running an older -current.
>Description:
	I noticed that when writing large file (hundreds of megabytes)
	to an msdos disk, the writing speed to a file decreases with the
	file length.
	Since I have some experience with messydos filesystems (I wrote
	MSH: for the Amiga) I took a look.
	The obvious suspicion with operations that slow down with the
	length of a file is an excessive traversal of the FAT cluster
	chain. However, there is a cache that caches 2 positions: the
	last cluster in the file, and the "most recently looked up" one.
	Debugging info showed however that frequent full traversals were
	still made. So, apparently when extending a file and after
	updating the end cluster, the previous end is again needed.
	Adding a 3rd entry in the cache, which keeps the end position
	from just before extending a file.
	This has the desired effect of keeping the write speed constant.
	(What it is that needs that position I have not been able to
	ascertain from the filesystem code; it doesn't seem to make
	sense, actually, to read or write clusters before the original
	EOF. I was hoping to find the place where the cache is trashed
	and rewrite it to get the desired info from it beforehand, so
	that the extra cache entry is again unneeded, but alas.)

>How-To-Repeat:
	cp largefile /msdos/file/system/ &
	systat vmstat
	observe I/O speed on the msdos disk going down steadily.
>Fix:
	Patch:

Index: denode.h
===================================================================
RCS file: /cvsroot/src/sys/fs/msdosfs/denode.h,v
retrieving revision 1.13
diff -u -r1.13 denode.h
--- denode.h	14 May 2006 21:31:52 -0000	1.13
+++ denode.h	21 Sep 2006 23:07:44 -0000
@@ -119,10 +119,11 @@
  * cache is probably pretty worthless if a file is opened by multiple
  * processes.
  */
-#define	FC_SIZE		2	/* number of entries in the cache */
+#define	FC_SIZE		3	/* number of entries in the cache */
 #define	FC_LASTMAP	0	/* entry the last call to pcbmap() resolved
 				 * to */
 #define	FC_LASTFC	1	/* entry for the last cluster in the file */
+#define	FC_NEXTTOLASTFC	2	/* entry for a close to the last cluster in the file */
 
 #define	FCE_EMPTY	0xffffffff	/* doesn't represent an actual cluster # */
 
@@ -133,6 +134,13 @@
 	(dep)->de_fc[slot].fc_frcn = frcn; \
 	(dep)->de_fc[slot].fc_fsrcn = fsrcn;
 
+#define fc_last_to_nexttolast(dep) \
+	do {  \
+		(dep)->de_fc[FC_NEXTTOLASTFC].fc_frcn = (dep)->de_fc[FC_LASTFC].fc_frcn; \
+		(dep)->de_fc[FC_NEXTTOLASTFC].fc_fsrcn = (dep)->de_fc[FC_LASTFC].fc_fsrcn; \
+	} while (0)
+	 
+
 /*
  * This is the in memory variant of a dos directory entry.  It is usually
  * contained within a vnode.
Index: msdosfs_fat.c
===================================================================
RCS file: /cvsroot/src/sys/fs/msdosfs/msdosfs_fat.c,v
retrieving revision 1.10
diff -u -r1.10 msdosfs_fat.c
--- msdosfs_fat.c	14 May 2006 21:31:52 -0000	1.10
+++ msdosfs_fat.c	21 Sep 2006 23:07:44 -0000
@@ -85,6 +85,30 @@
 int fc_lmdistance[LMMAX];	/* counters for how far off the last
 				 * cluster mapped entry was. */
 int fc_largedistance;		/* off by more than LMMAX		 */
+int fc_wherefrom, fc_whereto, fc_lastclust;
+int pm_fatblocksize;
+
+#ifdef MSDOSFS_DEBUG
+void print_fat_stats(void);
+
+void
+print_fat_stats(void)
+{
+    int i;
+
+    printf("fc_fileextends=%d fc_lfcempty=%d fc_bmapcalls=%d fc_largedistance=%d [%d->%d=%d] fc_lastclust=%d pm_fatblocksize=%d\n",
+	    fc_fileextends, fc_lfcempty, fc_bmapcalls, fc_largedistance,
+	    fc_wherefrom, fc_whereto, fc_whereto-fc_wherefrom,
+	    fc_lastclust, pm_fatblocksize);
+    fc_fileextends = fc_lfcempty = fc_bmapcalls = fc_wherefrom = fc_whereto = fc_lastclust = 0;
+    for (i = 0; i < LMMAX; i++) {
+	printf("%d:%d ", i, fc_lmdistance[i]);
+	fc_lmdistance[i] = 0;
+    }
+    printf("\n");
+}
+
+#endif
 
 static void fatblock(struct msdosfsmount *, u_long, u_long *, u_long *,
 			  u_long *);
@@ -117,6 +141,7 @@
 		*sizep = size;
 	if (bop)
 		*bop = ofs % pmp->pm_fatblocksize;
+	pm_fatblocksize =  pmp->pm_fatblocksize;
 }
 
 /*
@@ -208,9 +233,12 @@
 	 */
 	i = 0;
 	fc_lookup(dep, findcn, &i, &cn);
-	if ((bn = findcn - i) >= LMMAX)
+	if ((bn = findcn - i) >= LMMAX) {
 		fc_largedistance++;
-	else
+		fc_wherefrom = i;
+		fc_whereto = findcn;
+		fc_lastclust = dep->de_fc[FC_LASTFC].fc_frcn;
+	} else
 		fc_lmdistance[bn]++;
 
 	/*
@@ -1013,6 +1041,8 @@
 			return (error);
 	}
 
+	fc_last_to_nexttolast(dep);
+
 	while (count > 0) {
 
 		/*

This is a actually not a part of my change, just a small generic
improvement that I happened to notice, but I want it checked.
The 2 if-statements test the same condition so they can be combined.

Index: msdosfs_vnops.c
===================================================================
RCS file: /cvsroot/src/sys/fs/msdosfs/msdosfs_vnops.c,v
retrieving revision 1.30
diff -u -r1.30 msdosfs_vnops.c
--- msdosfs_vnops.c	23 Jul 2006 22:06:10 -0000	1.30
+++ msdosfs_vnops.c.new	2006-09-22 01:10:26.000000000 +0200
@@ -637,9 +637,7 @@
 		if ((error = extendfile(dep, count, NULL, NULL, 0)) &&
 		    (error != ENOSPC || (ioflag & IO_UNIT)))
 			goto errexit;
-	}
 
-	if (dep->de_FileSize < uio->uio_offset + resid) {
 		dep->de_FileSize = uio->uio_offset + resid;
 		uvm_vnp_setsize(vp, dep->de_FileSize);
 		extended = 1;

-Olaf.
-- 
___ Olaf 'Rhialto' Seibert      -- You author it, and I'll reader it.
\X/ rhialto/at/xs4all.nl        -- Cetero censeo "authored" delendum esse.