tech-kern archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[PATCH] Support INCOMPAT_64BIT on ext4
This patch adds support for the INCOMPAT_64BIT feature on ext4 filesystems. This feature is enabled by default on new filesystems created on Ubuntu, and probably on other distributions as well.
From 101769076e44208c2fac45aceddd4a46c33eaefb Mon Sep 17 00:00:00 2001
From: Vladimir Serbinenko <phcoder%gmail.com@localhost>
Date: Mon, 21 Aug 2023 20:55:35 +0200
Subject: [PATCH 1/2] ext2fs: Support INCOMPAT_64BIT feature
---
sys/ufs/ext2fs/ext2fs.h | 27 ++++++++----
sys/ufs/ext2fs/ext2fs_alloc.c | 75 ++++++++++++++++++++-------------
sys/ufs/ext2fs/ext2fs_vfsops.c | 76 ++++++++++++++++++++++++++++++----
3 files changed, 132 insertions(+), 46 deletions(-)
diff --git a/sys/ufs/ext2fs/ext2fs.h b/sys/ufs/ext2fs/ext2fs.h
index 320192b03a8..ae814b76798 100644
--- a/sys/ufs/ext2fs/ext2fs.h
+++ b/sys/ufs/ext2fs/ext2fs.h
@@ -252,6 +252,7 @@ struct m_ext2fs {
int32_t e2fs_ngdb; /* number of group descriptor blocks */
int32_t e2fs_ipb; /* number of inodes per block */
int32_t e2fs_itpg; /* number of inode table blocks per group */
+ u_int8_t e2fs_group_desc_shift; /* binary log group desc size */
struct ext2_gd *e2fs_gd; /* group descriptors (data not byteswapped) */
};
@@ -370,7 +371,8 @@ struct m_ext2fs {
| EXT2F_ROCOMPAT_GDT_CSUM)
#define EXT2F_INCOMPAT_SUPP (EXT2F_INCOMPAT_FTYPE \
| EXT2F_INCOMPAT_EXTENTS \
- | EXT2F_INCOMPAT_FLEX_BG)
+ | EXT2F_INCOMPAT_FLEX_BG \
+ | EXT2F_INCOMPAT_64BIT)
/*
* Feature set definitions
@@ -432,10 +434,14 @@ struct ext2_gd {
uint16_t ext2bgd_itable_unused_lo; /* Low unused inode offset */
uint16_t ext2bgd_checksum; /* Group desc checksum */
- /*
- * XXX disk32 Further fields only exist if 64BIT feature is on
- * and superblock desc_size > 32, not supported for now.
- */
+ u_int32_t ext2bgd_b_bitmap_hi; /* blocks bitmap block (high bits) */
+ u_int32_t ext2bgd_i_bitmap_hi; /* inodes bitmap block (high bits) */
+ u_int32_t ext2bgd_i_tables_hi; /* inodes table block (high bits) */
+ u_int16_t ext2bgd_nbfree_hi; /* number of free blocks (high bits) */
+ u_int16_t ext2bgd_nifree_hi; /* number of free inodes (high bits) */
+ u_int16_t ext2bgd_ndirs_hi; /* number of directories (high bits) */
+ u_int16_t reserved_hi;
+ u_int32_t reserved2_hi[3];
};
#define E2FS_BG_INODE_UNINIT 0x0001 /* Inode bitmap not used/initialized */
@@ -492,15 +498,18 @@ void e2fs_sb_bswap(struct ext2fs *, struct ext2fs *);
# define e2fs_sbsave(old, new) e2fs_sb_bswap((old), (new))
#endif
-/* Group descriptors are not byte swapped */
-#define e2fs_cgload(old, new, size) memcpy((new), (old), (size))
-#define e2fs_cgsave(old, new, size) memcpy((new), (old), (size))
+/*
+ * Copy one block of on-disk group descriptors into the in-memory array.
+ * cg_size is the number of bytes at ondisk; each on-disk descriptor
+ * occupies (1 << shift_cg_entry_size) bytes.  Parameter names follow the
+ * order used by the definition and by the callers.
+ */
+void e2fs_cgload(const char *ondisk, struct ext2_gd *inmemory,
+    int cg_size, int shift_cg_entry_size);
+/*
+ * Copy in-memory group descriptors out to one on-disk block image.
+ * cg_size is the number of bytes at ondisk; each on-disk descriptor
+ * occupies (1 << shift_cg_entry_size) bytes.  Parameter names follow the
+ * order used by the definition and by the callers.
+ */
+void e2fs_cgsave(const struct ext2_gd *inmemory, char *ondisk,
+    int cg_size, int shift_cg_entry_size);
/*
* Turn file system block numbers into disk block addresses.
* This maps file system blocks to device size blocks.
*/
#define EXT2_FSBTODB(fs, b) ((b) << (fs)->e2fs_fsbtodb)
+#define EXT2_FSBTODB64(fs, b, b_hi) (((((u_int64_t)(b_hi)) << 32) | (b)) << (fs)->e2fs_fsbtodb)
+#define EXT2_FSBTODB64OFF(fs, b, b_hi, off) ((((((u_int64_t)(b_hi)) << 32) | (b)) + (off)) << (fs)->e2fs_fsbtodb)
#define EXT2_DBTOFSB(fs, b) ((b) >> (fs)->e2fs_fsbtodb)
/*
@@ -512,6 +521,8 @@ void e2fs_sb_bswap(struct ext2fs *, struct ext2fs *);
#define ino_to_cg(fs, x) (((x) - 1) / (fs)->e2fs.e2fs_ipg)
#define ino_to_fsba(fs, x) \
(fs2h32((fs)->e2fs_gd[ino_to_cg((fs), (x))].ext2bgd_i_tables) + \
+ (((u_int64_t)fs2h32((fs)->e2fs_gd[ino_to_cg((fs), (x))].ext2bgd_i_tables_hi)) \
+ << 32) + \
(((x) - 1) % (fs)->e2fs.e2fs_ipg) / (fs)->e2fs_ipb)
#define ino_to_fsbo(fs, x) (((x) - 1) % (fs)->e2fs_ipb)
diff --git a/sys/ufs/ext2fs/ext2fs_alloc.c b/sys/ufs/ext2fs/ext2fs_alloc.c
index a130242833a..b78ba827641 100644
--- a/sys/ufs/ext2fs/ext2fs_alloc.c
+++ b/sys/ufs/ext2fs/ext2fs_alloc.c
@@ -91,7 +91,7 @@ static u_long ext2fs_hashalloc(struct inode *, int, long, int,
static daddr_t ext2fs_nodealloccg(struct inode *, int, daddr_t, int);
static daddr_t ext2fs_mapsearch(struct m_ext2fs *, char *, daddr_t);
static __inline void ext2fs_cg_update(struct m_ext2fs *, int, struct ext2_gd *, int, int, int, daddr_t);
-static uint16_t ext2fs_cg_get_csum(struct m_ext2fs *, int, struct ext2_gd *);
+static uint16_t ext2fs_cg_get_csum(struct m_ext2fs *, int, struct ext2_gd *, size_t);
static void ext2fs_init_bb(struct m_ext2fs *, int, struct ext2_gd *, char *);
/*
@@ -212,13 +212,19 @@ ext2fs_dirpref(struct m_ext2fs *fs)
avgifree = fs->e2fs.e2fs_ficount / fs->e2fs_ncg;
maxspace = 0;
mincg = -1;
- for (cg = 0; cg < fs->e2fs_ncg; cg++)
- if (fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree) >= avgifree) {
- if (mincg == -1 || fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree) > maxspace) {
+ for (cg = 0; cg < fs->e2fs_ncg; cg++) {
+ u_int32_t nifree = (fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree_hi) << 16)
+ | fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree);
+ if (nifree >= avgifree) {
+ u_int32_t nbfree
+ = (fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree_hi) << 16)
+ | fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree);
+ if (mincg == -1 || nbfree > maxspace) {
mincg = cg;
- maxspace = fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree);
+ maxspace = nbfree;
}
}
+ }
return mincg;
}
@@ -333,14 +339,14 @@ ext2fs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
struct m_ext2fs *fs;
char *bbp;
struct buf *bp;
- /* XXX ondisk32 */
int error, bno, start, end, loc;
fs = ip->i_e2fs;
- if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0)
+ if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0 && fs->e2fs_gd[cg].ext2bgd_nbfree_hi == 0)
return 0;
- error = bread(ip->i_devvp, EXT2_FSBTODB(fs,
- fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap)),
+ error = bread(ip->i_devvp, EXT2_FSBTODB64(fs,
+ fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap),
+ fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap_hi)),
(int)fs->e2fs_bsize, B_MODIFY, &bp);
if (error) {
return 0;
@@ -437,10 +443,11 @@ ext2fs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
if (ipref == -1)
ipref = 0;
fs = ip->i_e2fs;
- if (fs->e2fs_gd[cg].ext2bgd_nifree == 0)
+ /* The group is out of inodes only when both halves of the free count are zero. */
+ if (fs->e2fs_gd[cg].ext2bgd_nifree == 0 && fs->e2fs_gd[cg].ext2bgd_nifree_hi == 0)
return 0;
- error = bread(ip->i_devvp, EXT2_FSBTODB(fs,
- fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap)),
+ error = bread(ip->i_devvp, EXT2_FSBTODB64(fs,
+ fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap),
+ fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap_hi)),
(int)fs->e2fs_bsize, B_MODIFY, &bp);
if (error) {
return 0;
@@ -519,7 +526,7 @@ ext2fs_blkfree(struct inode *ip, daddr_t bno)
return;
}
error = bread(ip->i_devvp,
- EXT2_FSBTODB(fs, fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap)),
+ EXT2_FSBTODB64(fs, fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap), fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap_hi)),
(int)fs->e2fs_bsize, B_MODIFY, &bp);
if (error) {
return;
@@ -566,7 +573,7 @@ ext2fs_vfree(struct vnode *pvp, ino_t ino, int mode)
KASSERT(!E2FS_HAS_GD_CSUM(fs) || (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_UNINIT)) == 0);
error = bread(pip->i_devvp,
- EXT2_FSBTODB(fs, fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap)),
+ EXT2_FSBTODB64(fs, fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap), fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap_hi)),
(int)fs->e2fs_bsize, B_MODIFY, &bp);
if (error) {
return 0;
@@ -647,9 +654,11 @@ ext2fs_fserr(struct m_ext2fs *fs, u_int uid, const char *cp)
static __inline void
ext2fs_cg_update(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, int nbfree, int nifree, int ndirs, daddr_t ioff)
{
- /* XXX disk32 */
if (nifree) {
- gd->ext2bgd_nifree = h2fs16(fs2h16(gd->ext2bgd_nifree) + nifree);
+ u_int32_t ext2bgd_nifree = fs2h16(gd->ext2bgd_nifree) | (fs2h16(gd->ext2bgd_nifree_hi) << 16);
+ ext2bgd_nifree += nifree;
+ gd->ext2bgd_nifree = h2fs16(ext2bgd_nifree);
+ gd->ext2bgd_nifree_hi = h2fs16(ext2bgd_nifree >> 16);
/*
* If we allocated inode on bigger offset than what was
* ever used before, bump the itable_unused count. This
@@ -662,18 +671,27 @@ ext2fs_cg_update(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, int nbfree, in
gd->ext2bgd_itable_unused_lo = h2fs16(fs->e2fs.e2fs_ipg - (ioff + 1));
}
- KASSERT(!E2FS_HAS_GD_CSUM(fs) || gd->ext2bgd_itable_unused_lo <= gd->ext2bgd_nifree);
+ KASSERT(!E2FS_HAS_GD_CSUM(fs) || gd->ext2bgd_itable_unused_lo <= ext2bgd_nifree);
}
- if (nbfree)
- gd->ext2bgd_nbfree = h2fs16(fs2h16(gd->ext2bgd_nbfree) + nbfree);
+ if (nbfree) {
+ u_int32_t ext2bgd_nbfree = fs2h16(gd->ext2bgd_nbfree) | (fs2h16(gd->ext2bgd_nbfree_hi) << 16);
+ ext2bgd_nbfree += nbfree;
+ gd->ext2bgd_nbfree = h2fs16(ext2bgd_nbfree);
+ gd->ext2bgd_nbfree_hi = h2fs16(ext2bgd_nbfree >> 16);
+
+ }
- if (ndirs)
- gd->ext2bgd_ndirs = h2fs16(fs2h16(gd->ext2bgd_ndirs) + ndirs);
+ if (ndirs) {
+ u_int32_t ext2bgd_ndirs = fs2h16(gd->ext2bgd_ndirs) | (fs2h16(gd->ext2bgd_ndirs_hi) << 16);
+ ext2bgd_ndirs += ndirs;
+ gd->ext2bgd_ndirs = h2fs16(ext2bgd_ndirs);
+ gd->ext2bgd_ndirs_hi = h2fs16(ext2bgd_ndirs >> 16);
+ }
if (E2FS_HAS_GD_CSUM(fs))
- gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd);
+ gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd, 1 << fs->e2fs_group_desc_shift);
}
/*
@@ -681,7 +699,7 @@ ext2fs_cg_update(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, int nbfree, in
* Returned as LE (disk encoding).
*/
static uint16_t
-ext2fs_cg_get_csum(struct m_ext2fs *fs, int cg, struct ext2_gd *gd)
+ext2fs_cg_get_csum(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, size_t cgsize)
{
uint16_t crc;
uint32_t cg_bswapped = h2fs32((uint32_t)cg);
@@ -695,7 +713,7 @@ ext2fs_cg_get_csum(struct m_ext2fs *fs, int cg, struct ext2_gd *gd)
crc = crc16(~0, (uint8_t *)fs->e2fs.e2fs_uuid, sizeof(fs->e2fs.e2fs_uuid));
crc = crc16(crc, (uint8_t *)&cg_bswapped, sizeof(cg_bswapped));
crc = crc16(crc, (uint8_t *)gd, off);
- /* XXX ondisk32 */
+ crc = crc16(crc, (uint8_t *)gd + off + 2, cgsize - (off + 2));
return h2fs16(crc);
}
@@ -723,7 +741,6 @@ ext2fs_init_bb(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, char *bbp)
int
ext2fs_cg_verify_and_initialize(struct vnode *devvp, struct m_ext2fs *fs, int ronly)
{
- /* XXX disk32 */
struct ext2_gd *gd;
ino_t ioff;
size_t boff;
@@ -737,7 +754,7 @@ ext2fs_cg_verify_and_initialize(struct vnode *devvp, struct m_ext2fs *fs, int ro
gd = &fs->e2fs_gd[cg];
/* Verify checksum */
- if (gd->ext2bgd_checksum != ext2fs_cg_get_csum(fs, cg, gd)) {
+ if (gd->ext2bgd_checksum != ext2fs_cg_get_csum(fs, cg, gd, 1 << fs->e2fs_group_desc_shift)) {
printf("ext2fs_cg_verify_and_initialize: group %d invalid csum\n", cg);
return EINVAL;
}
@@ -761,7 +778,7 @@ ext2fs_cg_verify_and_initialize(struct vnode *devvp, struct m_ext2fs *fs, int ro
if (boff) {
/* partial wipe, must read old data */
error = bread(devvp,
- EXT2_FSBTODB(fs, fs2h32(gd->ext2bgd_i_tables) + i),
+ EXT2_FSBTODB64OFF(fs, fs2h32(gd->ext2bgd_i_tables), fs2h32(gd->ext2bgd_i_tables_hi), i),
(int)fs->e2fs_bsize, B_MODIFY, &bp);
if (error) {
printf("ext2fs_cg_verify_and_initialize: can't read itable block");
@@ -775,7 +792,7 @@ ext2fs_cg_verify_and_initialize(struct vnode *devvp, struct m_ext2fs *fs, int ro
* assumes nothing else is changing the data.
*/
bp = getblk(devvp,
- EXT2_FSBTODB(fs, fs2h32(gd->ext2bgd_i_tables) + i),
+ EXT2_FSBTODB64OFF(fs, fs2h32(gd->ext2bgd_i_tables), fs2h32(gd->ext2bgd_i_tables_hi), i),
(int)fs->e2fs_bsize, 0, 0);
clrbuf(bp);
}
@@ -784,7 +801,7 @@ ext2fs_cg_verify_and_initialize(struct vnode *devvp, struct m_ext2fs *fs, int ro
}
gd->ext2bgd_flags |= h2fs16(E2FS_BG_INODE_ZEROED);
- gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd);
+ gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd, 1 << fs->e2fs_group_desc_shift);
fs->e2fs_fmod = 1;
}
diff --git a/sys/ufs/ext2fs/ext2fs_vfsops.c b/sys/ufs/ext2fs/ext2fs_vfsops.c
index c7fb6a2346b..21e851e5fdb 100644
--- a/sys/ufs/ext2fs/ext2fs_vfsops.c
+++ b/sys/ufs/ext2fs/ext2fs_vfsops.c
@@ -597,9 +597,9 @@ ext2fs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l)
if (error) {
return error;
}
- e2fs_cgload((struct ext2_gd *)bp->b_data,
- &fs->e2fs_gd[i * fs->e2fs_bsize / sizeof(struct ext2_gd)],
- fs->e2fs_bsize);
+ /*
+ * Same indexing and arguments as the initial load in ext2fs_mountfs():
+ * (bsize >> shift) descriptors per on-disk block, and the shift itself
+ * (not 1 << shift) as the last argument.
+ */
+ e2fs_cgload(bp->b_data,
+ &fs->e2fs_gd[i * (fs->e2fs_bsize >> fs->e2fs_group_desc_shift)],
+ fs->e2fs_bsize, fs->e2fs_group_desc_shift);
brelse(bp, 0);
}
@@ -705,7 +705,8 @@ ext2fs_mountfs(struct vnode *devvp, struct mount *mp)
}
/* XXX: should be added in ext2fs_sbfill()? */
- m_fs->e2fs_gd = kmem_alloc(m_fs->e2fs_ngdb * m_fs->e2fs_bsize, KM_SLEEP);
+ m_fs->e2fs_gd = kmem_alloc(m_fs->e2fs_ngdb * (m_fs->e2fs_bsize >> m_fs->e2fs_group_desc_shift)
+ * sizeof(struct ext2_gd), KM_SLEEP);
for (i = 0; i < m_fs->e2fs_ngdb; i++) {
error = bread(devvp,
EXT2_FSBTODB(m_fs, m_fs->e2fs.e2fs_first_dblock +
@@ -716,10 +717,9 @@ ext2fs_mountfs(struct vnode *devvp, struct mount *mp)
m_fs->e2fs_ngdb * m_fs->e2fs_bsize);
goto out;
}
- e2fs_cgload((struct ext2_gd *)bp->b_data,
- &m_fs->e2fs_gd[
- i * m_fs->e2fs_bsize / sizeof(struct ext2_gd)],
- m_fs->e2fs_bsize);
+ e2fs_cgload(bp->b_data,
+ &m_fs->e2fs_gd[i * (m_fs->e2fs_bsize >> m_fs->e2fs_group_desc_shift)],
+ m_fs->e2fs_bsize, m_fs->e2fs_group_desc_shift);
brelse(bp, 0);
bp = NULL;
}
@@ -1277,7 +1277,7 @@ ext2fs_cgupdate(struct ufsmount *mp, int waitfor)
1 /* superblock */ + i), fs->e2fs_bsize, 0, 0);
- e2fs_cgsave(&fs->e2fs_gd[
- i * fs->e2fs_bsize / sizeof(struct ext2_gd)],
- (struct ext2_gd *)bp->b_data, fs->e2fs_bsize);
+ /*
+ * Each on-disk block holds (bsize >> shift) descriptors; index the
+ * in-memory array the same way ext2fs_mountfs() does when loading.
+ */
+ e2fs_cgsave(&fs->e2fs_gd[
+ i * (fs->e2fs_bsize >> fs->e2fs_group_desc_shift)],
+ bp->b_data, fs->e2fs_bsize, fs->e2fs_group_desc_shift);
if (waitfor == MNT_WAIT)
error = bwrite(bp);
else
@@ -1348,7 +1348,17 @@ ext2fs_sbfill(struct m_ext2fs *m_fs, int ronly)
m_fs->e2fs_qbmask = m_fs->e2fs_bsize - 1;
m_fs->e2fs_bmask = ~m_fs->e2fs_qbmask;
- if ((u32 = m_fs->e2fs_bsize / sizeof(struct ext2_gd)) == 0) {
+ if (!(fs->e2fs_features_incompat & EXT2F_INCOMPAT_64BIT) ||
+ (fs->e2fs_rev == E2FS_REV0))
+ m_fs->e2fs_group_desc_shift = 5;
+ else {
+ for (m_fs->e2fs_group_desc_shift = 0;
+ (1 << m_fs->e2fs_group_desc_shift)
+ < fs->e3fs_desc_size;
+ m_fs->e2fs_group_desc_shift++);
+ }
+
+ if ((u32 = (m_fs->e2fs_bsize >> m_fs->e2fs_group_desc_shift)) == 0) {
/* Unlikely to happen */
printf("ext2fs: invalid block size\n");
return EINVAL;
@@ -1401,3 +1411,51 @@ ext2fs_sbfill(struct m_ext2fs *m_fs, int ronly)
return 0;
}
+
+/*
+ * Expand one block of on-disk group descriptors into in-memory form.
+ *
+ * ondisk points at cg_size bytes holding (cg_size >> shift_cg_entry_size)
+ * descriptors of (1 << shift_cg_entry_size) bytes each; inmemory receives
+ * one struct ext2_gd per descriptor.  Bytes are copied verbatim; no byte
+ * swapping is done here.
+ */
+void
+e2fs_cgload(const char *ondisk, struct ext2_gd *inmemory,
+    int cg_size, int shift_cg_entry_size)
+{
+	const char *iptr = ondisk;
+	struct ext2_gd *optr = inmemory;
+	int i;
+
+	if (shift_cg_entry_size > 6) {
+		/* On-disk entry is wider than 64 bytes: keep its leading part. */
+		for (i = 0; i < (cg_size >> shift_cg_entry_size); i++, optr++,
+		    iptr += (1 << shift_cg_entry_size)) {
+			memcpy(optr, iptr, sizeof(struct ext2_gd));
+		}
+	} else if (shift_cg_entry_size == 6) {
+		/*
+		 * One bulk copy; presumably relies on
+		 * sizeof(struct ext2_gd) == 64 -- TODO confirm.
+		 */
+		memcpy(inmemory, ondisk, cg_size);
+	} else {
+		/* Short entries: copy the first 32 bytes, zero the rest. */
+		for (i = 0; i < (cg_size >> shift_cg_entry_size); i++, optr++,
+		    iptr += (1 << shift_cg_entry_size)) {
+			memcpy(optr, iptr, 32);
+			/*
+			 * The offset must be in bytes: optr + 32 on a
+			 * struct ext2_gd pointer would address the entry
+			 * 32 descriptors further on, not the tail of
+			 * this one.
+			 */
+			memset((char *)optr + 32, 0,
+			    sizeof(struct ext2_gd) - 32);
+		}
+	}
+}
+
+/*
+ * Pack in-memory group descriptors into one on-disk block image.
+ *
+ * ondisk receives (cg_size >> shift_cg_entry_size) descriptors of
+ * (1 << shift_cg_entry_size) bytes each, taken in order from inmemory.
+ * Bytes are copied verbatim; no byte swapping is done here.
+ */
+void
+e2fs_cgsave(const struct ext2_gd *inmemory, char *ondisk,
+    int cg_size, int shift_cg_entry_size)
+{
+	const struct ext2_gd *src = inmemory;
+	char *dst = ondisk;
+	int total, done;
+
+	/* 64-byte on-disk entries: the whole block is copied in one go. */
+	if (shift_cg_entry_size == 6) {
+		memcpy(ondisk, inmemory, cg_size);
+		return;
+	}
+
+	total = cg_size >> shift_cg_entry_size;
+
+	/* Short on-disk entries: only the first 32 bytes are written. */
+	if (shift_cg_entry_size < 6) {
+		for (done = 0; done < total; done++) {
+			memcpy(dst, src, 32);
+			src++;
+			dst += (1 << shift_cg_entry_size);
+		}
+		return;
+	}
+
+	/* Wide on-disk entries: write the descriptor, zero the padding. */
+	for (done = 0; done < total; done++) {
+		memcpy(dst, src, sizeof(struct ext2_gd));
+		memset(dst + sizeof(struct ext2_gd), 0,
+		    (1 << shift_cg_entry_size) - sizeof(struct ext2_gd));
+		src++;
+		dst += (1 << shift_cg_entry_size);
+	}
+}
--
2.39.2
Home |
Main Index |
Thread Index |
Old Index