diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-04-24 14:25:39 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-04-24 14:25:39 -0700 |
| commit | 61d325dcbc05d8fef88110d35ef7776f3ac3f68b (patch) | |
| tree | 52bfc73caa90e761a64e62de2b241068d01bd497 | |
| parent | 97adb49f052e70455c3529509885f8aa3b40c370 (diff) | |
| parent | 745ed7d77834048879bf24088c94e5a6462b613f (diff) | |
| download | linux-61d325dcbc05d8fef88110d35ef7776f3ac3f68b.tar.gz linux-61d325dcbc05d8fef88110d35ef7776f3ac3f68b.tar.bz2 linux-61d325dcbc05d8fef88110d35ef7776f3ac3f68b.zip | |
Merge tag 'erofs-for-6.4-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs
Pull erofs updates from Gao Xiang:
"In this cycle, sub-page block support for uncompressed files is
available. It's mainly used to enable original signing ('golden')
4k-block images on arm64 with 16/64k pages. In addition, end users
could also use this feature to build a manifest to directly refer to
golden tar data.
In addition, long xattr name prefix support is introduced in this
cycle to avoid repeating the same prefix across many xattrs (e.g.
overlayfs xattrs). It's useful for the erofs + overlayfs combination
(like the Composefs model): the image size is reduced by ~14% and
runtime performance is also slightly improved.
Others are random fixes and cleanups as usual.
Summary:
- Add sub-page block size support for uncompressed files
- Support a flattened block device for multi-blob images so that they
can be attached to virtual machines (including cloud servers) and
bare-metal hosts
- Support long xattr name prefixes to optimize images whose files share
common xattr namespaces (e.g. files with overlayfs xattrs)
- Various minor cleanups & fixes"
* tag 'erofs-for-6.4-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
erofs: cleanup i_format-related stuffs
erofs: sunset erofs_dbg()
erofs: fix potential overflow calculating xattr_isize
erofs: get rid of z_erofs_fill_inode()
erofs: enable long extended attribute name prefixes
erofs: handle long xattr name prefixes properly
erofs: add helpers to load long xattr name prefixes
erofs: introduce on-disk format for long xattr name prefixes
erofs: move packed inode out of the compression part
erofs: keep meta inode into erofs_buf
erofs: initialize packed inode after root inode is assigned
erofs: stop parsing non-compact HEAD index if clusterofs is invalid
erofs: don't warn ztailpacking feature anymore
erofs: simplify erofs_xattr_generic_get()
erofs: rename init_inode_xattrs with erofs_ prefix
erofs: move several xattr helpers into xattr.c
erofs: tidy up EROFS on-disk naming
erofs: support flattened block device for multi-blob images
erofs: set block size to the on-disk block size
erofs: avoid hardcoded blocksize for subpage block support
| -rw-r--r-- | Documentation/filesystems/erofs.rst | 4 | ||||
| -rw-r--r-- | fs/erofs/data.c | 81 | ||||
| -rw-r--r-- | fs/erofs/decompressor.c | 6 | ||||
| -rw-r--r-- | fs/erofs/decompressor_lzma.c | 4 | ||||
| -rw-r--r-- | fs/erofs/dir.c | 25 | ||||
| -rw-r--r-- | fs/erofs/erofs_fs.h | 176 | ||||
| -rw-r--r-- | fs/erofs/fscache.c | 5 | ||||
| -rw-r--r-- | fs/erofs/inode.c | 36 | ||||
| -rw-r--r-- | fs/erofs/internal.h | 73 | ||||
| -rw-r--r-- | fs/erofs/namei.c | 27 | ||||
| -rw-r--r-- | fs/erofs/super.c | 116 | ||||
| -rw-r--r-- | fs/erofs/xattr.c | 224 | ||||
| -rw-r--r-- | fs/erofs/xattr.h | 27 | ||||
| -rw-r--r-- | fs/erofs/zdata.c | 25 | ||||
| -rw-r--r-- | fs/erofs/zmap.c | 166 | ||||
| -rw-r--r-- | include/trace/events/erofs.h | 4 |
16 files changed, 540 insertions, 459 deletions
diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst index a43aacf1494e..4654ee57c1d5 100644 --- a/Documentation/filesystems/erofs.rst +++ b/Documentation/filesystems/erofs.rst @@ -40,8 +40,8 @@ Here are the main features of EROFS: - Support multiple devices to refer to external blobs, which can be used for container images; - - 4KiB block size and 32-bit block addresses for each device, therefore - 16TiB address space at most for now; + - 32-bit block addresses for each device, therefore 16TiB address space at + most with 4KiB block size for now; - Two inode layouts for different requirements: diff --git a/fs/erofs/data.c b/fs/erofs/data.c index c08c0f578bc6..6fe9a779fa91 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -27,11 +27,15 @@ void erofs_put_metabuf(struct erofs_buf *buf) buf->page = NULL; } -void *erofs_bread(struct erofs_buf *buf, struct inode *inode, - erofs_blk_t blkaddr, enum erofs_kmap_type type) +/* + * Derive the block size from inode->i_blkbits to make compatible with + * anonymous inode in fscache mode. 
+ */ +void *erofs_bread(struct erofs_buf *buf, erofs_blk_t blkaddr, + enum erofs_kmap_type type) { - struct address_space *const mapping = inode->i_mapping; - erofs_off_t offset = blknr_to_addr(blkaddr); + struct inode *inode = buf->inode; + erofs_off_t offset = (erofs_off_t)blkaddr << inode->i_blkbits; pgoff_t index = offset >> PAGE_SHIFT; struct page *page = buf->page; struct folio *folio; @@ -41,7 +45,7 @@ void *erofs_bread(struct erofs_buf *buf, struct inode *inode, erofs_put_metabuf(buf); nofs_flag = memalloc_nofs_save(); - folio = read_cache_folio(mapping, index, NULL, NULL); + folio = read_cache_folio(inode->i_mapping, index, NULL, NULL); memalloc_nofs_restore(nofs_flag); if (IS_ERR(folio)) return folio; @@ -63,14 +67,19 @@ void *erofs_bread(struct erofs_buf *buf, struct inode *inode, return buf->base + (offset & ~PAGE_MASK); } -void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, - erofs_blk_t blkaddr, enum erofs_kmap_type type) +void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb) { if (erofs_is_fscache_mode(sb)) - return erofs_bread(buf, EROFS_SB(sb)->s_fscache->inode, - blkaddr, type); + buf->inode = EROFS_SB(sb)->s_fscache->inode; + else + buf->inode = sb->s_bdev->bd_inode; +} - return erofs_bread(buf, sb->s_bdev->bd_inode, blkaddr, type); +void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, + erofs_blk_t blkaddr, enum erofs_kmap_type type) +{ + erofs_init_metabuf(buf, sb); + return erofs_bread(buf, blkaddr, type); } static int erofs_map_blocks_flatmode(struct inode *inode, @@ -79,33 +88,32 @@ static int erofs_map_blocks_flatmode(struct inode *inode, erofs_blk_t nblocks, lastblk; u64 offset = map->m_la; struct erofs_inode *vi = EROFS_I(inode); + struct super_block *sb = inode->i_sb; bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE); - nblocks = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ); + nblocks = erofs_iblks(inode); lastblk = nblocks - tailendpacking; /* there is no hole in 
flatmode */ map->m_flags = EROFS_MAP_MAPPED; - if (offset < blknr_to_addr(lastblk)) { - map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la; - map->m_plen = blknr_to_addr(lastblk) - offset; + if (offset < erofs_pos(sb, lastblk)) { + map->m_pa = erofs_pos(sb, vi->raw_blkaddr) + map->m_la; + map->m_plen = erofs_pos(sb, lastblk) - offset; } else if (tailendpacking) { map->m_pa = erofs_iloc(inode) + vi->inode_isize + - vi->xattr_isize + erofs_blkoff(offset); + vi->xattr_isize + erofs_blkoff(sb, offset); map->m_plen = inode->i_size - offset; /* inline data should be located in the same meta block */ - if (erofs_blkoff(map->m_pa) + map->m_plen > EROFS_BLKSIZ) { - erofs_err(inode->i_sb, - "inline data cross block boundary @ nid %llu", + if (erofs_blkoff(sb, map->m_pa) + map->m_plen > sb->s_blocksize) { + erofs_err(sb, "inline data cross block boundary @ nid %llu", vi->nid); DBG_BUGON(1); return -EFSCORRUPTED; } map->m_flags |= EROFS_MAP_META; } else { - erofs_err(inode->i_sb, - "internal error @ nid: %llu (size %llu), m_la 0x%llx", + erofs_err(sb, "internal error @ nid: %llu (size %llu), m_la 0x%llx", vi->nid, inode->i_size, map->m_la); DBG_BUGON(1); return -EIO; @@ -148,29 +156,29 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map) pos = ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, unit) + unit * chunknr; - kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), EROFS_KMAP); + kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(sb, pos), EROFS_KMAP); if (IS_ERR(kaddr)) { err = PTR_ERR(kaddr); goto out; } map->m_la = chunknr << vi->chunkbits; map->m_plen = min_t(erofs_off_t, 1UL << vi->chunkbits, - roundup(inode->i_size - map->m_la, EROFS_BLKSIZ)); + round_up(inode->i_size - map->m_la, sb->s_blocksize)); /* handle block map */ if (!(vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) { - __le32 *blkaddr = kaddr + erofs_blkoff(pos); + __le32 *blkaddr = kaddr + erofs_blkoff(sb, pos); if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) { 
map->m_flags = 0; } else { - map->m_pa = blknr_to_addr(le32_to_cpu(*blkaddr)); + map->m_pa = erofs_pos(sb, le32_to_cpu(*blkaddr)); map->m_flags = EROFS_MAP_MAPPED; } goto out_unlock; } /* parse chunk indexes */ - idx = kaddr + erofs_blkoff(pos); + idx = kaddr + erofs_blkoff(sb, pos); switch (le32_to_cpu(idx->blkaddr)) { case EROFS_NULL_ADDR: map->m_flags = 0; @@ -178,7 +186,7 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map) default: map->m_deviceid = le16_to_cpu(idx->device_id) & EROFS_SB(sb)->device_id_mask; - map->m_pa = blknr_to_addr(le32_to_cpu(idx->blkaddr)); + map->m_pa = erofs_pos(sb, le32_to_cpu(idx->blkaddr)); map->m_flags = EROFS_MAP_MAPPED; break; } @@ -197,7 +205,6 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) struct erofs_device_info *dif; int id; - /* primary device by default */ map->m_bdev = sb->s_bdev; map->m_daxdev = EROFS_SB(sb)->dax_dev; map->m_dax_part_off = EROFS_SB(sb)->dax_part_off; @@ -210,20 +217,25 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) up_read(&devs->rwsem); return -ENODEV; } + if (devs->flatdev) { + map->m_pa += erofs_pos(sb, dif->mapped_blkaddr); + up_read(&devs->rwsem); + return 0; + } map->m_bdev = dif->bdev; map->m_daxdev = dif->dax_dev; map->m_dax_part_off = dif->dax_part_off; map->m_fscache = dif->fscache; up_read(&devs->rwsem); - } else if (devs->extra_devices) { + } else if (devs->extra_devices && !devs->flatdev) { down_read(&devs->rwsem); idr_for_each_entry(&devs->tree, dif, id) { erofs_off_t startoff, length; if (!dif->mapped_blkaddr) continue; - startoff = blknr_to_addr(dif->mapped_blkaddr); - length = blknr_to_addr(dif->blocks); + startoff = erofs_pos(sb, dif->mapped_blkaddr); + length = erofs_pos(sb, dif->blocks); if (map->m_pa >= startoff && map->m_pa < startoff + length) { @@ -244,6 +256,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, unsigned int flags, struct iomap *iomap, struct iomap *srcmap) { int 
ret; + struct super_block *sb = inode->i_sb; struct erofs_map_blocks map; struct erofs_map_dev mdev; @@ -258,7 +271,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, .m_deviceid = map.m_deviceid, .m_pa = map.m_pa, }; - ret = erofs_map_dev(inode->i_sb, &mdev); + ret = erofs_map_dev(sb, &mdev); if (ret) return ret; @@ -284,11 +297,11 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, struct erofs_buf buf = __EROFS_BUF_INITIALIZER; iomap->type = IOMAP_INLINE; - ptr = erofs_read_metabuf(&buf, inode->i_sb, - erofs_blknr(mdev.m_pa), EROFS_KMAP); + ptr = erofs_read_metabuf(&buf, sb, + erofs_blknr(sb, mdev.m_pa), EROFS_KMAP); if (IS_ERR(ptr)) return PTR_ERR(ptr); - iomap->inline_data = ptr + erofs_blkoff(mdev.m_pa); + iomap->inline_data = ptr + erofs_blkoff(sb, mdev.m_pa); iomap->private = buf.base; } else { iomap->type = IOMAP_MAPPED; diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 51b7ac7166d9..7021e2cf6146 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -42,7 +42,7 @@ int z_erofs_load_lz4_config(struct super_block *sb, if (!sbi->lz4.max_pclusterblks) { sbi->lz4.max_pclusterblks = 1; /* reserved case */ } else if (sbi->lz4.max_pclusterblks > - Z_EROFS_PCLUSTER_MAX_SIZE / EROFS_BLKSIZ) { + erofs_blknr(sb, Z_EROFS_PCLUSTER_MAX_SIZE)) { erofs_err(sb, "too large lz4 pclusterblks %u", sbi->lz4.max_pclusterblks); return -EINVAL; @@ -221,13 +221,13 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, support_0padding = true; ret = z_erofs_fixup_insize(rq, headpage + rq->pageofs_in, min_t(unsigned int, rq->inputsize, - EROFS_BLKSIZ - rq->pageofs_in)); + rq->sb->s_blocksize - rq->pageofs_in)); if (ret) { kunmap_atomic(headpage); return ret; } may_inplace = !((rq->pageofs_in + rq->inputsize) & - (EROFS_BLKSIZ - 1)); + (rq->sb->s_blocksize - 1)); } inputmargin = rq->pageofs_in; diff --git a/fs/erofs/decompressor_lzma.c 
b/fs/erofs/decompressor_lzma.c index d38e19c11270..73091fbe3ea4 100644 --- a/fs/erofs/decompressor_lzma.c +++ b/fs/erofs/decompressor_lzma.c @@ -166,8 +166,8 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, /* 1. get the exact LZMA compressed size */ kin = kmap(*rq->in); err = z_erofs_fixup_insize(rq, kin + rq->pageofs_in, - min_t(unsigned int, rq->inputsize, - EROFS_BLKSIZ - rq->pageofs_in)); + min_t(unsigned int, rq->inputsize, + rq->sb->s_blocksize - rq->pageofs_in)); if (err) { kunmap(*rq->in); return err; diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c index 6970b09b8307..b80abec0531a 100644 --- a/fs/erofs/dir.c +++ b/fs/erofs/dir.c @@ -50,44 +50,43 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) { struct inode *dir = file_inode(f); struct erofs_buf buf = __EROFS_BUF_INITIALIZER; + struct super_block *sb = dir->i_sb; + unsigned long bsz = sb->s_blocksize; const size_t dirsize = i_size_read(dir); - unsigned int i = ctx->pos / EROFS_BLKSIZ; - unsigned int ofs = ctx->pos % EROFS_BLKSIZ; + unsigned int i = erofs_blknr(sb, ctx->pos); + unsigned int ofs = erofs_blkoff(sb, ctx->pos); int err = 0; bool initial = true; + buf.inode = dir; while (ctx->pos < dirsize) { struct erofs_dirent *de; unsigned int nameoff, maxsize; - de = erofs_bread(&buf, dir, i, EROFS_KMAP); + de = erofs_bread(&buf, i, EROFS_KMAP); if (IS_ERR(de)) { - erofs_err(dir->i_sb, - "fail to readdir of logical block %u of nid %llu", + erofs_err(sb, "fail to readdir of logical block %u of nid %llu", i, EROFS_I(dir)->nid); err = PTR_ERR(de); break; } nameoff = le16_to_cpu(de->nameoff); - if (nameoff < sizeof(struct erofs_dirent) || - nameoff >= EROFS_BLKSIZ) { - erofs_err(dir->i_sb, - "invalid de[0].nameoff %u @ nid %llu", + if (nameoff < sizeof(struct erofs_dirent) || nameoff >= bsz) { + erofs_err(sb, "invalid de[0].nameoff %u @ nid %llu", nameoff, EROFS_I(dir)->nid); err = -EFSCORRUPTED; break; } - maxsize = min_t(unsigned int, - dirsize - ctx->pos + ofs, 
EROFS_BLKSIZ); + maxsize = min_t(unsigned int, dirsize - ctx->pos + ofs, bsz); /* search dirents at the arbitrary position */ if (initial) { initial = false; ofs = roundup(ofs, sizeof(struct erofs_dirent)); - ctx->pos = blknr_to_addr(i) + ofs; + ctx->pos = erofs_pos(sb, i) + ofs; if (ofs >= nameoff) goto skip_this; } @@ -97,7 +96,7 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) if (err) break; skip_this: - ctx->pos = blknr_to_addr(i) + maxsize; + ctx->pos = erofs_pos(sb, i) + maxsize; ++i; ofs = 0; } diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h index dbcd24371002..2c7b16e340fe 100644 --- a/fs/erofs/erofs_fs.h +++ b/fs/erofs/erofs_fs.h @@ -27,6 +27,7 @@ #define EROFS_FEATURE_INCOMPAT_ZTAILPACKING 0x00000010 #define EROFS_FEATURE_INCOMPAT_FRAGMENTS 0x00000020 #define EROFS_FEATURE_INCOMPAT_DEDUPE 0x00000020 +#define EROFS_FEATURE_INCOMPAT_XATTR_PREFIXES 0x00000040 #define EROFS_ALL_FEATURE_INCOMPAT \ (EROFS_FEATURE_INCOMPAT_ZERO_PADDING | \ EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \ @@ -36,7 +37,8 @@ EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 | \ EROFS_FEATURE_INCOMPAT_ZTAILPACKING | \ EROFS_FEATURE_INCOMPAT_FRAGMENTS | \ - EROFS_FEATURE_INCOMPAT_DEDUPE) + EROFS_FEATURE_INCOMPAT_DEDUPE | \ + EROFS_FEATURE_INCOMPAT_XATTR_PREFIXES) #define EROFS_SB_EXTSLOT_SIZE 16 @@ -53,7 +55,7 @@ struct erofs_super_block { __le32 magic; /* file system magic number */ __le32 checksum; /* crc32c(super_block) */ __le32 feature_compat; - __u8 blkszbits; /* support block_size == PAGE_SIZE only */ + __u8 blkszbits; /* filesystem block size in bit shift */ __u8 sb_extslots; /* superblock size = 128 + sb_extslots * 16 */ __le16 root_nid; /* nid of root directory */ @@ -75,49 +77,46 @@ struct erofs_super_block { } __packed u1; __le16 extra_devices; /* # of devices besides the primary device */ __le16 devt_slotoff; /* startoff = devt_slotoff * devt_slotsize */ - __u8 reserved[6]; + __u8 dirblkbits; /* directory block size in bit shift */ + __u8 xattr_prefix_count; /* # of 
long xattr name prefixes */ + __le32 xattr_prefix_start; /* start of long xattr prefixes */ __le64 packed_nid; /* nid of the special packed inode */ __u8 reserved2[24]; }; /* - * erofs inode datalayout (i_format in on-disk inode): + * EROFS inode datalayout (i_format in on-disk inode): * 0 - uncompressed flat inode without tail-packing inline data: - * inode, [xattrs], ... | ... | no-holed data * 1 - compressed inode with non-compact indexes: - * inode, [xattrs], [map_header], extents ... | ... * 2 - uncompressed flat inode with tail-packing inline data: - * inode, [xattrs], tailpacking data, ... | ... | no-holed data * 3 - compressed inode with compact indexes: - * inode, [xattrs], map_header, extents ... | ... * 4 - chunk-based inode with (optional) multi-device support: - * inode, [xattrs], chunk indexes ... | ... * 5~7 - reserved */ enum { EROFS_INODE_FLAT_PLAIN = 0, - EROFS_INODE_FLAT_COMPRESSION_LEGACY = 1, + EROFS_INODE_COMPRESSED_FULL = 1, EROFS_INODE_FLAT_INLINE = 2, - EROFS_INODE_FLAT_COMPRESSION = 3, + EROFS_INODE_COMPRESSED_COMPACT = 3, EROFS_INODE_CHUNK_BASED = 4, EROFS_INODE_DATALAYOUT_MAX }; static inline bool erofs_inode_is_data_compressed(unsigned int datamode) { - return datamode == EROFS_INODE_FLAT_COMPRESSION || - datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY; + return datamode == EROFS_INODE_COMPRESSED_COMPACT || + datamode == EROFS_INODE_COMPRESSED_FULL; } /* bit definitions of inode i_format */ -#define EROFS_I_VERSION_BITS 1 -#define EROFS_I_DATALAYOUT_BITS 3 +#define EROFS_I_VERSION_MASK 0x01 +#define EROFS_I_DATALAYOUT_MASK 0x07 #define EROFS_I_VERSION_BIT 0 #define EROFS_I_DATALAYOUT_BIT 1 +#define EROFS_I_ALL_BIT 4 -#define EROFS_I_ALL \ - ((1 << (EROFS_I_DATALAYOUT_BIT + EROFS_I_DATALAYOUT_BITS)) - 1) +#define EROFS_I_ALL ((1 << EROFS_I_ALL_BIT) - 1) /* indicate chunk blkbits, thus 'chunksize = blocksize << chunk blkbits' */ #define EROFS_CHUNK_FORMAT_BLKBITS_MASK 0x001F @@ -127,11 +126,30 @@ static inline bool 
erofs_inode_is_data_compressed(unsigned int datamode) #define EROFS_CHUNK_FORMAT_ALL \ (EROFS_CHUNK_FORMAT_BLKBITS_MASK | EROFS_CHUNK_FORMAT_INDEXES) +/* 32-byte on-disk inode */ +#define EROFS_INODE_LAYOUT_COMPACT 0 +/* 64-byte on-disk inode */ +#define EROFS_INODE_LAYOUT_EXTENDED 1 + struct erofs_inode_chunk_info { __le16 format; /* chunk blkbits, etc. */ __le16 reserved; }; +union erofs_inode_i_u { + /* total compressed blocks for compressed inodes */ + __le32 compressed_blocks; + + /* block address for uncompressed flat inodes */ + __le32 raw_blkaddr; + + /* for device files, used to indicate old/new device # */ + __le32 rdev; + + /* for chunk-based files, it contains the summary info */ + struct erofs_inode_chunk_info c; +}; + /* 32-byte reduced form of an ondisk inode */ struct erofs_inode_compact { __le16 i_format; /* inode format hints */ @@ -142,29 +160,14 @@ struct erofs_inode_compact { __le16 i_nlink; __le32 i_size; __le32 i_reserved; - union { - /* total compressed blocks for compressed inodes */ - __le32 compressed_blocks; - /* block address for uncompressed flat inodes */ - __le32 raw_blkaddr; - - /* for device files, used to indicate old/new device # */ - __le32 rdev; - - /* for chunk-based files, it contains the summary info */ - struct erofs_inode_chunk_info c; - } i_u; - __le32 i_ino; /* only used for 32-bit stat compatibility */ + union erofs_inode_i_u i_u; + + __le32 i_ino; /* only used for 32-bit stat compatibility */ __le16 i_uid; __le16 i_gid; __le32 i_reserved2; }; -/* 32-byte on-disk inode */ -#define EROFS_INODE_LAYOUT_COMPACT 0 -/* 64-byte on-disk inode */ -#define EROFS_INODE_LAYOUT_EXTENDED 1 - /* 64-byte complete form of an ondisk inode */ struct erofs_inode_extended { __le16 i_format; /* inode format hints */ @@ -174,22 +177,9 @@ struct erofs_inode_extended { __le16 i_mode; __le16 i_reserved; __le64 i_size; - union { - /* total compressed blocks for compressed inodes */ - __le32 compressed_blocks; - /* block address for uncompressed 
flat inodes */ - __le32 raw_blkaddr; - - /* for device files, used to indicate old/new device # */ - __le32 rdev; - - /* for chunk-based files, it contains the summary info */ - struct erofs_inode_chunk_info c; - } i_u; - - /* only used for 32-bit stat compatibility */ - __le32 i_ino; + union erofs_inode_i_u i_u; + __le32 i_ino; /* only used for 32-bit stat compatibility */ __le32 i_uid; __le32 i_gid; __le64 i_mtime; @@ -198,10 +188,6 @@ struct erofs_inode_extended { __u8 i_reserved2[16]; }; -#define EROFS_MAX_SHARED_XATTRS (128) -/* h_shared_count between 129 ... 255 are special # */ -#define EROFS_SHARED_XATTR_EXTENT (255) - /* * inline xattrs (n == i_xattr_icount): * erofs_xattr_ibody_header(1) + (n - 1) * 4 bytes @@ -228,6 +214,13 @@ struct erofs_xattr_ibody_header { #define EROFS_XATTR_INDEX_LUSTRE 5 #define EROFS_XATTR_INDEX_SECURITY 6 +/* + * bit 7 of e_name_index is set when it refers to a long xattr name prefix, + * while the remained lower bits represent the index of the prefix. 
+ */ +#define EROFS_XATTR_LONG_PREFIX 0x80 +#define EROFS_XATTR_LONG_PREFIX_MASK 0x7f + /* xattr entry (for both inline & shared xattrs) */ struct erofs_xattr_entry { __u8 e_name_len; /* length of name */ @@ -237,6 +230,12 @@ struct erofs_xattr_entry { char e_name[]; /* attribute name */ }; +/* long xattr name prefix */ +struct erofs_xattr_long_prefix { + __u8 base_index; /* short xattr name prefix index */ + char infix[]; /* infix apart from short prefix */ +}; + static inline unsigned int erofs_xattr_ibody_size(__le16 i_xattr_icount) { if (!i_xattr_icount) @@ -267,6 +266,22 @@ struct erofs_inode_chunk_index { __le32 blkaddr; /* start block address of this inode chunk */ }; +/* dirent sorts in alphabet order, thus we can do binary search */ +struct erofs_dirent { + __le64 nid; /* node number */ + __le16 nameoff; /* start offset of file name */ + __u8 file_type; /* file type */ + __u8 reserved; /* reserved */ +} __packed; + +/* + * EROFS file types should match generic FT_* types and + * it seems no need to add BUILD_BUG_ONs since potential + * unmatchness will break other fses as well... 
+ */ + +#define EROFS_NAME_LEN 255 + /* maximum supported size of a physical compression cluster */ #define Z_EROFS_PCLUSTER_MAX_SIZE (1024 * 1024) @@ -336,10 +351,8 @@ struct z_erofs_map_header { __u8 h_clusterbits; }; -#define Z_EROFS_VLE_LEGACY_HEADER_PADDING 8 - /* - * Fixed-sized output compression on-disk logical cluster type: + * On-disk logical cluster type: * 0 - literal (uncompressed) lcluster * 1,3 - compressed lcluster (for HEAD lclusters) * 2 - compressed lcluster (for NONHEAD lclusters) @@ -363,27 +376,27 @@ struct z_erofs_map_header { * di_u.delta[1] = distance to the next HEAD lcluster */ enum { - Z_EROFS_VLE_CLUSTER_TYPE_PLAIN = 0, - Z_EROFS_VLE_CLUSTER_TYPE_HEAD1 = 1, - Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD = 2, - Z_EROFS_VLE_CLUSTER_TYPE_HEAD2 = 3, - Z_EROFS_VLE_CLUSTER_TYPE_MAX + Z_EROFS_LCLUSTER_TYPE_PLAIN = 0, + Z_EROFS_LCLUSTER_TYPE_HEAD1 = 1, + Z_EROFS_LCLUSTER_TYPE_NONHEAD = 2, + Z_EROFS_LCLUSTER_TYPE_HEAD2 = 3, + Z_EROFS_LCLUSTER_TYPE_MAX }; -#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS 2 -#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT 0 +#define Z_EROFS_LI_LCLUSTER_TYPE_BITS 2 +#define Z_EROFS_LI_LCLUSTER_TYPE_BIT 0 /* (noncompact only, HEAD) This pcluster refers to partial decompressed data */ -#define Z_EROFS_VLE_DI_PARTIAL_REF (1 << 15) +#define Z_EROFS_LI_PARTIAL_REF (1 << 15) /* * D0_CBLKCNT will be marked _only_ at the 1st non-head lcluster to store the * compressed block count of a compressed extent (in logical clusters, aka. * block count of a pcluster). 
*/ -#define Z_EROFS_VLE_DI_D0_CBLKCNT (1 << 11) +#define Z_EROFS_LI_D0_CBLKCNT (1 << 11) -struct z_erofs_vle_decompressed_index { +struct z_erofs_lcluster_index { __le16 di_advise; /* where to decompress in the head lcluster */ __le16 di_clusterofs; @@ -400,25 +413,8 @@ struct z_erofs_vle_decompressed_index { } di_u; }; -#define Z_EROFS_VLE_LEGACY_INDEX_ALIGN(size) \ - (round_up(size, sizeof(struct z_erofs_vle_decompressed_index)) + \ - sizeof(struct z_erofs_map_header) + Z_EROFS_VLE_LEGACY_HEADER_PADDING) - -/* dirent sorts in alphabet order, thus we can do binary search */ -struct erofs_dirent { - __le64 nid; /* node number */ - __le16 nameoff; /* start offset of file name */ - __u8 file_type; /* file type */ - __u8 reserved; /* reserved */ -} __packed; - -/* - * EROFS file types should match generic FT_* types and - * it seems no need to add BUILD_BUG_ONs since potential - * unmatchness will break other fses as well... - */ - -#define EROFS_NAME_LEN 255 +#define Z_EROFS_FULL_INDEX_ALIGN(end) \ + (ALIGN(end, 8) + sizeof(struct z_erofs_map_header) + 8) /* check the EROFS on-disk layout strictly at compile time */ static inline void erofs_check_ondisk_layout_definitions(void) @@ -435,15 +431,15 @@ static inline void erofs_check_ondisk_layout_definitions(void) BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_info) != 4); BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) != 8); BUILD_BUG_ON(sizeof(struct z_erofs_map_header) != 8); - BUILD_BUG_ON(sizeof(struct z_erofs_vle_decompressed_index) != 8); + BUILD_BUG_ON(sizeof(struct z_erofs_lcluster_index) != 8); BUILD_BUG_ON(sizeof(struct erofs_dirent) != 12); /* keep in sync between 2 index structures for better extendibility */ BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) != - sizeof(struct z_erofs_vle_decompressed_index)); + sizeof(struct z_erofs_lcluster_index)); BUILD_BUG_ON(sizeof(struct erofs_deviceslot) != 128); - BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) < - Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1); + 
BUILD_BUG_ON(BIT(Z_EROFS_LI_LCLUSTER_TYPE_BITS) < + Z_EROFS_LCLUSTER_TYPE_MAX - 1); /* exclude old compiler versions like gcc 7.5.0 */ BUILD_BUG_ON(__builtin_constant_p(fmh) ? fmh != cpu_to_le64(1ULL << 63) : 0); diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index 96a87c023128..87ff35bff8d5 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -209,8 +209,8 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary) void *src; /* For tail packing layout, the offset may be non-zero. */ - offset = erofs_blkoff(map.m_pa); - blknr = erofs_blknr(map.m_pa); + offset = erofs_blkoff(sb, map.m_pa); + blknr = erofs_blknr(sb, map.m_pa); size = map.m_llen; src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP); @@ -460,6 +460,7 @@ static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb inode->i_size = OFFSET_MAX; inode->i_mapping->a_ops = &erofs_fscache_meta_aops; mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); + inode->i_blkbits = EROFS_SB(sb)->blkszbits; inode->i_private = ctx; ctx->cookie = cookie; diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 4be7dda3cd24..d70b12b81507 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -23,11 +23,8 @@ static void *erofs_read_inode(struct erofs_buf *buf, unsigned int ifmt; int err; - blkaddr = erofs_blknr(inode_loc); - *ofs = erofs_blkoff(inode_loc); - - erofs_dbg("%s, reading inode nid %llu at %u of blkaddr %u", - __func__, vi->nid, *ofs, blkaddr); + blkaddr = erofs_blknr(sb, inode_loc); + *ofs = erofs_blkoff(sb, inode_loc); kaddr = erofs_read_metabuf(buf, sb, blkaddr, EROFS_KMAP); if (IS_ERR(kaddr)) { @@ -58,11 +55,11 @@ static void *erofs_read_inode(struct erofs_buf *buf, case EROFS_INODE_LAYOUT_EXTENDED: vi->inode_isize = sizeof(struct erofs_inode_extended); /* check if the extended inode acrosses block boundary */ - if (*ofs + vi->inode_isize <= EROFS_BLKSIZ) { + if (*ofs + vi->inode_isize <= sb->s_blocksize) { *ofs += vi->inode_isize; die = (struct 
erofs_inode_extended *)dic; } else { - const unsigned int gotten = EROFS_BLKSIZ - *ofs; + const unsigned int gotten = sb->s_blocksize - *ofs; copied = kmalloc(vi->inode_isize, GFP_NOFS); if (!copied) { @@ -176,7 +173,7 @@ static void *erofs_read_inode(struct erofs_buf *buf, err = -EOPNOTSUPP; goto err_out; } - vi->chunkbits = LOG_BLOCK_SIZE + + vi->chunkbits = sb->s_blocksize_bits + (vi->chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK); } inode->i_mtime.tv_sec = inode->i_ctime.tv_sec; @@ -188,11 +185,12 @@ static void *erofs_read_inode(struct erofs_buf *buf, if (test_opt(&sbi->opt, DAX_ALWAYS) && S_ISREG(inode->i_mode) && vi->datalayout == EROFS_INODE_FLAT_PLAIN) inode->i_flags |= S_DAX; + if (!nblks) /* measure inode.i_blocks as generic filesystems */ - inode->i_blocks = roundup(inode->i_size, EROFS_BLKSIZ) >> 9; + inode->i_blocks = round_up(inode->i_size, sb->s_blocksize) >> 9; else - inode->i_blocks = nblks << LOG_SECTORS_PER_BLOCK; + inode->i_blocks = nblks << (sb->s_blocksize_bits - 9); return kaddr; bogusimode: @@ -210,11 +208,12 @@ static int erofs_fill_symlink(struct inode *inode, void *kaddr, unsigned int m_pofs) { struct erofs_inode *vi = EROFS_I(inode); + unsigned int bsz = i_blocksize(inode); char *lnk; /* if it cannot be handled with fast symlink scheme */ if (vi->datalayout != EROFS_INODE_FLAT_INLINE || - inode->i_size >= EROFS_BLKSIZ || inode->i_size < 0) { + inode->i_size >= bsz || inode->i_size < 0) { inode->i_op = &erofs_symlink_iops; return 0; } @@ -225,7 +224,7 @@ static int erofs_fill_symlink(struct inode *inode, void *kaddr, m_pofs += vi->xattr_isize; /* inline symlink data shouldn't cross block boundary */ - if (m_pofs + inode->i_size > EROFS_BLKSIZ) { + if (m_pofs + inode->i_size > bsz) { kfree(lnk); erofs_err(inode->i_sb, "inline data cross block boundary @ nid %llu", @@ -289,10 +288,15 @@ static int erofs_fill_inode(struct inode *inode) } if (erofs_inode_is_data_compressed(vi->datalayout)) { - if (!erofs_is_fscache_mode(inode->i_sb)) - err = 
z_erofs_fill_inode(inode); - else - err = -EOPNOTSUPP; +#ifdef CONFIG_EROFS_FS_ZIP + if (!erofs_is_fscache_mode(inode->i_sb) && + inode->i_sb->s_blocksize_bits == PAGE_SHIFT) { + inode->i_mapping->a_ops = &z_erofs_aops; + err = 0; + goto out_unlock; + } +#endif + err = -EOPNOTSUPP; goto out_unlock; } inode->i_mapping->a_ops = &erofs_raw_access_aops; diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 1db018f8c2e8..af0431a40647 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -31,10 +31,8 @@ __printf(3, 4) void _erofs_info(struct super_block *sb, #define erofs_info(sb, fmt, ...) \ _erofs_info(sb, __func__, fmt "\n", ##__VA_ARGS__) #ifdef CONFIG_EROFS_FS_DEBUG -#define erofs_dbg(x, ...) pr_debug(x "\n", ##__VA_ARGS__) #define DBG_BUGON BUG_ON #else -#define erofs_dbg(x, ...) ((void)0) #define DBG_BUGON(x) ((void)(x)) #endif /* !CONFIG_EROFS_FS_DEBUG */ @@ -81,6 +79,7 @@ struct erofs_dev_context { struct rw_semaphore rwsem; unsigned int extra_devices; + bool |
