From 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 16 Apr 2005 15:20:36 -0700 Subject: Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! --- fs/ext2/CHANGES | 157 ++++++ fs/ext2/Makefile | 12 + fs/ext2/acl.c | 518 +++++++++++++++++++ fs/ext2/acl.h | 82 +++ fs/ext2/balloc.c | 699 +++++++++++++++++++++++++ fs/ext2/bitmap.c | 25 + fs/ext2/dir.c | 673 ++++++++++++++++++++++++ fs/ext2/ext2.h | 160 ++++++ fs/ext2/file.c | 68 +++ fs/ext2/fsync.c | 51 ++ fs/ext2/ialloc.c | 735 ++++++++++++++++++++++++++ fs/ext2/inode.c | 1276 ++++++++++++++++++++++++++++++++++++++++++++++ fs/ext2/ioctl.c | 81 +++ fs/ext2/namei.c | 418 +++++++++++++++ fs/ext2/super.c | 1161 +++++++++++++++++++++++++++++++++++++++++ fs/ext2/symlink.c | 52 ++ fs/ext2/xattr.c | 1043 +++++++++++++++++++++++++++++++++++++ fs/ext2/xattr.h | 118 +++++ fs/ext2/xattr_security.c | 53 ++ fs/ext2/xattr_trusted.c | 64 +++ fs/ext2/xattr_user.c | 77 +++ 21 files changed, 7523 insertions(+) create mode 100644 fs/ext2/CHANGES create mode 100644 fs/ext2/Makefile create mode 100644 fs/ext2/acl.c create mode 100644 fs/ext2/acl.h create mode 100644 fs/ext2/balloc.c create mode 100644 fs/ext2/bitmap.c create mode 100644 fs/ext2/dir.c create mode 100644 fs/ext2/ext2.h create mode 100644 fs/ext2/file.c create mode 100644 fs/ext2/fsync.c create mode 100644 fs/ext2/ialloc.c create mode 100644 fs/ext2/inode.c create mode 100644 fs/ext2/ioctl.c create mode 100644 fs/ext2/namei.c create mode 100644 fs/ext2/super.c create mode 100644 fs/ext2/symlink.c create mode 100644 fs/ext2/xattr.c create mode 100644 fs/ext2/xattr.h create mode 100644 fs/ext2/xattr_security.c create mode 100644 fs/ext2/xattr_trusted.c create mode 100644 fs/ext2/xattr_user.c (limited to 'fs/ext2') diff --git a/fs/ext2/CHANGES b/fs/ext2/CHANGES new file mode 100644 index 000000000000..aa5aaf0e5911 --- /dev/null +++ b/fs/ext2/CHANGES @@ -0,0 +1,157 @@ +Changes from version 0.5a to version 0.5b +========================================= + - Now that we have sysctl(), the immutable flag cannot be changed when + the system is running at security level > 0. + - Some cleanups in the code. + - More consistency checks on directories. + - The ext2.diff patch from Tom May has been + integrated. This patch replaces expensive "/" and "%" with + cheap ">>" and "&" where possible. + +Changes from version 0.5 to version 0.5a +======================================== + - Zero the partial block following the end of the file when a file + is truncated. + - Dates updated in the copyright. + - More checks when the filesystem is mounted: the count of blocks, + fragments, and inodes per group is checked against the block size. + - The buffers used by the error routines are now static variables, to + avoid using space on the kernel stack, as requested by Linus. + - Some cleanups in the error messages (some versions of syslog contain + a bug which truncates an error message if it contains '\n'). + - Check that no data can be written to a file past the 2GB limit. + - The famous readdir() bug has been fixed by Stephen Tweedie. + - Added a revision level in the superblock. + - Full support for O_SYNC flag of the open system call. + - New mount options: `resuid=#uid' and `resgid=#gid'. `resuid' causes + ext2fs to consider user #uid like root for the reserved blocks. + `resgid' acts the same way with group #gid. New fields in the + superblock contain default values for resuid and resgid and can + be modified by tune2fs. + Idea comes from Rene Cougnenc . + - New mount options: `bsddf' and `minixdf'. `bsddf' causes ext2fs + to remove the blocks used for FS structures from the total block + count in statfs. With `minixdf', ext2fs mimics Minix behavior + in statfs (i.e. it returns the total number of blocks on the + partition). This is intended to make bde happy :-) + - New file attributes: + - Immutable files cannot be modified. Data cannot be written to + these files. They cannot be removed, renamed and new links cannot + be created. Even root cannot modify the files. He has to remove + the immutable attribute first. + - Append-only files: can only be written in append-mode when writing. + They cannot be removed, renamed and new links cannot be created. + Note: files may only be added to an append-only directory. + - No-dump files: the attribute is not used by the kernel. My port + of dump uses it to avoid backing up files which are not important. + - New check in ext2_check_dir_entry: the inode number is checked. + - Support for big file systems: the copy of the FS descriptor is now + dynamically allocated (previous versions used a fixed size array). + This allows to mount 2GB+ FS. + - Reorganization of the ext2_inode structure to allow other operating + systems to create specific fields if they use ext2fs as their native + file system. Currently, ext2fs is only implemented in Linux but + will soon be part of Gnu Hurd and of Masix. + +Changes from version 0.4b to version 0.5 +======================================== + - New superblock fields: s_lastcheck and s_checkinterval added + by Uwe Ohse to implement timedependent checks + of the file system + - Real random numbers for secure rm added by Pierre del Perugia + + - The mount warnings related to the state of a fs are not printed + if the fs is mounted read-only, idea by Nick Holloway + + +Changes from version 0.4a to version 0.4b +========================================= + - Copyrights changed to include the name of my laboratory. + - Clean up of balloc.c and ialloc.c. + - More consistency checks. + - Block preallocation added by Stephen Tweedie. + - Direct reads of directories disallowed. + - Readahead implemented in readdir by Stephen Tweedie. + - Bugs in block and inodes allocation fixed. + - Readahead implemented in ext2_find_entry by Chip Salzenberg. + - New mount options: + `check=none|normal|strict' + `debug' + `errors=continue|remount-ro|panic' + `grpid', `bsdgroups' + `nocheck' + `nogrpid', `sysvgroups' + - truncate() now tries to deallocate contiguous blocks in a single call + to ext2_free_blocks(). + - lots of cosmetic changes. + +Changes from version 0.4 to version 0.4a +======================================== + - the `sync' option support is now complete. Version 0.4 was not + supporting it when truncating a file. I have tested the synchronous + writes and they work but they make the system very slow :-( I have + to work again on this to make it faster. + - when detecting an error on a mounted filesystem, version 0.4 used + to try to write a flag in the super block even if the filesystem had + been mounted read-only. This is fixed. + - the `sb=#' option now causes the kernel code to use the filesystem + descriptors located at block #+1. Version 0.4 used the superblock + backup located at block # but used the main copy of the descriptors. + - a new file attribute `S' is supported. This attribute causes + synchronous writes but is applied to a file not to the entire file + system (thanks to Michael Kraehe for + suggesting it). + - the directory cache is inhibited by default. The cache management + code seems to be buggy and I have to look at it carefully before + using it again. + - deleting a file with the `s' attribute (secure deletion) causes its + blocks to be overwritten with random values not with zeros (thanks to + Michael A. Griffith for suggesting it). + - lots of cosmetic changes have been made. + +Changes from version 0.3 to version 0.4 +======================================= + - Three new mount options are supported: `check', `sync' and `sb=#'. + `check' tells the kernel code to make more consistency checks + when the file system is mounted. Currently, the kernel code checks + that the blocks and inodes bitmaps are consistent with the free + blocks and inodes counts. More checks will be added in future + releases. + `sync' tells the kernel code to use synchronous writes when updating + an inode, a bitmap, a directory entry or an indirect block. This + can make the file system much slower but can be a big win for files + recovery in case of a crash (and we can now say to the BSD folks + that Linux also supports synchronous updates :-). + `sb=#' tells the kernel code to use an alternate super block instead + of its master copy. `#' is the number of the block (counted in + 1024 bytes blocks) which contains the alternate super block. + An ext2 file system typically contains backups of the super block + at blocks 8193, 16385, and so on. + - I have change the meaning of the valid flag used by e2fsck. it + now contains the state of the file system. If the kernel code + detects an inconsistency while the file system is mounted, it flags + it as erroneous and e2fsck will detect that on next run. + - The super block now contains a mount counter. This counter is + incremented each time the file system is mounted read/write. When + this counter becomes bigger than a maximal mount counts (also stored + in the super block), e2fsck checks the file system, even if it had + been unmounted cleanly, and resets this counter to 0. + - File attributes are now supported. One can associate a set of + attributes to a file. Three attributes are defined: + `c': the file is marked for automatic compression, + `s': the file is marked for secure deletion: when the file is + deleted, its blocks are zeroed and written back to the disk, + `u': the file is marked for undeletion: when the file is deleted, + its contents are saved to allow a future undeletion. + Currently, only the `s' attribute is implemented in the kernel + code. Support for the other attributes will be added in a future + release. + - a few bugs related to times updates have been fixed by Bruce + Evans and me. + - a bug related to the links count of deleted inodes has been fixed. + Previous versions used to keep the links count set to 1 when a file + was deleted. The new version now sets links_count to 0 when deleting + the last link. + - a race condition when deallocating an inode has been fixed by + Stephen Tweedie. + diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile new file mode 100644 index 000000000000..ee240a14e70f --- /dev/null +++ b/fs/ext2/Makefile @@ -0,0 +1,12 @@ +# +# Makefile for the linux ext2-filesystem routines. +# + +obj-$(CONFIG_EXT2_FS) += ext2.o + +ext2-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ + ioctl.o namei.o super.o symlink.o + +ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o +ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o +ext2-$(CONFIG_EXT2_FS_SECURITY) += xattr_security.o diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c new file mode 100644 index 000000000000..8369ee8d28c4 --- /dev/null +++ b/fs/ext2/acl.c @@ -0,0 +1,518 @@ +/* + * linux/fs/ext2/acl.c + * + * Copyright (C) 2001-2003 Andreas Gruenbacher, + */ + +#include +#include +#include +#include +#include "ext2.h" +#include "xattr.h" +#include "acl.h" + +/* + * Convert from filesystem to in-memory representation. + */ +static struct posix_acl * +ext2_acl_from_disk(const void *value, size_t size) +{ + const char *end = (char *)value + size; + int n, count; + struct posix_acl *acl; + + if (!value) + return NULL; + if (size < sizeof(ext2_acl_header)) + return ERR_PTR(-EINVAL); + if (((ext2_acl_header *)value)->a_version != + cpu_to_le32(EXT2_ACL_VERSION)) + return ERR_PTR(-EINVAL); + value = (char *)value + sizeof(ext2_acl_header); + count = ext2_acl_count(size); + if (count < 0) + return ERR_PTR(-EINVAL); + if (count == 0) + return NULL; + acl = posix_acl_alloc(count, GFP_KERNEL); + if (!acl) + return ERR_PTR(-ENOMEM); + for (n=0; n < count; n++) { + ext2_acl_entry *entry = + (ext2_acl_entry *)value; + if ((char *)value + sizeof(ext2_acl_entry_short) > end) + goto fail; + acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); + acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); + switch(acl->a_entries[n].e_tag) { + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + value = (char *)value + + sizeof(ext2_acl_entry_short); + acl->a_entries[n].e_id = ACL_UNDEFINED_ID; + break; + + case ACL_USER: + case ACL_GROUP: + value = (char *)value + sizeof(ext2_acl_entry); + if ((char *)value > end) + goto fail; + acl->a_entries[n].e_id = + le32_to_cpu(entry->e_id); + break; + + default: + goto fail; + } + } + if (value != end) + goto fail; + return acl; + +fail: + posix_acl_release(acl); + return ERR_PTR(-EINVAL); +} + +/* + * Convert from in-memory to filesystem representation. + */ +static void * +ext2_acl_to_disk(const struct posix_acl *acl, size_t *size) +{ + ext2_acl_header *ext_acl; + char *e; + size_t n; + + *size = ext2_acl_size(acl->a_count); + ext_acl = (ext2_acl_header *)kmalloc(sizeof(ext2_acl_header) + + acl->a_count * sizeof(ext2_acl_entry), GFP_KERNEL); + if (!ext_acl) + return ERR_PTR(-ENOMEM); + ext_acl->a_version = cpu_to_le32(EXT2_ACL_VERSION); + e = (char *)ext_acl + sizeof(ext2_acl_header); + for (n=0; n < acl->a_count; n++) { + ext2_acl_entry *entry = (ext2_acl_entry *)e; + entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); + entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); + switch(acl->a_entries[n].e_tag) { + case ACL_USER: + case ACL_GROUP: + entry->e_id = + cpu_to_le32(acl->a_entries[n].e_id); + e += sizeof(ext2_acl_entry); + break; + + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + e += sizeof(ext2_acl_entry_short); + break; + + default: + goto fail; + } + } + return (char *)ext_acl; + +fail: + kfree(ext_acl); + return ERR_PTR(-EINVAL); +} + +static inline struct posix_acl * +ext2_iget_acl(struct inode *inode, struct posix_acl **i_acl) +{ + struct posix_acl *acl = EXT2_ACL_NOT_CACHED; + + spin_lock(&inode->i_lock); + if (*i_acl != EXT2_ACL_NOT_CACHED) + acl = posix_acl_dup(*i_acl); + spin_unlock(&inode->i_lock); + + return acl; +} + +static inline void +ext2_iset_acl(struct inode *inode, struct posix_acl **i_acl, + struct posix_acl *acl) +{ + spin_lock(&inode->i_lock); + if (*i_acl != EXT2_ACL_NOT_CACHED) + posix_acl_release(*i_acl); + *i_acl = posix_acl_dup(acl); + spin_unlock(&inode->i_lock); +} + +/* + * inode->i_sem: don't care + */ +static struct posix_acl * +ext2_get_acl(struct inode *inode, int type) +{ + struct ext2_inode_info *ei = EXT2_I(inode); + int name_index; + char *value = NULL; + struct posix_acl *acl; + int retval; + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return NULL; + + switch(type) { + case ACL_TYPE_ACCESS: + acl = ext2_iget_acl(inode, &ei->i_acl); + if (acl != EXT2_ACL_NOT_CACHED) + return acl; + name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; + break; + + case ACL_TYPE_DEFAULT: + acl = ext2_iget_acl(inode, &ei->i_default_acl); + if (acl != EXT2_ACL_NOT_CACHED) + return acl; + name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT; + break; + + default: + return ERR_PTR(-EINVAL); + } + retval = ext2_xattr_get(inode, name_index, "", NULL, 0); + if (retval > 0) { + value = kmalloc(retval, GFP_KERNEL); + if (!value) + return ERR_PTR(-ENOMEM); + retval = ext2_xattr_get(inode, name_index, "", value, retval); + } + if (retval > 0) + acl = ext2_acl_from_disk(value, retval); + else if (retval == -ENODATA || retval == -ENOSYS) + acl = NULL; + else + acl = ERR_PTR(retval); + if (value) + kfree(value); + + if (!IS_ERR(acl)) { + switch(type) { + case ACL_TYPE_ACCESS: + ext2_iset_acl(inode, &ei->i_acl, acl); + break; + + case ACL_TYPE_DEFAULT: + ext2_iset_acl(inode, &ei->i_default_acl, acl); + break; + } + } + return acl; +} + +/* + * inode->i_sem: down + */ +static int +ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) +{ + struct ext2_inode_info *ei = EXT2_I(inode); + int name_index; + void *value = NULL; + size_t size; + int error; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + if (!test_opt(inode->i_sb, POSIX_ACL)) + return 0; + + switch(type) { + case ACL_TYPE_ACCESS: + name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; + if (acl) { + mode_t mode = inode->i_mode; + error = posix_acl_equiv_mode(acl, &mode); + if (error < 0) + return error; + else { + inode->i_mode = mode; + mark_inode_dirty(inode); + if (error == 0) + acl = NULL; + } + } + break; + + case ACL_TYPE_DEFAULT: + name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT; + if (!S_ISDIR(inode->i_mode)) + return acl ? -EACCES : 0; + break; + + default: + return -EINVAL; + } + if (acl) { + value = ext2_acl_to_disk(acl, &size); + if (IS_ERR(value)) + return (int)PTR_ERR(value); + } + + error = ext2_xattr_set(inode, name_index, "", value, size, 0); + + if (value) + kfree(value); + if (!error) { + switch(type) { + case ACL_TYPE_ACCESS: + ext2_iset_acl(inode, &ei->i_acl, acl); + break; + + case ACL_TYPE_DEFAULT: + ext2_iset_acl(inode, &ei->i_default_acl, acl); + break; + } + } + return error; +} + +static int +ext2_check_acl(struct inode *inode, int mask) +{ + struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); + + if (acl) { + int error = posix_acl_permission(inode, acl, mask); + posix_acl_release(acl); + return error; + } + + return -EAGAIN; +} + +int +ext2_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + return generic_permission(inode, mask, ext2_check_acl); +} + +/* + * Initialize the ACLs of a new inode. Called from ext2_new_inode. + * + * dir->i_sem: down + * inode->i_sem: up (access to inode is still exclusive) + */ +int +ext2_init_acl(struct inode *inode, struct inode *dir) +{ + struct posix_acl *acl = NULL; + int error = 0; + + if (!S_ISLNK(inode->i_mode)) { + if (test_opt(dir->i_sb, POSIX_ACL)) { + acl = ext2_get_acl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return PTR_ERR(acl); + } + if (!acl) + inode->i_mode &= ~current->fs->umask; + } + if (test_opt(inode->i_sb, POSIX_ACL) && acl) { + struct posix_acl *clone; + mode_t mode; + + if (S_ISDIR(inode->i_mode)) { + error = ext2_set_acl(inode, ACL_TYPE_DEFAULT, acl); + if (error) + goto cleanup; + } + clone = posix_acl_clone(acl, GFP_KERNEL); + error = -ENOMEM; + if (!clone) + goto cleanup; + mode = inode->i_mode; + error = posix_acl_create_masq(clone, &mode); + if (error >= 0) { + inode->i_mode = mode; + if (error > 0) { + /* This is an extended ACL */ + error = ext2_set_acl(inode, + ACL_TYPE_ACCESS, clone); + } + } + posix_acl_release(clone); + } +cleanup: + posix_acl_release(acl); + return error; +} + +/* + * Does chmod for an inode that may have an Access Control List. The + * inode->i_mode field must be updated to the desired value by the caller + * before calling this function. + * Returns 0 on success, or a negative error number. + * + * We change the ACL rather than storing some ACL entries in the file + * mode permission bits (which would be more efficient), because that + * would break once additional permissions (like ACL_APPEND, ACL_DELETE + * for directories) are added. There are no more bits available in the + * file mode. + * + * inode->i_sem: down + */ +int +ext2_acl_chmod(struct inode *inode) +{ + struct posix_acl *acl, *clone; + int error; + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return 0; + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl) || !acl) + return PTR_ERR(acl); + clone = posix_acl_clone(acl, GFP_KERNEL); + posix_acl_release(acl); + if (!clone) + return -ENOMEM; + error = posix_acl_chmod_masq(clone, inode->i_mode); + if (!error) + error = ext2_set_acl(inode, ACL_TYPE_ACCESS, clone); + posix_acl_release(clone); + return error; +} + +/* + * Extended attribut handlers + */ +static size_t +ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size, + const char *name, size_t name_len) +{ + const size_t size = sizeof(XATTR_NAME_ACL_ACCESS); + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return 0; + if (list && size <= list_size) + memcpy(list, XATTR_NAME_ACL_ACCESS, size); + return size; +} + +static size_t +ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size, + const char *name, size_t name_len) +{ + const size_t size = sizeof(XATTR_NAME_ACL_DEFAULT); + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return 0; + if (list && size <= list_size) + memcpy(list, XATTR_NAME_ACL_DEFAULT, size); + return size; +} + +static int +ext2_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) +{ + struct posix_acl *acl; + int error; + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return -EOPNOTSUPP; + + acl = ext2_get_acl(inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl == NULL) + return -ENODATA; + error = posix_acl_to_xattr(acl, buffer, size); + posix_acl_release(acl); + + return error; +} + +static int +ext2_xattr_get_acl_access(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ext2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); +} + +static int +ext2_xattr_get_acl_default(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ext2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); +} + +static int +ext2_xattr_set_acl(struct inode *inode, int type, const void *value, + size_t size) +{ + struct posix_acl *acl; + int error; + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return -EOPNOTSUPP; + if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) + return -EPERM; + + if (value) { + acl = posix_acl_from_xattr(value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + else if (acl) { + error = posix_acl_valid(acl); + if (error) + goto release_and_out; + } + } else + acl = NULL; + + error = ext2_set_acl(inode, type, acl); + +release_and_out: + posix_acl_release(acl); + return error; +} + +static int +ext2_xattr_set_acl_access(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ext2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); +} + +static int +ext2_xattr_set_acl_default(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ext2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); +} + +struct xattr_handler ext2_xattr_acl_access_handler = { + .prefix = XATTR_NAME_ACL_ACCESS, + .list = ext2_xattr_list_acl_access, + .get = ext2_xattr_get_acl_access, + .set = ext2_xattr_set_acl_access, +}; + +struct xattr_handler ext2_xattr_acl_default_handler = { + .prefix = XATTR_NAME_ACL_DEFAULT, + .list = ext2_xattr_list_acl_default, + .get = ext2_xattr_get_acl_default, + .set = ext2_xattr_set_acl_default, +}; diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h new file mode 100644 index 000000000000..fed96ae81a7d --- /dev/null +++ b/fs/ext2/acl.h @@ -0,0 +1,82 @@ +/* + File: fs/ext2/acl.h + + (C) 2001 Andreas Gruenbacher, +*/ + +#include + +#define EXT2_ACL_VERSION 0x0001 + +typedef struct { + __le16 e_tag; + __le16 e_perm; + __le32 e_id; +} ext2_acl_entry; + +typedef struct { + __le16 e_tag; + __le16 e_perm; +} ext2_acl_entry_short; + +typedef struct { + __le32 a_version; +} ext2_acl_header; + +static inline size_t ext2_acl_size(int count) +{ + if (count <= 4) { + return sizeof(ext2_acl_header) + + count * sizeof(ext2_acl_entry_short); + } else { + return sizeof(ext2_acl_header) + + 4 * sizeof(ext2_acl_entry_short) + + (count - 4) * sizeof(ext2_acl_entry); + } +} + +static inline int ext2_acl_count(size_t size) +{ + ssize_t s; + size -= sizeof(ext2_acl_header); + s = size - 4 * sizeof(ext2_acl_entry_short); + if (s < 0) { + if (size % sizeof(ext2_acl_entry_short)) + return -1; + return size / sizeof(ext2_acl_entry_short); + } else { + if (s % sizeof(ext2_acl_entry)) + return -1; + return s / sizeof(ext2_acl_entry) + 4; + } +} + +#ifdef CONFIG_EXT2_FS_POSIX_ACL + +/* Value for inode->u.ext2_i.i_acl and inode->u.ext2_i.i_default_acl + if the ACL has not been cached */ +#define EXT2_ACL_NOT_CACHED ((void *)-1) + +/* acl.c */ +extern int ext2_permission (struct inode *, int, struct nameidata *); +extern int ext2_acl_chmod (struct inode *); +extern int ext2_init_acl (struct inode *, struct inode *); + +#else +#include +#define ext2_permission NULL +#define ext2_get_acl NULL +#define ext2_set_acl NULL + +static inline int +ext2_acl_chmod (struct inode *inode) +{ + return 0; +} + +static inline int ext2_init_acl (struct inode *inode, struct inode *dir) +{ + return 0; +} +#endif + diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c new file mode 100644 index 000000000000..6591abef64d0 --- /dev/null +++ b/fs/ext2/balloc.c @@ -0,0 +1,699 @@ +/* + * linux/fs/ext2/balloc.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993 + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + */ + +#include +#include "ext2.h" +#include +#include +#include + +/* + * balloc.c contains the blocks allocation and deallocation routines + */ + +/* + * The free blocks are managed by bitmaps. A file system contains several + * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap + * block for inodes, N blocks for the inode table and data blocks. + * + * The file system contains group descriptors which are located after the + * super block. Each descriptor contains the number of the bitmap block and + * the free blocks count in the block. The descriptors are loaded in memory + * when a file system is mounted (see ext2_read_super). + */ + + +#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) + +struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, + unsigned int block_group, + struct buffer_head ** bh) +{ + unsigned long group_desc; + unsigned long offset; + struct ext2_group_desc * desc; + struct ext2_sb_info *sbi = EXT2_SB(sb); + + if (block_group >= sbi->s_groups_count) { + ext2_error (sb, "ext2_get_group_desc", + "block_group >= groups_count - " + "block_group = %d, groups_count = %lu", + block_group, sbi->s_groups_count); + + return NULL; + } + + group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(sb); + offset = block_group & (EXT2_DESC_PER_BLOCK(sb) - 1); + if (!sbi->s_group_desc[group_desc]) { + ext2_error (sb, "ext2_get_group_desc", + "Group descriptor not loaded - " + "block_group = %d, group_desc = %lu, desc = %lu", + block_group, group_desc, offset); + return NULL; + } + + desc = (struct ext2_group_desc *) sbi->s_group_desc[group_desc]->b_data; + if (bh) + *bh = sbi->s_group_desc[group_desc]; + return desc + offset; +} + +/* + * Read the bitmap for a given block_group, reading into the specified + * slot in the superblock's bitmap cache. + * + * Return buffer_head on success or NULL in case of failure. + */ +static struct buffer_head * +read_block_bitmap(struct super_block *sb, unsigned int block_group) +{ + struct ext2_group_desc * desc; + struct buffer_head * bh = NULL; + + desc = ext2_get_group_desc (sb, block_group, NULL); + if (!desc) + goto error_out; + bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); + if (!bh) + ext2_error (sb, "read_block_bitmap", + "Cannot read block bitmap - " + "block_group = %d, block_bitmap = %u", + block_group, le32_to_cpu(desc->bg_block_bitmap)); +error_out: + return bh; +} + +/* + * Set sb->s_dirt here because the superblock was "logically" altered. We + * need to recalculate its free blocks count and flush it out. + */ +static int reserve_blocks(struct super_block *sb, int count) +{ + struct ext2_sb_info *sbi = EXT2_SB(sb); + struct ext2_super_block *es = sbi->s_es; + unsigned free_blocks; + unsigned root_blocks; + + free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); + root_blocks = le32_to_cpu(es->s_r_blocks_count); + + if (free_blocks < count) + count = free_blocks; + + if (free_blocks < root_blocks + count && !capable(CAP_SYS_RESOURCE) && + sbi->s_resuid != current->fsuid && + (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { + /* + * We are too close to reserve and we are not privileged. + * Can we allocate anything at all? + */ + if (free_blocks > root_blocks) + count = free_blocks - root_blocks; + else + return 0; + } + + percpu_counter_mod(&sbi->s_freeblocks_counter, -count); + sb->s_dirt = 1; + return count; +} + +static void release_blocks(struct super_block *sb, int count) +{ + if (count) { + struct ext2_sb_info *sbi = EXT2_SB(sb); + + percpu_counter_mod(&sbi->s_freeblocks_counter, count); + sb->s_dirt = 1; + } +} + +static int group_reserve_blocks(struct ext2_sb_info *sbi, int group_no, + struct ext2_group_desc *desc, struct buffer_head *bh, int count) +{ + unsigned free_blocks; + + if (!desc->bg_free_blocks_count) + return 0; + + spin_lock(sb_bgl_lock(sbi, group_no)); + free_blocks = le16_to_cpu(desc->bg_free_blocks_count); + if (free_blocks < count) + count = free_blocks; + desc->bg_free_blocks_count = cpu_to_le16(free_blocks - count); + spin_unlock(sb_bgl_lock(sbi, group_no)); + mark_buffer_dirty(bh); + return count; +} + +static void group_release_blocks(struct super_block *sb, int group_no, + struct ext2_group_desc *desc, struct buffer_head *bh, int count) +{ + if (count) { + struct ext2_sb_info *sbi = EXT2_SB(sb); + unsigned free_blocks; + + spin_lock(sb_bgl_lock(sbi, group_no)); + free_blocks = le16_to_cpu(desc->bg_free_blocks_count); + desc->bg_free_blocks_count = cpu_to_le16(free_blocks + count); + spin_unlock(sb_bgl_lock(sbi, group_no)); + sb->s_dirt = 1; + mark_buffer_dirty(bh); + } +} + +/* Free given blocks, update quota and i_blocks field */ +void ext2_free_blocks (struct inode * inode, unsigned long block, + unsigned long count) +{ + struct buffer_head *bitmap_bh = NULL; + struct buffer_head * bh2; + unsigned long block_group; + unsigned long bit; + unsigned long i; + unsigned long overflow; + struct super_block * sb = inode->i_sb; + struct ext2_sb_info * sbi = EXT2_SB(sb); + struct ext2_group_desc * desc; + struct ext2_super_block * es = sbi->s_es; + unsigned freed = 0, group_freed; + + if (block < le32_to_cpu(es->s_first_data_block) || + block + count < block || + block + count > le32_to_cpu(es->s_blocks_count)) { + ext2_error (sb, "ext2_free_blocks", + "Freeing blocks not in datazone - " + "block = %lu, count = %lu", block, count); + goto error_return; + } + + ext2_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1); + +do_more: + overflow = 0; + block_group = (block - le32_to_cpu(es->s_first_data_block)) / + EXT2_BLOCKS_PER_GROUP(sb); + bit = (block - le32_to_cpu(es->s_first_data_block)) % + EXT2_BLOCKS_PER_GROUP(sb); + /* + * Check to see if we are freeing blocks across a group + * boundary. + */ + if (bit + count > EXT2_BLOCKS_PER_GROUP(sb)) { + overflow = bit + count - EXT2_BLOCKS_PER_GROUP(sb); + count -= overflow; + } + brelse(bitmap_bh); + bitmap_bh = read_block_bitmap(sb, block_group); + if (!bitmap_bh) + goto error_return; + + desc = ext2_get_group_desc (sb, block_group, &bh2); + if (!desc) + goto error_return; + + if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) || + in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) || + in_range (block, le32_to_cpu(desc->bg_inode_table), + sbi->s_itb_per_group) || + in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table), + sbi->s_itb_per_group)) + ext2_error (sb, "ext2_free_blocks", + "Freeing blocks in system zones - " + "Block = %lu, count = %lu", + block, count); + + for (i = 0, group_freed = 0; i < count; i++) { + if (!ext2_clear_bit_atomic(sb_bgl_lock(sbi, block_group), + bit + i, bitmap_bh->b_data)) { + ext2_error(sb, __FUNCTION__, + "bit already cleared for block %lu", block + i); + } else { + group_freed++; + } + } + + mark_buffer_dirty(bitmap_bh); + if (sb->s_flags & MS_SYNCHRONOUS) + sync_dirty_buffer(bitmap_bh); + + group_release_blocks(sb, block_group, desc, bh2, group_freed); + freed += group_freed; + + if (overflow) { + block += count; + count = overflow; + goto do_more; + } +error_return: + brelse(bitmap_bh); + release_blocks(sb, freed); + DQUOT_FREE_BLOCK(inode, freed); +} + +static int grab_block(spinlock_t *lock, char *map, unsigned size, int goal) +{ + int k; + char *p, *r; + + if (!ext2_test_bit(goal, map)) + goto got_it; + +repeat: + if (goal) { + /* + * The goal was occupied; search forward for a free + * block within the next XX blocks. + * + * end_goal is more or less random, but it has to be + * less than EXT2_BLOCKS_PER_GROUP. Aligning up to the + * next 64-bit boundary is simple.. + */ + k = (goal + 63) & ~63; + goal = ext2_find_next_zero_bit(map, k, goal); + if (goal < k) + goto got_it; + /* + * Search in the remainder of the current group. + */ + } + + p = map + (goal >> 3); + r = memscan(p, 0, (size - goal + 7) >> 3); + k = (r - map) << 3; + if (k < size) { + /* + * We have succeeded in finding a free byte in the block + * bitmap. Now search backwards to find the start of this + * group of free blocks - won't take more than 7 iterations. + */ + for (goal = k; goal && !ext2_test_bit (goal - 1, map); goal--) + ; + goto got_it; + } + + k = ext2_find_next_zero_bit ((u32 *)map, size, goal); + if (k < size) { + goal = k; + goto got_it; + } + return -1; +got_it: + if (ext2_set_bit_atomic(lock, goal, (void *) map)) + goto repeat; + return goal; +} + +/* + * ext2_new_block uses a goal block to assist allocation. If the goal is + * free, or there is a free block within 32 blocks of the goal, that block + * is allocated. Otherwise a forward search is made for a free block; within + * each block group the search first looks for an entire free byte in the block + * bitmap, and then for any free bit if that fails. + * This function also updates quota and i_blocks field. + */ +int ext2_new_block(struct inode *inode, unsigned long goal, + u32 *prealloc_count, u32 *prealloc_block, int *err) +{ + struct buffer_head *bitmap_bh = NULL; + struct buffer_head *gdp_bh; /* bh2 */ + struct ext2_group_desc *desc; + int group_no; /* i */ + int ret_block; /* j */ + int group_idx; /* k */ + int target_block; /* tmp */ + int block = 0; + struct super_block *sb = inode->i_sb; + struct ext2_sb_info *sbi = EXT2_SB(sb); + struct ext2_super_block *es = sbi->s_es; + unsigned group_size = EXT2_BLOCKS_PER_GROUP(sb); + unsigned prealloc_goal = es->s_prealloc_blocks; + unsigned group_alloc = 0, es_alloc, dq_alloc; + int nr_scanned_groups; + + if (!prealloc_goal--) + prealloc_goal = EXT2_DEFAULT_PREALLOC_BLOCKS - 1; + if (!prealloc_count || *prealloc_count) + prealloc_goal = 0; + + if (DQUOT_ALLOC_BLOCK(inode, 1)) { + *err = -EDQUOT; + goto out; + } + + while (prealloc_goal && DQUOT_PREALLOC_BLOCK(inode, prealloc_goal)) + prealloc_goal--; + + dq_alloc = prealloc_goal + 1; + es_alloc = reserve_blocks(sb, dq_alloc); + if (!es_alloc) { + *err = -ENOSPC; + goto out_dquot; + } + + ext2_debug ("goal=%lu.\n", goal); + + if (goal < le32_to_cpu(es->s_first_data_block) || + goal >= le32_to_cpu(es->s_blocks_count)) + goal = le32_to_cpu(es->s_first_data_block); + group_no = (goal - le32_to_cpu(es->s_first_data_block)) / group_size; + desc = ext2_get_group_desc (sb, group_no, &gdp_bh); + if (!desc) { + /* + * gdp_bh may still be uninitialised. But group_release_blocks + * will not touch it because group_alloc is zero. + */ + goto io_error; + } + + group_alloc = group_reserve_blocks(sbi, group_no, desc, + gdp_bh, es_alloc); + if (group_alloc) { + ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) % + group_size); + brelse(bitmap_bh); + bitmap_bh = read_block_bitmap(sb, group_no); + if (!bitmap_bh) + goto io_error; + + ext2_debug("goal is at %d:%d.\n", group_no, ret_block); + + ret_block = grab_block(sb_bgl_lock(sbi, group_no), + bitmap_bh->b_data, group_size, ret_block); + if (ret_block >= 0) + goto got_block; + group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc); + group_alloc = 0; + } + + ext2_debug ("Bit not found in block group %d.\n", group_no); + + /* + * Now search the rest of the groups. We assume that + * i and desc correctly point to the last group visited. + */ + nr_scanned_groups = 0; +retry: + for (group_idx = 0; !group_alloc && + group_idx < sbi->s_groups_count; group_idx++) { + group_no++; + if (group_no >= sbi->s_groups_count) + group_no = 0; + desc = ext2_get_group_desc(sb, group_no, &gdp_bh); + if (!desc) + goto io_error; + group_alloc = group_reserve_blocks(sbi, group_no, desc, + gdp_bh, es_alloc); + } + if (!group_alloc) { + *err = -ENOSPC; + goto out_release; + } + brelse(bitmap_bh); + bitmap_bh = read_block_bitmap(sb, group_no); + if (!bitmap_bh) + goto io_error; + + ret_block = grab_block(sb_bgl_lock(sbi, group_no), bitmap_bh->b_data, + group_size, 0); + if (ret_block < 0) { + /* + * If a free block counter is corrupted we can loop inifintely. + * Detect that here. + */ + nr_scanned_groups++; + if (nr_scanned_groups > 2 * sbi->s_groups_count) { + ext2_error(sb, "ext2_new_block", + "corrupted free blocks counters"); + goto io_error; + } + /* + * Someone else grabbed the last free block in this blockgroup + * before us. Retry the scan. + */ + group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc); + group_alloc = 0; + goto retry; + } + +got_block: + ext2_debug("using block group %d(%d)\n", + group_no, desc->bg_free_blocks_count); + + target_block = ret_block + group_no * group_size + + le32_to_cpu(es->s_first_data_block); + + if (target_block == le32_to_cpu(desc->bg_block_bitmap) || + target_block == le32_to_cpu(desc->bg_inode_bitmap) || + in_range(target_block, le32_to_cpu(desc->bg_inode_table), + sbi->s_itb_per_group)) + ext2_error (sb, "ext2_new_block", + "Allocating block in system zone - " + "block = %u", target_block); + + if (target_block >= le32_to_cpu(es->s_blocks_count)) { + ext2_error (sb, "ext2_new_block", + "block(%d) >= blocks count(%d) - " + "block_group = %d, es == %p ", ret_block, + le32_to_cpu(es->s_blocks_count), group_no, es); + goto io_error; + } + block = target_block; + + /* OK, we _had_ allocated something */ + ext2_debug("found bit %d\n", ret_block); + + dq_alloc--; + es_alloc--; + group_alloc--; + + /* + * Do block preallocation now if required. + */ + write_lock(&EXT2_I(inode)->i_meta_lock); + if (group_alloc && !*prealloc_count) { + unsigned n; + + for (n = 0; n < group_alloc && ++ret_block < group_size; n++) { + if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group_no), + ret_block, + (void*) bitmap_bh->b_data)) + break; + } + *prealloc_block = block + 1; + *prealloc_count = n; + es_alloc -= n; + dq_alloc -= n; + group_alloc -= n; + } + write_unlock(&EXT2_I(inode)->i_meta_lock); + + mark_buffer_dirty(bitmap_bh); + if (sb->s_flags & MS_SYNCHRONOUS) + sync_dirty_buffer(bitmap_bh); + + ext2_debug ("allocating block %d. ", block); + + *err = 0; +out_release: + group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc); + release_blocks(sb, es_alloc); +out_dquot: + DQUOT_FREE_BLOCK(inode, dq_alloc); +out: + brelse(bitmap_bh); + return block; + +io_error: + *err = -EIO; + goto out_release; +} + +unsigned long ext2_count_free_blocks (struct super_block * sb) +{ + struct ext2_group_desc * desc; + unsigned long desc_count = 0; + int i; +#ifdef EXT2FS_DEBUG + unsigned long bitmap_count, x; + struct ext2_super_block *es; + + lock_super (sb); + es = EXT2_SB(sb)->s_es; + desc_count = 0; + bitmap_count = 0; + desc = NULL; + for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { + struct buffer_head *bitmap_bh; + desc = ext2_get_group_desc (sb, i, NULL); + if (!desc) + continue; + desc_count += le16_to_cpu(desc->bg_free_blocks_count); + bitmap_bh = read_block_bitmap(sb, i); + if (!bitmap_bh) + continue; + + x = ext2_count_free(bitmap_bh, sb->s_blocksize); + printk ("group %d: stored = %d, counted = %lu\n", + i, le16_to_cpu(desc->bg_free_blocks_count), x); + bitmap_count += x; + brelse(bitmap_bh); + } + printk("ext2_count_free_blocks: stored = %lu, computed = %lu, %lu\n", + (long)le32_to_cpu(es->s_free_blocks_count), + desc_count, bitmap_count); + unlock_super (sb); + return bitmap_count; +#else + for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { + desc = ext2_get_group_desc (sb, i, NULL); + if (!desc) + continue; + desc_count += le16_to_cpu(desc->bg_free_blocks_count); + } + return desc_count; +#endif +} + +static inline int +block_in_use(unsigned long block, struct super_block *sb, unsigned char *map) +{ + return ext2_test_bit ((block - + le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block)) % + EXT2_BLOCKS_PER_GROUP(sb), map); +} + +static inline int test_root(int a, int b) +{ + int num = b; + + while (a > num) + num *= b; + return num == a; +} + +static int ext2_group_sparse(int group) +{ + if (group <= 1) + return 1; + return (test_root(group, 3) || test_root(group, 5) || + test_root(group, 7)); +} + +/** + * ext2_bg_has_super - number of blocks used by the superblock in group + * @sb: superblock for filesystem + * @group: group number to check + * + * Return the number of blocks used by the superblock (primary or backup) + * in this group. Currently this will be only 0 or 1. + */ +int ext2_bg_has_super(struct super_block *sb, int group) +{ + if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&& + !ext2_group_sparse(group)) + return 0; + return 1; +} + +/** + * ext2_bg_num_gdb - number of blocks used by the group table in group + * @sb: superblock for filesystem + * @group: group number to check + * + * Return the number of blocks used by the group descriptor table + * (primary or backup) in this group. In the future there may be a + * different number of descriptor blocks in each group. + */ +unsigned long ext2_bg_num_gdb(struct super_block *sb, int group) +{ + if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&& + !ext2_group_sparse(group)) + return 0; + return EXT2_SB(sb)->s_gdb_count; +} + +#ifdef CONFIG_EXT2_CHECK +/* Called at mount-time, super-block is locked */ +void ext2_check_blocks_bitmap (struct super_block * sb) +{ + struct buffer_head *bitmap_bh = NULL; + struct ext2_super_block * es; + unsigned long desc_count, bitmap_count, x, j; + unsigned long desc_blocks; + struct ext2_group_desc * desc; + int i; + + es = EXT2_SB(sb)->s_es; + desc_count = 0; + bitmap_count = 0; + desc = NULL; + for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { + desc = ext2_get_group_desc (sb, i, NULL); + if (!desc) + continue; + desc_count += le16_to_cpu(desc->bg_free_blocks_count); + brelse(bitmap_bh); + bitmap_bh = read_block_bitmap(sb, i); + if (!bitmap_bh) + continue; + + if (ext2_bg_has_super(sb, i) && + !ext2_test_bit(0, bitmap_bh->b_data)) + ext2_error(sb, __FUNCTION__, + "Superblock in group %d is marked free", i); + + desc_blocks = ext2_bg_num_gdb(sb, i); + for (j = 0; j < desc_blocks; j++) + if (!ext2_test_bit(j + 1, bitmap_bh->b_data)) + ext2_error(sb, __FUNCTION__, + "Descriptor block #%ld in group " + "%d is marked free", j, i); + + if (!block_in_use(le32_to_cpu(desc->bg_block_bitmap), + sb, bitmap_bh->b_data)) + ext2_error(sb, "ext2_check_blocks_bitmap", + "Block bitmap for group %d is marked free", + i); + + if (!block_in_use(le32_to_cpu(desc->bg_inode_bitmap), + sb, bitmap_bh->b_data)) + ext2_error(sb, "ext2_check_blocks_bitmap", + "Inode bitmap for group %d is marked free", + i); + + for (j = 0; j < EXT2_SB(sb)->s_itb_per_group; j++) + if (!block_in_use(le32_to_cpu(desc->bg_inode_table) + j, + sb, bitmap_bh->b_data)) + ext2_error (sb, "ext2_check_blocks_bitmap", + "Block #%ld of the inode table in " + "group %d is marked free", j, i); + + x = ext2_count_free(bitmap_bh, sb->s_blocksize); + if (le16_to_cpu(desc->bg_free_blocks_count) != x) + ext2_error (sb, "ext2_check_blocks_bitmap", + "Wrong free blocks count for group %d, " + "stored = %d, counted = %lu", i, + le16_to_cpu(desc->bg_free_blocks_count), x); + bitmap_count += x; + } + if (le32_to_cpu(es->s_free_blocks_count) != bitmap_count) + ext2_error (sb, "ext2_check_blocks_bitmap", + "Wrong free blocks count in super block, " + "stored = %lu, counted = %lu", + (unsigned long)le32_to_cpu(es->s_free_blocks_count), + bitmap_count); + brelse(bitmap_bh); +} +#endif diff --git a/fs/ext2/bitmap.c b/fs/ext2/bitmap.c new file mode 100644 index 000000000000..20145b74623f --- /dev/null +++ b/fs/ext2/bitmap.c @@ -0,0 +1,25 @@ +/* + * linux/fs/ext2/bitmap.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + */ + +#include + +static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; + +unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars) +{ + unsigned int i; + unsigned long sum = 0; + + if (!map) + return (0); + for (i = 0; i < numchars; i++) + sum += nibblemap[map->b_data[i] & 0xf] + + nibblemap[(map->b_data[i] >> 4) & 0xf]; + return (sum); +} diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c new file mode 100644 index 000000000000..5b5f52876b42 --- /dev/null +++ b/fs/ext2/dir.c @@ -0,0 +1,673 @@ +/* + * linux/fs/ext2/dir.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/dir.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * ext2 directory handling functions + * + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + * + * All code that works with directory layout had been switched to pagecache + * and moved here. AV + */ + +#include "ext2.h" +#include +#include + +typedef struct ext2_dir_entry_2 ext2_dirent; + +/* + * ext2 uses block-sized chunks. Arguably, sector-sized ones would be + * more robust, but we have what we have + */ +static inline unsigned ext2_chunk_size(struct inode *inode) +{ + return inode->i_sb->s_blocksize; +} + +static inline void ext2_put_page(struct page *page) +{ + kunmap(page); + page_cache_release(page); +} + +static inline unsigned long dir_pages(struct inode *inode) +{ + return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; +} + +/* + * Return the offset into page `page_nr' of the last valid + * byte in that page, plus one. + */ +static unsigned +ext2_last_byte(struct inode *inode, unsigned long page_nr) +{ + unsigned last_byte = inode->i_size; + + last_byte -= page_nr << PAGE_CACHE_SHIFT; + if (last_byte > PAGE_CACHE_SIZE) + last_byte = PAGE_CACHE_SIZE; + return last_byte; +} + +static int ext2_commit_chunk(struct page *page, unsigned from, unsigned to) +{ + struct inode *dir = page->mapping->host; + int err = 0; + dir->i_version++; + page->mapping->a_ops->commit_write(NULL, page, from, to); + if (IS_DIRSYNC(dir)) + err = write_one_page(page, 1); + else + unlock_page(page); + return err; +} + +static void ext2_check_page(struct page *page) +{ + struct inode *dir = page->mapping->host; + struct super_block *sb = dir->i_sb; + unsigned chunk_size = ext2_chunk_size(dir); + char *kaddr = page_address(page); + u32 max_inumber = le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count); + unsigned offs, rec_len; + unsigned limit = PAGE_CACHE_SIZE; + ext2_dirent *p; + char *error; + + if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) { + limit = dir->i_size & ~PAGE_CACHE_MASK; + if (limit & (chunk_size - 1)) + goto Ebadsize; + if (!limit) + goto out; + } + for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) { + p = (ext2_dirent *)(kaddr + offs); + rec_len = le16_to_cpu(p->rec_len); + + if (rec_len < EXT2_DIR_REC_LEN(1)) + goto Eshort; + if (rec_len & 3) + goto Ealign; + if (rec_len < EXT2_DIR_REC_LEN(p->name_len)) + goto Enamelen; + if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1)) + goto Espan; + if (le32_to_cpu(p->inode) > max_inumber) + goto Einumber; + } + if (offs != limit) + goto Eend; +out: + SetPageChecked(page); + return; + + /* Too bad, we had an error */ + +Ebadsize: + ext2_error(sb, "ext2_check_page", + "size of directory #%lu is not a multiple of chunk size", + dir->i_ino + ); + goto fail; +Eshort: + error = "rec_len is smaller than minimal"; + goto bad_entry; +Ealign: + error = "unaligned directory entry"; + goto bad_entry; +Enamelen: + error = "rec_len is too small for name_len"; + goto bad_entry; +Espan: + error = "directory entry across blocks"; + goto bad_entry; +Einumber: + error = "inode out of bounds"; +bad_entry: + ext2_error (sb, "ext2_check_page", "bad entry in directory #%lu: %s - " + "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", + dir->i_ino, error, (page->index<inode), + rec_len, p->name_len); + goto fail; +Eend: + p = (ext2_dirent *)(kaddr + offs); + ext2_error (sb, "ext2_check_page", + "entry in directory #%lu spans the page boundary" + "offset=%lu, inode=%lu", + dir->i_ino, (page->index<inode)); +fail: + SetPageChecked(page); + SetPageError(page); +} + +static struct page * ext2_get_page(struct inode *dir, unsigned long n) +{ + struct address_space *mapping = dir->i_mapping; + struct page *page = read_cache_page(mapping, n, + (filler_t*)mapping->a_ops->readpage, NULL); + if (!IS_ERR(page)) { + wait_on_page_locked(page); + kmap(page); + if (!PageUptodate(page)) + goto fail; + if (!PageChecked(page)) + ext2_check_page(page); + if (PageError(page)) + goto fail; + } + return page; + +fail: + ext2_put_page(page); + return ERR_PTR(-EIO); +} + +/* + * NOTE! unlike strncmp, ext2_match returns 1 for success, 0 for failure. + * + * len <= EXT2_NAME_LEN and de != NULL are guaranteed by caller. + */ +static inline int ext2_match (int len, const char * const name, + struct ext2_dir_entry_2 * de) +{ + if (len != de->name_len) + return 0; + if (!de->inode) + return 0; + return !memcmp(name, de->name, len); +} + +/* + * p is at least 6 bytes before the end of page + */ +static inline ext2_dirent *ext2_next_entry(ext2_dirent *p) +{ + return (ext2_dirent *)((char*)p + le16_to_cpu(p->rec_len)); +} + +static inline unsigned +ext2_validate_entry(char *base, unsigned offset, unsigned mask) +{ + ext2_dirent *de = (ext2_dirent*)(base + offset); + ext2_dirent *p = (ext2_dirent*)(base + (offset&mask)); + while ((char*)p < (char*)de) { + if (p->rec_len == 0) + break; + p = ext2_next_entry(p); + } + return (char *)p - base; +} + +static unsigned char ext2_filetype_table[EXT2_FT_MAX] = { + [EXT2_FT_UNKNOWN] = DT_UNKNOWN, + [EXT2_FT_REG_FILE] = DT_REG, + [EXT2_FT_DIR] = DT_DIR, + [EXT2_FT_CHRDEV] = DT_CHR, + [EXT2_FT_BLKDEV] = DT_BLK, + [EXT2_FT_FIFO] = DT_FIFO, + [EXT2_FT_SOCK] = DT_SOCK, + [EXT2_FT_SYMLINK] = DT_LNK, +}; + +#define S_SHIFT 12 +static unsigned char ext2_type_by_mode[S_IFMT >> S_SHIFT] = { + [S_IFREG >> S_SHIFT] = EXT2_FT_REG_FILE, + [S_IFDIR >> S_SHIFT] = EXT2_FT_DIR, + [S_IFCHR >> S_SHIFT] = EXT2_FT_CHRDEV, + [S_IFBLK >> S_SHIFT] = EXT2_FT_BLKDEV, + [S_IFIFO >> S_SHIFT] = EXT2_FT_FIFO, + [S_IFSOCK >> S_SHIFT] = EXT2_FT_SOCK, + [S_IFLNK >> S_SHIFT] = EXT2_FT_SYMLINK, +}; + +static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode) +{ + mode_t mode = inode->i_mode; + if (EXT2_HAS_INCOMPAT_FEATURE(inode->i_sb, EXT2_FEATURE_INCOMPAT_FILETYPE)) + de->file_type = ext2_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; + else + de->file_type = 0; +} + +static int +ext2_readdir (struct file * filp, void * dirent, filldir_t filldir) +{ + loff_t pos = filp->f_pos; + struct inode *inode = filp->f_dentry->d_inode; + struct super_block *sb = inode->i_sb; + unsigned int offset = pos & ~PAGE_CACHE_MASK; + unsigned long n = pos >> PAGE_CACHE_SHIFT; + unsigned long npages = dir_pages(inode); + unsigned chunk_mask = ~(ext2_chunk_size(inode)-1); + unsigned char *types = NULL; + int need_revalidate = (filp->f_version != inode->i_version); + int ret; + + if (pos > inode->i_size - EXT2_DIR_REC_LEN(1)) + goto success; + + if (EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_FILETYPE)) + types = ext2_filetype_table; + + for ( ; n < npages; n++, offset = 0) { + char *kaddr, *limit; + ext2_dirent *de; + struct page *page = ext2_get_page(inode, n); + + if (IS_ERR(page)) { + ext2_error(sb, __FUNCTION__, + "bad page in #%lu", + inode->i_ino); + filp->f_pos += PAGE_CACHE_SIZE - offset; + ret = -EIO; + goto done; + } + kaddr = page_address(page); + if (need_revalidate) { + offset = ext2_validate_entry(kaddr, offset, chunk_mask); + need_revalidate = 0; + } + de = (ext2_dirent *)(kaddr+offset); + limit = kaddr + ext2_last_byte(inode, n) - EXT2_DIR_REC_LEN(1); + for ( ;(char*)de <= limit; de = ext2_next_entry(de)) { + if (de->rec_len == 0) { + ext2_error(sb, __FUNCTION__, + "zero-length directory entry"); + ret = -EIO; + ext2_put_page(page); + goto done; + } + if (de->inode) { + int over; + unsigned char d_type = DT_UNKNOWN; + + if (types && de->file_type < EXT2_FT_MAX) + d_type = types[de->file_type]; + + offset = (char *)de - kaddr; + over = filldir(dirent, de->name, de->name_len, + (n<inode), d_type); + if (over) { + ext2_put_page(page); + goto success; + } + } + filp->f_pos += le16_to_cpu(de->rec_len); + } + ext2_put_page(page); + } + +success: + ret = 0; +done: + filp->f_version = inode->i_version; + return ret; +} + +/* + * ext2_find_entry() + * + * finds an entry in the specified directory with the wanted name. It + * returns the page in which the entry was found, and the entry itself + * (as a parameter - res_dir). Page is returned mapped and unlocked. + * Entry is guaranteed to be valid. + */ +struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir, + struct dentry *dentry, struct page ** res_page) +{ + const char *name = dentry->d_name.name; + int namelen = dentry->d_name.len; + unsigned reclen = EXT2_DIR_REC_LEN(namelen); + unsigned long start, n; + unsigned long npages = dir_pages(dir); + struct page *page = NULL; + struct ext2_inode_info *ei = EXT2_I(dir); + ext2_dirent * de; + + if (npages == 0) + goto out; + + /* OFFSET_CACHE */ + *res_page = NULL; + + start = ei->i_dir_start_lookup; + if (start >= npages) + start = 0; + n = start; + do { + char *kaddr; + page = ext2_get_page(dir, n); + if (!IS_ERR(page)) { + kaddr = page_address(page); + de = (ext2_dirent *) kaddr; + kaddr += ext2_last_byte(dir, n) - reclen; + while ((char *) de <= kaddr) { + if (de->rec_len == 0) { + ext2_error(dir->i_sb, __FUNCTION__, + "zero-length directory entry"); + ext2_put_page(page); + goto out; + } + if (ext2_match (namelen, name, de)) + goto found; + de = ext2_next_entry(de); + } + ext2_put_page(page); + } + if (++n >= npages) + n = 0; + } while (n != start); +out: + return NULL; + +found: + *res_page = page; + ei->i_dir_start_lookup = n; + return de; +} + +struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p) +{ + struct page *page = ext2_get_page(dir, 0); + ext2_dirent *de = NULL; + + if (!IS_ERR(page)) { + de = ext2_next_entry((ext2_dirent *) page_address(page)); + *p = page; + } + return de; +} + +ino_t ext2_inode_by_name(struct inode * dir, struct dentry *dentry) +{ + ino_t res = 0; + struct ext2_dir_entry_2 * de; + struct page *page; + + de = ext2_find_entry (dir, dentry, &page); + if (de) { + res = le32_to_cpu(de->inode); + kunmap(page); + page_cache_release(page); + } + return res; +} + +/* Releases the page */ +void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, + struct page *page, struct inode *inode) +{ + unsigned from = (char *) de - (char *) page_address(page); + unsigned to = from + le16_to_cpu(de->rec_len); + int err; + + lock_page(page); + err = page->mapping->a_ops->prepare_write(NULL, page, from, to); + if (err) + BUG(); + de->inode = cpu_to_le32(inode->i_ino); + ext2_set_de_type (de, inode); + err = ext2_commit_chunk(page, from, to); + ext2_put_page(page); + dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; + EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL; + mark_inode_dirty(dir); +} + +/* + * Parent is locked. + */ +int ext2_add_link (struct dentry *dentry, struct inode *inode) +{ + struct inode *dir = dentry->d_parent->d_inode; + const char *name = dentry->d_name.name; + int namelen = dentry->d_name.len; + unsigned chunk_size = ext2_chunk_size(dir); + unsigned reclen = EXT2_DIR_REC_LEN(namelen); + unsigned short rec_len, name_len; + struct page *page = NULL; + ext2_dirent * de; + unsigned long npages = dir_pages(dir); + unsigned long n; + char *kaddr; + unsigned from, to; + int err; + + /* + * We take care of directory expansion in the same loop. + * This code plays outside i_size, so it locks the page + * to protect that region. + */ + for (n = 0; n <= npages; n++) { + char *dir_end; + + page = ext2_get_page(dir, n); + err = PTR_ERR(page); + if (IS_ERR(page)) + goto out; + lock_page(page); + kaddr = page_address(page); + dir_end = kaddr + ext2_last_byte(dir, n); + de = (ext2_dirent *)kaddr; + kaddr += PAGE_CACHE_SIZE - reclen; + while ((char *)de <= kaddr) { + if ((char *)de == dir_end) { + /* We hit i_size */ + name_len = 0; + rec_len = chunk_size; + de->rec_len = cpu_to_le16(chunk_size); + de->inode = 0; + goto got_it; + } + if (de->rec_len == 0) { + ext2_error(dir->i_sb, __FUNCTION__, + "zero-length directory entry"); + err = -EIO; + goto out_unlock; + } + err = -EEXIST; + if (ext2_match (namelen, name, de)) + goto out_unlock; + name_len = EXT2_DIR_REC_LEN(de->name_len); + rec_len = le16_to_cpu(de->rec_len); + if (!de->inode && rec_len >= reclen) + goto got_it; + if (rec_len >= name_len + reclen) + goto got_it; + de = (ext2_dirent *) ((char *) de + rec_len); + } + unlock_page(page); + ext2_put_page(page); + } + BUG(); + return -EINVAL; + +got_it: + from = (char*)de - (char*)page_address(page); + to = from + rec_len; + err = page->mapping->a_ops->prepare_write(NULL, page, from, to); + if (err) + goto out_unlock; + if (de->inode) { + ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len); + de1->rec_len = cpu_to_le16(rec_len - name_len); + de->rec_len = cpu_to_le16(name_len); + de = de1; + } + de->name_len = namelen; + memcpy (de->name, name, namelen); + de->inode = cpu_to_le32(inode->i_ino); + ext2_set_de_type (de, inode); + err = ext2_commit_chunk(page, from, to); + dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; + EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL; + mark_inode_dirty(dir); + /* OFFSET_CACHE */ +out_put: + ext2_put_page(page); +out: + return err; +out_unlock: + unlock_page(page); + goto out_put; +} + +/* + * ext2_delete_entry deletes a directory entry by merging it with the + * previous entry. Page is up-to-date. Releases the page. + */ +int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + char *kaddr = page_address(page); + unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1); + unsigned to = ((char*)dir - kaddr) + le16_to_cpu(dir->rec_len); + ext2_dirent * pde = NULL; + ext2_dirent * de = (ext2_dirent *) (kaddr + from); + int err; + + while ((char*)de < (char*)dir) { + if (de->rec_len == 0) { + ext2_error(inode->i_sb, __FUNCTION__, + "zero-length directory entry"); + err = -EIO; + goto out; + } + pde = de; + de = ext2_next_entry(de); + } + if (pde) + from = (char*)pde - (char*)page_address(page); + lock_page(page); + err = mapping->a_ops->prepare_write(NULL, page, from, to); + if (err) + BUG(); + if (pde) + pde->rec_len = cpu_to_le16(to-from); + dir->inode = 0; + err = ext2_commit_chunk(page, from, to); + inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; + EXT2_I(inode)->i_flags &= ~EXT2_BTREE_FL; + mark_inode_dirty(inode); +out: + ext2_put_page(page); + return err; +} + +/* + * Set the first fragment of directory. + */ +int ext2_make_empty(struct inode *inode, struct inode *parent) +{ + struct address_space *mapping = inode->i_mapping; + struct page *page = grab_cache_page(mapping, 0); + unsigned chunk_size = ext2_chunk_size(inode); + struct ext2_dir_entry_2 * de; + int err; + void *kaddr; + + if (!page) + return -ENOMEM; + err = mapping->a_ops->prepare_write(NULL, page, 0, chunk_size); + if (err) { + unlock_page(page); + goto fail; + } + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr, 0, chunk_size); + de = (struct ext2_dir_entry_2 *)kaddr; + de->name_len = 1; + de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1)); + memcpy (de->name, ".\0\0", 4); + de->inode = cpu_to_le32(inode->i_ino); + ext2_set_de_type (de, inode); + + de = (struct ext2_dir_entry_2 *)(kaddr + EXT2_DIR_REC_LEN(1)); + de->name_len = 2; + de->rec_len = cpu_to_le16(chunk_size - EXT2_DIR_REC_LEN(1)); + de->inode = cpu_to_le32(parent->i_ino); + memcpy (de->name, "..\0", 4); + ext2_set_de_type (de, inode); + kunmap_atomic(kaddr, KM_USER0); + err = ext2_commit_chunk(page, 0, chunk_size); +fail: + page_cache_release(page); + return err; +} + +/* + * routine to check that the specified directory is empty (for rmdir) + */ +int ext2_empty_dir (struct inode * inode) +{ + struct page *page = NULL; + unsigned long i, npages = dir_pages(inode); + + for (i = 0; i < npages; i++) { + char *kaddr; + ext2_dirent * de; + page = ext2_get_page(inode, i); + + if (IS_ERR(page)) + continue; + + kaddr = page_address(page); + de = (ext2_dirent *)kaddr; + kaddr += ext2_last_byte(inode, i) - EXT2_DIR_REC_LEN(1); + + while ((char *)de <= kaddr) { + if (de->rec_len == 0) { + ext2_error(inode->i_sb, __FUNCTION__, + "zero-length directory entry"); + printk("kaddr=%p, de=%p\n", kaddr, de); + goto not_empty; + } + if (de->inode != 0) { + /* check for . and .. */ + if (de->name[0] != '.') + goto not_empty; + if (de->name_len > 2) + goto not_empty; + if (de->name_len < 2) { + if (de->inode != + cpu_to_le32(inode->i_ino)) + goto not_empty; + } else if (de->name[1] != '.') + goto not_empty; + } + de = ext2_next_entry(de); + } + ext2_put_page(page); + } + return 1; + +not_empty: + ext2_put_page(page); + return 0; +} + +struct file_operations ext2_dir_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .readdir = ext2_readdir, + .ioctl = ext2_ioctl, + .fsync = ext2_sync_file, +}; diff --git a/fs/ext2/ext2.h b/fs/