From 708b4818f2d0b9a1e277de85a00a0355745a5cd0 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Sat, 30 Sep 2006 16:02:25 +0000 Subject: bmap support --- ChangeLog | 7 +++++ README | 5 ++++ doc/kernel.txt | 21 +++++++++++++ include/fuse.h | 10 +++++++ include/fuse_lowlevel.h | 36 ++++++++++++++++++++++ kernel/file.c | 37 +++++++++++++++++++++++ kernel/fuse_i.h | 3 ++ kernel/fuse_kernel.h | 11 +++++++ kernel/inode.c | 80 +++++++++++++++++++++++++++++++++++++++---------- lib/fuse.c | 24 +++++++++++++++ lib/fuse_lowlevel.c | 21 +++++++++++++ lib/mount.c | 2 ++ util/fusermount.c | 20 +++++++++---- 13 files changed, 257 insertions(+), 20 deletions(-) diff --git a/ChangeLog b/ChangeLog index a14f301..942c2d9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,12 @@ 2006-09-30 Miklos Szeredi + * Add support for block device backed filesystems. This mode is + selected with the 'blkdev' option, which is privileged. + + * Add support for the bmap (FIBMAP ioctl) operation on block + device backed filesystems. This allows swapon and lilo to work on + such filesystems. + * kernel changes: * Drop support for kernels earlier than 2.6.9. Kernel module from diff --git a/README b/README index eeb6d96..b97d56b 100644 --- a/README +++ b/README @@ -259,3 +259,8 @@ uid=N gid=N Override the 'st_gid' field set by the filesystem. + +blkdev + + Mount a filesystem backed by a block device. This is a privileged + option. The device must be specified with the 'fsname=NAME' option. diff --git a/doc/kernel.txt b/doc/kernel.txt index a584f05..e94e98b 100644 --- a/doc/kernel.txt +++ b/doc/kernel.txt @@ -51,6 +51,22 @@ homepage: http://fuse.sourceforge.net/ +Filesystem type +~~~~~~~~~~~~~~~ + +The filesystem type given to mount(2) can be one of the following: + +'fuse' + + This is the usual way to mount a FUSE filesystem. The first + argument of the mount system call may contain an arbitrary string, + which is not interpreted by the kernel. + +'fuseblk' + + The filesystem is block device based. The first argument of the + mount system call is interpreted as the name of the device. + Mount options ~~~~~~~~~~~~~ @@ -94,6 +110,11 @@ Mount options The default is infinite. Note that the size of read requests is limited anyway to 32 pages (which is 128kbyte on i386). +'blksize=N' + + Set the block size for the filesystem. The default is 512. This + option is only valid for 'fuseblk' type mounts. + Control filesystem ~~~~~~~~~~~~~~~~~~ diff --git a/include/fuse.h b/include/fuse.h index 8237422..797f225 100644 --- a/include/fuse.h +++ b/include/fuse.h @@ -408,6 +408,16 @@ struct fuse_operations { * Introduced in version 2.6 */ int (*utimens) (const char *, const struct timespec tv[2]); + + /** + * Map block index within file to block index within device + * + * Note: This makes sense only for block device backed filesystems + * mounted with the 'blkdev' option + * + * Introduced in version 2.6 + */ + int (*bmap) (const char *, size_t blocksize, uint64_t *idx); }; /** Extra context that may be needed by some filesystems diff --git a/include/fuse_lowlevel.h b/include/fuse_lowlevel.h index 00b22bc..83e9739 100644 --- a/include/fuse_lowlevel.h +++ b/include/fuse_lowlevel.h @@ -733,6 +733,8 @@ struct fuse_lowlevel_ops { /** * Test for a POSIX file lock * + * Introduced in version 2.6 + * * Valid replies: * fuse_reply_lock * fuse_reply_err @@ -759,6 +761,8 @@ struct fuse_lowlevel_ops { * will still allow file locking to work locally. Hence these are * only interesting for network filesystems and similar. * + * Introduced in version 2.6 + * * Valid replies: * fuse_reply_err * @@ -771,6 +775,26 @@ struct fuse_lowlevel_ops { */ void (*setlk) (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, struct flock *lock, uint64_t owner, int sleep); + + /** + * Map block index within file to block index within device + * + * Note: This makes sense only for block device backed filesystems + * mounted with the 'blkdev' option + * + * Introduced in version 2.6 + * + * Valid replies: + * fuse_reply_bmap + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param blocksize unit of block index + * @param idx block index within file + */ + void (*bmap) (fuse_req_t req, fuse_ino_t ino, size_t blocksize, + uint64_t idx); }; /** @@ -929,6 +953,18 @@ int fuse_reply_xattr(fuse_req_t req, size_t count); */ int fuse_reply_lock(fuse_req_t req, struct flock *lock); +/** + * Reply with block index + * + * Possible requests: + * bmap + * + * @param req request handle + * @param idx block index within device + * @return zero for success, -errno for failure to send reply + */ +int fuse_reply_bmap(fuse_req_t req, uint64_t idx); + /* ----------------------------------------------------------- * * Filling a buffer in readdir * * ----------------------------------------------------------- */ diff --git a/kernel/file.c b/kernel/file.c index 9e50109..fb381da 100644 --- a/kernel/file.c +++ b/kernel/file.c @@ -805,6 +805,42 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl) return err; } +static sector_t fuse_bmap(struct address_space *mapping, sector_t block) +{ + struct inode *inode = mapping->host; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req; + struct fuse_bmap_in inarg; + struct fuse_bmap_out outarg; + int err; + + if (!inode->i_sb->s_bdev || fc->no_bmap) + return 0; + + req = fuse_get_req(fc); + if (IS_ERR(req)) + return 0; + + memset(&inarg, 0, sizeof(inarg)); + inarg.block = block; + inarg.blocksize = inode->i_sb->s_blocksize; + req->in.h.opcode = FUSE_BMAP; + req->in.h.nodeid = get_node_id(inode); + req->in.numargs = 1; + req->in.args[0].size = sizeof(inarg); + req->in.args[0].value = &inarg; + req->out.numargs = 1; + req->out.args[0].size = sizeof(outarg); + req->out.args[0].value = &outarg; + request_send(fc, req); + err = req->out.h.error; + fuse_put_request(fc, req); + if (err == -ENOSYS) + fc->no_bmap = 1; + + return err ? 0 : outarg.block; +} + static struct file_operations fuse_file_operations = { .llseek = generic_file_llseek, .read = generic_file_read, @@ -836,6 +872,7 @@ static struct address_space_operations fuse_file_aops = { .commit_write = fuse_commit_write, .readpages = fuse_readpages, .set_page_dirty = fuse_set_page_dirty, + .bmap = fuse_bmap, }; void fuse_init_file_inode(struct inode *inode) diff --git a/kernel/fuse_i.h b/kernel/fuse_i.h index e44dd9a..0841e44 100644 --- a/kernel/fuse_i.h +++ b/kernel/fuse_i.h @@ -398,6 +398,9 @@ struct fuse_conn { /** Is interrupt not implemented by fs? */ unsigned no_interrupt : 1; + /** Is bmap not implemented by fs? */ + unsigned no_bmap : 1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/kernel/fuse_kernel.h b/kernel/fuse_kernel.h index 179c7eb..936ea95 100644 --- a/kernel/fuse_kernel.h +++ b/kernel/fuse_kernel.h @@ -162,6 +162,7 @@ enum fuse_opcode { FUSE_ACCESS = 34, FUSE_CREATE = 35, FUSE_INTERRUPT = 36, + FUSE_BMAP = 37, }; /* The read buffer is required to be at least 8k, but may be much larger */ @@ -331,6 +332,16 @@ struct fuse_interrupt_in { __u64 unique; }; +struct fuse_bmap_in { + __u64 block; + __u32 blocksize; + __u32 padding; +}; + +struct fuse_bmap_out { + __u64 block; +}; + struct fuse_in_header { __u32 len; __u32 opcode; diff --git a/kernel/inode.c b/kernel/inode.c index 2d4749e..7800892 100644 --- a/kernel/inode.c +++ b/kernel/inode.c @@ -44,6 +44,7 @@ struct fuse_mount_data { unsigned group_id_present : 1; unsigned flags; unsigned max_read; + unsigned blksize; }; static struct inode *fuse_alloc_inode(struct super_block *sb) @@ -302,6 +303,7 @@ enum { OPT_DEFAULT_PERMISSIONS, OPT_ALLOW_OTHER, OPT_MAX_READ, + OPT_BLKSIZE, OPT_ERR }; @@ -313,14 +315,16 @@ static match_table_t tokens = { {OPT_DEFAULT_PERMISSIONS, "default_permissions"}, {OPT_ALLOW_OTHER, "allow_other"}, {OPT_MAX_READ, "max_read=%u"}, + {OPT_BLKSIZE, "blksize=%u"}, {OPT_ERR, NULL} }; -static int parse_fuse_opt(char *opt, struct fuse_mount_data *d) +static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev) { char *p; memset(d, 0, sizeof(struct fuse_mount_data)); d->max_read = ~0; + d->blksize = 512; while ((p = strsep(&opt, ",")) != NULL) { int token; @@ -373,6 +377,12 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d) d->max_read = value; break; + case OPT_BLKSIZE: + if (!is_bdev || match_int(&args[0], &value)) + return 0; + d->blksize = value; + break; + default: return 0; } @@ -599,15 +609,21 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) struct dentry *root_dentry; struct fuse_req *init_req; int err; + int is_bdev = sb->s_bdev != NULL; if (sb->s_flags & MS_MANDLOCK) return -EINVAL; - if (!parse_fuse_opt((char *) data, &d)) + if (!parse_fuse_opt((char *) data, &d, is_bdev)) return -EINVAL; - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + if (is_bdev) { + if (!sb_set_blocksize(sb, d.blksize)) + return -EINVAL; + } else { + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + } sb->s_magic = FUSE_SUPER_MAGIC; sb->s_op = &fuse_super_operations; sb->s_maxbytes = MAX_LFS_FILESIZE; @@ -693,6 +709,14 @@ static int fuse_get_sb(struct file_system_type *fs_type, { return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt); } + +static int fuse_get_sb_blk(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *raw_data, struct vfsmount *mnt) +{ + return get_sb_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super, + mnt); +} #else static struct super_block *fuse_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, @@ -700,6 +724,14 @@ static struct super_block *fuse_get_sb(struct file_system_type *fs_type, { return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super); } + +static struct super_block *fuse_get_sb_blk(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *raw_data) +{ + return get_sb_bdev(fs_type, flags, dev_name, raw_data, + fuse_fill_super); +} #endif static struct file_system_type fuse_fs_type = { @@ -709,6 +741,14 @@ static struct file_system_type fuse_fs_type = { .kill_sb = kill_anon_super, }; +static struct file_system_type fuseblk_fs_type = { + .owner = THIS_MODULE, + .name = "fuseblk", + .get_sb = fuse_get_sb_blk, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + #ifndef HAVE_FS_SUBSYS static decl_subsys(fs, NULL, NULL); #endif @@ -731,24 +771,34 @@ static int __init fuse_fs_init(void) err = register_filesystem(&fuse_fs_type); if (err) - printk("fuse: failed to register filesystem\n"); - else { - fuse_inode_cachep = kmem_cache_create("fuse_inode", - sizeof(struct fuse_inode), - 0, SLAB_HWCACHE_ALIGN, - fuse_inode_init_once, NULL); - if (!fuse_inode_cachep) { - unregister_filesystem(&fuse_fs_type); - err = -ENOMEM; - } - } + goto out; + + err = register_filesystem(&fuseblk_fs_type); + if (err) + goto out_unreg; + fuse_inode_cachep = kmem_cache_create("fuse_inode", + sizeof(struct fuse_inode), + 0, SLAB_HWCACHE_ALIGN, + fuse_inode_init_once, NULL); + err = -ENOMEM; + if (!fuse_inode_cachep) + goto out_unreg2; + + return 0; + + out_unreg2: + unregister_filesystem(&fuseblk_fs_type); + out_unreg: + unregister_filesystem(&fuse_fs_type); + out: return err; } static void fuse_fs_cleanup(void) { unregister_filesystem(&fuse_fs_type); + unregister_filesystem(&fuseblk_fs_type); kmem_cache_destroy(fuse_inode_cachep); } diff --git a/lib/fuse.c b/lib/fuse.c index 14789b7..0e3e1d1 100644 --- a/lib/fuse.c +++ b/lib/fuse.c @@ -2371,6 +2371,29 @@ static void fuse_setlk(fuse_req_t req, fuse_ino_t ino, reply_err(req, err); } +static void fuse_bmap(fuse_req_t req, fuse_ino_t ino, size_t blocksize, + uint64_t idx) +{ + struct fuse *f = req_fuse_prepare(req); + char *path; + int err; + + err = -ENOENT; + pthread_rwlock_rdlock(&f->tree_lock); + path = get_path(f, ino); + if (path != NULL) { + err = -ENOSYS; + if (f->op.bmap) + err = f->op.bmap(path, blocksize, &idx); + free(path); + } + pthread_rwlock_unlock(&f->tree_lock); + if (!err) + fuse_reply_bmap(req, idx); + else + reply_err(req, err); +} + static struct fuse_lowlevel_ops fuse_path_ops = { .init = fuse_data_init, .destroy = fuse_data_destroy, @@ -2405,6 +2428,7 @@ static struct fuse_lowlevel_ops fuse_path_ops = { .removexattr = fuse_removexattr, .getlk = fuse_getlk, .setlk = fuse_setlk, + .bmap = fuse_bmap, }; static void free_cmd(struct fuse_cmd *cmd) diff --git a/lib/fuse_lowlevel.c b/lib/fuse_lowlevel.c index f014e6e..8ea6779 100644 --- a/lib/fuse_lowlevel.c +++ b/lib/fuse_lowlevel.c @@ -403,6 +403,16 @@ int fuse_reply_lock(fuse_req_t req, struct flock *lock) return send_reply_ok(req, &arg, sizeof(arg)); } +int fuse_reply_bmap(fuse_req_t req, uint64_t idx) +{ + struct fuse_bmap_out arg; + + memset(&arg, 0, sizeof(arg)); + arg.block = idx; + + return send_reply_ok(req, &arg, sizeof(arg)); +} + static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) { char *name = (char *) inarg; @@ -907,6 +917,16 @@ static struct fuse_req *check_interrupt(struct fuse_ll *f, struct fuse_req *req) return NULL; } +static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +{ + struct fuse_bmap_in *arg = (struct fuse_bmap_in *) inarg; + + if (req->f->op.bmap) + req->f->op.bmap(req, nodeid, arg->blocksize, arg->block); + else + fuse_reply_err(req, ENOSYS); +} + static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) { struct fuse_init_in *arg = (struct fuse_init_in *) inarg; @@ -1040,6 +1060,7 @@ static struct { [FUSE_ACCESS] = { do_access, "ACCESS" }, [FUSE_CREATE] = { do_create, "CREATE" }, [FUSE_INTERRUPT] = { do_interrupt, "INTERRUPT" }, + [FUSE_BMAP] = { do_bmap, "BMAP" }, }; #define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0])) diff --git a/lib/mount.c b/lib/mount.c index 8ac9787..2ed0381 100644 --- a/lib/mount.c +++ b/lib/mount.c @@ -49,6 +49,8 @@ static const struct fuse_opt fuse_mount_opts[] = { FUSE_OPT_KEY("allow_other", KEY_KERN), FUSE_OPT_KEY("allow_root", KEY_ALLOW_ROOT), FUSE_OPT_KEY("nonempty", KEY_KERN), + FUSE_OPT_KEY("blkdev", KEY_KERN), + FUSE_OPT_KEY("blksize=", KEY_KERN), FUSE_OPT_KEY("default_permissions", KEY_KERN), FUSE_OPT_KEY("fsname=", KEY_KERN), FUSE_OPT_KEY("large_read", KEY_KERN), diff --git a/util/fusermount.c b/util/fusermount.c index 46d0b6b..1843f50 100644 --- a/util/fusermount.c +++ b/util/fusermount.c @@ -257,7 +257,8 @@ static int unmount_fuse(const char *mnt, int quiet, int lazy) while ((entp = getmntent(fp)) != NULL) { int removed = 0; if (!found && strcmp(entp->mnt_dir, mnt) == 0 && - strcmp(entp->mnt_type, "fuse") == 0) { + (strcmp(entp->mnt_type, "fuse") == 0 || + strcmp(entp->mnt_type, "fuseblk") == 0)) { if (user == NULL) removed = 1; else { @@ -567,7 +568,7 @@ static int check_mountpoint_empty(const char *mnt, mode_t rootmode, return 0; } -static int do_mount(const char *mnt, const char *type, mode_t rootmode, +static int do_mount(const char *mnt, const char **type, mode_t rootmode, int fd, const char *opts, const char *dev, char **fsnamep, char **mnt_optsp, off_t rootsize) { @@ -579,6 +580,7 @@ static int do_mount(const char *mnt, const char *type, mode_t rootmode, char *d; char *fsname = NULL; int check_empty = 1; + int blkdev = 0; optbuf = (char *) malloc(strlen(opts) + 128); if (!optbuf) { @@ -601,6 +603,12 @@ static int do_mount(const char *mnt, const char *type, mode_t rootmode, } memcpy(fsname, s + fsname_str_len, len - fsname_str_len); fsname[len - fsname_str_len] = '\0'; + } else if (opt_eq(s, len, "blkdev")) { + if (getuid() != 0) { + fprintf(stderr, "%s: option blkdev is privileged\n", progname); + goto err; + } + blkdev = 1; } else if (opt_eq(s, len, "nonempty")) { check_empty = 0; } else if (!begins_with(s, "fd=") && @@ -662,11 +670,13 @@ static int do_mount(const char *mnt, const char *type, mode_t rootmode, if (check_empty && check_mountpoint_empty(mnt, rootmode, rootsize) == -1) goto err; - res = mount(fsname, mnt, type, flags, optbuf); + if (blkdev) + *type = "fuseblk"; + res = mount(fsname, mnt, *type, flags, optbuf); if (res == -1 && errno == EINVAL) { /* It could be an old version not supporting group_id */ sprintf(d, "fd=%i,rootmode=%o,user_id=%i", fd, rootmode, getuid()); - res = mount(fsname, mnt, type, flags, optbuf); + res = mount(fsname, mnt, *type, flags, optbuf); } if (res == -1) { fprintf(stderr, "%s: mount failed: %s\n", progname, strerror(errno)); @@ -906,7 +916,7 @@ static int mount_fuse(const char *mnt, const char *opts) res = check_perm(&real_mnt, &stbuf, &currdir_fd, &mountpoint_fd); restore_privs(); if (res != -1) - res = do_mount(real_mnt, type, stbuf.st_mode & S_IFMT, fd, opts, + res = do_mount(real_mnt, &type, stbuf.st_mode & S_IFMT, fd, opts, dev, &fsname, &mnt_opts, stbuf.st_size); } else restore_privs(); -- cgit v1.2.3