aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Miklos Szeredi <miklos@szeredi.hu>2006-06-29 14:38:35 +0000
committerGravatar Miklos Szeredi <miklos@szeredi.hu>2006-06-29 14:38:35 +0000
commit91762cd331d955f1378dd90af08b56aa15861ab4 (patch)
treea7181c6a71259f9bb46631e4c9c19b1d6f572999
parentb052a1a1b894c4bcd9b4e70dfceb70e340bbb781 (diff)
*** empty log message ***
-rw-r--r--ChangeLog11
-rw-r--r--configure.in2
-rw-r--r--doc/kernel.txt118
-rw-r--r--include/fuse_lowlevel.h7
-rw-r--r--kernel/Makefile.in7
-rw-r--r--kernel/configure.ac2
-rw-r--r--kernel/control.c217
-rw-r--r--kernel/dev.c418
-rw-r--r--kernel/dir.c101
-rw-r--r--kernel/file.c224
-rw-r--r--kernel/fuse_i.h170
-rw-r--r--kernel/inode.c213
12 files changed, 1016 insertions, 474 deletions
diff --git a/ChangeLog b/ChangeLog
index e3cec01..bd6d44f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,11 +1,18 @@
+2006-06-29 Miklos Szeredi <miklos@szeredi.hu>
+
+ * Released 2.6.0-pre3
+
+2006-06-29 Miklos Szeredi <miklos@szeredi.hu>
+
+ * Support in kernel module for file locking and interruption. The
+ same functionality is available in official kernels >= 2.6.18
+
2006-06-28 Miklos Szeredi <miklos@szeredi.hu>
* Add POSIX file locking support
* Add request interruption
- * The above need 2.6.17-git9 or later to be usable
-
2006-06-06 Miklos Szeredi <miklos@szeredi.hu>
* Add missing pthread_rwlock_destroy(). Patch from Remy Blank
diff --git a/configure.in b/configure.in
index 8262670..91b65b1 100644
--- a/configure.in
+++ b/configure.in
@@ -1,4 +1,4 @@
-AC_INIT(fuse, 2.6.0-pre2)
+AC_INIT(fuse, 2.6.0-pre3)
AC_CANONICAL_TARGET
AM_INIT_AUTOMAKE
AM_CONFIG_HEADER(include/config.h)
diff --git a/doc/kernel.txt b/doc/kernel.txt
index 33f7431..a584f05 100644
--- a/doc/kernel.txt
+++ b/doc/kernel.txt
@@ -18,6 +18,14 @@ Non-privileged mount (or user mount):
user. NOTE: this is not the same as mounts allowed with the "user"
option in /etc/fstab, which is not discussed here.
+Filesystem connection:
+
+ A connection between the filesystem daemon and the kernel. The
+ connection exists until either the daemon dies, or the filesystem is
+ umounted. Note that detaching (or lazy umounting) the filesystem
+ does _not_ break the connection, in this case it will exist until
+ the last reference to the filesystem is released.
+
Mount owner:
The user who does the mounting.
@@ -86,16 +94,20 @@ Mount options
The default is infinite. Note that the size of read requests is
limited anyway to 32 pages (which is 128kbyte on i386).
-Sysfs
-~~~~~
+Control filesystem
+~~~~~~~~~~~~~~~~~~
+
+There's a control filesystem for FUSE, which can be mounted by:
-FUSE sets up the following hierarchy in sysfs:
+ mount -t fusectl none /sys/fs/fuse/connections
- /sys/fs/fuse/connections/N/
+Mounting it under the '/sys/fs/fuse/connections' directory makes it
+backwards compatible with earlier versions.
-where N is an increasing number allocated to each new connection.
+Under the fuse control filesystem each connection has a directory
+named by a unique number.
-For each connection the following attributes are defined:
+For each connection the following files exist within this directory:
'waiting'
@@ -110,7 +122,47 @@ For each connection the following attributes are defined:
connection. This means that all waiting requests will be aborted an
error returned for all aborted and new requests.
-Only a privileged user may read or write these attributes.
+Only the owner of the mount may read or write these files.
+
+Interrupting filesystem operations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If a process issuing a FUSE filesystem request is interrupted, the
+following will happen:
+
+ 1) If the request is not yet sent to userspace AND the signal is
+ fatal (SIGKILL or unhandled fatal signal), then the request is
+ dequeued and returns immediately.
+
+ 2) If the request is not yet sent to userspace AND the signal is not
+ fatal, then an 'interrupted' flag is set for the request. When
+ the request has been successfully transfered to userspace and
+ this flag is set, an INTERRUPT request is queued.
+
+ 3) If the request is already sent to userspace, then an INTERRUPT
+ request is queued.
+
+INTERRUPT requests take precedence over other requests, so the
+userspace filesystem will receive queued INTERRUPTs before any others.
+
+The userspace filesystem may ignore the INTERRUPT requests entirely,
+or may honor them by sending a reply to the _original_ request, with
+the error set to EINTR.
+
+It is also possible that there's a race between processing the
+original request and it's INTERRUPT request. There are two possibilities:
+
+ 1) The INTERRUPT request is processed before the original request is
+ processed
+
+ 2) The INTERRUPT request is processed after the original request has
+ been answered
+
+If the filesystem cannot find the original request, it should wait for
+some timeout and/or a number of new requests to arrive, after which it
+should reply to the INTERRUPT request with an EAGAIN error. In case
+1) the INTERRUPT request will be requeued. In case 2) the INTERRUPT
+reply will be ignored.
Aborting a filesystem connection
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -139,8 +191,8 @@ the filesystem. There are several ways to do this:
- Use forced umount (umount -f). Works in all cases but only if
filesystem is still attached (it hasn't been lazy unmounted)
- - Abort filesystem through the sysfs interface. Most powerful
- method, always works.
+ - Abort filesystem through the FUSE control filesystem. Most
+ powerful method, always works.
How do non-privileged mounts work?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -304,25 +356,7 @@ Scenario 1 - Simple deadlock
| | for "file"]
| | *DEADLOCK*
-The solution for this is to allow requests to be interrupted while
-they are in userspace:
-
- | [interrupted by signal] |
- | <fuse_unlink() |
- | [release semaphore] | [semaphore acquired]
- | <sys_unlink() |
- | | >fuse_unlink()
- | | [queue req on fc->pending]
- | | [wake up fc->waitq]
- | | [sleep on req->waitq]
-
-If the filesystem daemon was single threaded, this will stop here,
-since there's no other thread to dequeue and execute the request.
-In this case the solution is to kill the FUSE daemon as well. If
-there are multiple serving threads, you just have to kill them as
-long as any remain.
-
-Moral: a filesystem which deadlocks, can soon find itself dead.
+The solution for this is to allow the filesystem to be aborted.
Scenario 2 - Tricky deadlock
----------------------------
@@ -355,24 +389,14 @@ but is caused by a pagefault.
| | [lock page]
| | * DEADLOCK *
-Solution is again to let the the request be interrupted (not
-elaborated further).
-
-An additional problem is that while the write buffer is being
-copied to the request, the request must not be interrupted. This
-is because the destination address of the copy may not be valid
-after the request is interrupted.
-
-This is solved with doing the copy atomically, and allowing
-interruption while the page(s) belonging to the write buffer are
-faulted with get_user_pages(). The 'req->locked' flag indicates
-when the copy is taking place, and interruption is delayed until
-this flag is unset.
+Solution is basically the same as above.
-Scenario 3 - Tricky deadlock with asynchronous read
----------------------------------------------------
+An additional problem is that while the write buffer is being copied
+to the request, the request must not be interrupted/aborted. This is
+because the destination address of the copy may not be valid after the
+request has returned.
-The same situation as above, except thread-1 will wait on page lock
-and hence it will be uninterruptible as well. The solution is to
-abort the connection with forced umount (if mount is attached) or
-through the abort attribute in sysfs.
+This is solved with doing the copy atomically, and allowing abort
+while the page(s) belonging to the write buffer are faulted with
+get_user_pages(). The 'req->locked' flag indicates when the copy is
+taking place, and abort is delayed until this flag is unset.
diff --git a/include/fuse_lowlevel.h b/include/fuse_lowlevel.h
index cf1fe0c..ede1307 100644
--- a/include/fuse_lowlevel.h
+++ b/include/fuse_lowlevel.h
@@ -749,9 +749,10 @@ struct fuse_lowlevel_ops {
* Acquire, modify or release a POSIX file lock
*
* For POSIX threads (NPTL) there's a 1-1 relation between pid and
- * owner, but this is not always the case. For checking lock
- * ownership, 'owner' must be used. The l_pid field in 'struct
- * flock' should only be used to fill in this field in getlk().
+ * owner, but otherwise this is not always the case. For checking
+ * lock ownership, 'owner' must be used. The l_pid field in
+ * 'struct flock' should only be used to fill in this field in
+ * getlk().
*
* Valid replies:
* fuse_reply_err
diff --git a/kernel/Makefile.in b/kernel/Makefile.in
index 34d376d..a8361ac 100644
--- a/kernel/Makefile.in
+++ b/kernel/Makefile.in
@@ -7,7 +7,7 @@ majver = @majver@
VERSION = @PACKAGE_VERSION@
DISTFILES = Makefile.in configure.ac configure config.h.in ../install-sh \
- dev.c dir.c file.c inode.c fuse_i.h fuse_kernel.h
+ dev.c dir.c file.c inode.c fuse_i.h fuse_kernel.h control.c
COMPATDISTFILES = compat/parser.c compat/parser.h
fusemoduledir = @kmoduledir@/kernel/fs/fuse
@@ -59,7 +59,7 @@ LD = ld
CFLAGS = -O2 -Wall -Wstrict-prototypes -fno-strict-aliasing -pipe @KERNELCFLAGS@
CPPFLAGS = -I@kernelsrc@/include -I. -D__KERNEL__ -DMODULE -D_LOOSE_KERNEL_NAMES -DFUSE_VERSION=\"$(VERSION)\" @KERNELCPPFLAGS@
-fuse_objs = dev.o dir.o file.o inode.o compat/parser.o
+fuse_objs = dev.o dir.o file.o inode.o compat/parser.o control.o
SUFFIXES = .c .o .s
@@ -77,13 +77,14 @@ dev.o: $(fuse_headers)
dir.o: $(fuse_headers)
file.o: $(fuse_headers)
inode.o: $(fuse_headers)
+control.o: $(fuse_headers)
else
EXTRA_CFLAGS += -DFUSE_VERSION=\"$(VERSION)\"
obj-m := fuse.o
-fuse-objs := dev.o dir.o file.o inode.o
+fuse-objs := dev.o dir.o file.o inode.o control.o
all-spec:
$(MAKE) -C @kernelsrc@ SUBDIRS=$(PWD) @KERNELMAKE_PARAMS@ modules
diff --git a/kernel/configure.ac b/kernel/configure.ac
index 2aa7dee..21dc036 100644
--- a/kernel/configure.ac
+++ b/kernel/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT(fuse-kernel, 2.6.0-pre2)
+AC_INIT(fuse-kernel, 2.6.0-pre3)
AC_CONFIG_HEADERS([config.h])
AC_PROG_INSTALL
diff --git a/kernel/control.c b/kernel/control.c
new file mode 100644
index 0000000..aff8b64
--- /dev/null
+++ b/kernel/control.c
@@ -0,0 +1,217 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/init.h>
+#include <linux/module.h>
+
+#define FUSE_CTL_SUPER_MAGIC 0x65735543
+
+/*
+ * This is non-NULL when the single instance of the control filesystem
+ * exists. Protected by fuse_mutex
+ */
+static struct super_block *fuse_control_sb;
+
+static struct fuse_conn *fuse_ctl_file_conn_get(struct file *file)
+{
+ struct fuse_conn *fc;
+ mutex_lock(&fuse_mutex);
+ fc = file->f_dentry->d_inode->u.generic_ip;
+ if (fc)
+ fc = fuse_conn_get(fc);
+ mutex_unlock(&fuse_mutex);
+ return fc;
+}
+
+static ssize_t fuse_conn_abort_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct fuse_conn *fc = fuse_ctl_file_conn_get(file);
+ if (fc) {
+ fuse_abort_conn(fc);
+ fuse_conn_put(fc);
+ }
+ return count;
+}
+
+static ssize_t fuse_conn_waiting_read(struct file *file, char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ char tmp[32];
+ size_t size;
+
+ if (!*ppos) {
+ struct fuse_conn *fc = fuse_ctl_file_conn_get(file);
+ if (!fc)
+ return 0;
+
+ file->private_data=(void *)(long)atomic_read(&fc->num_waiting);
+ fuse_conn_put(fc);
+ }
+ size = sprintf(tmp, "%ld\n", (long)file->private_data);
+ return simple_read_from_buffer(buf, len, ppos, tmp, size);
+}
+
+static struct file_operations fuse_ctl_abort_ops = {
+ .open = nonseekable_open,
+ .write = fuse_conn_abort_write,
+};
+
+static struct file_operations fuse_ctl_waiting_ops = {
+ .open = nonseekable_open,
+ .read = fuse_conn_waiting_read,
+};
+
+static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
+ struct fuse_conn *fc,
+ const char *name,
+ int mode, int nlink,
+ struct inode_operations *iop,
+ struct file_operations *fop)
+{
+ struct dentry *dentry;
+ struct inode *inode;
+
+ BUG_ON(fc->ctl_ndents >= FUSE_CTL_NUM_DENTRIES);
+ dentry = d_alloc_name(parent, name);
+ if (!dentry)
+ return NULL;
+
+ fc->ctl_dentry[fc->ctl_ndents++] = dentry;
+ inode = new_inode(fuse_control_sb);
+ if (!inode)
+ return NULL;
+
+ inode->i_mode = mode;
+ inode->i_uid = fc->user_id;
+ inode->i_gid = fc->group_id;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ /* setting ->i_op to NULL is not allowed */
+ if (iop)
+ inode->i_op = iop;
+ inode->i_fop = fop;
+ inode->i_nlink = nlink;
+ inode->u.generic_ip = fc;
+ d_add(dentry, inode);
+ return dentry;
+}
+
+/*
+ * Add a connection to the control filesystem (if it exists). Caller
+ * must host fuse_mutex
+ */
+int fuse_ctl_add_conn(struct fuse_conn *fc)
+{
+ struct dentry *parent;
+ char name[32];
+
+ if (!fuse_control_sb)
+ return 0;
+
+ parent = fuse_control_sb->s_root;
+ parent->d_inode->i_nlink++;
+ sprintf(name, "%llu", (unsigned long long) fc->id);
+ parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, 2,
+ &simple_dir_inode_operations,
+ &simple_dir_operations);
+ if (!parent)
+ goto err;
+
+ if (!fuse_ctl_add_dentry(parent, fc, "waiting", S_IFREG | 0400, 1,
+ NULL, &fuse_ctl_waiting_ops) ||
+ !fuse_ctl_add_dentry(parent, fc, "abort", S_IFREG | 0200, 1,
+ NULL, &fuse_ctl_abort_ops))
+ goto err;
+
+ return 0;
+
+ err:
+ fuse_ctl_remove_conn(fc);
+ return -ENOMEM;
+}
+
+/*
+ * Remove a connection from the control filesystem (if it exists).
+ * Caller must host fuse_mutex
+ */
+void fuse_ctl_remove_conn(struct fuse_conn *fc)
+{
+ int i;
+
+ if (!fuse_control_sb)
+ return;
+
+ for (i = fc->ctl_ndents - 1; i >= 0; i--) {
+ struct dentry *dentry = fc->ctl_dentry[i];
+ dentry->d_inode->u.generic_ip = NULL;
+ d_drop(dentry);
+ dput(dentry);
+ }
+ fuse_control_sb->s_root->d_inode->i_nlink--;
+}
+
+static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent)
+{
+ struct tree_descr empty_descr = {""};
+ struct fuse_conn *fc;
+ int err;
+
+ err = simple_fill_super(sb, FUSE_CTL_SUPER_MAGIC, &empty_descr);
+ if (err)
+ return err;
+
+ mutex_lock(&fuse_mutex);
+ BUG_ON(fuse_control_sb);
+ fuse_control_sb = sb;
+ list_for_each_entry(fc, &fuse_conn_list, entry) {
+ err = fuse_ctl_add_conn(fc);
+ if (err) {
+ fuse_control_sb = NULL;
+ mutex_unlock(&fuse_mutex);
+ return err;
+ }
+ }
+ mutex_unlock(&fuse_mutex);
+
+ return 0;
+}
+
+static struct super_block *fuse_ctl_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name,
+ void *raw_data)
+{
+ return get_sb_single(fs_type, flags, raw_data, fuse_ctl_fill_super);
+}
+
+static void fuse_ctl_kill_sb(struct super_block *sb)
+{
+ mutex_lock(&fuse_mutex);
+ fuse_control_sb = NULL;
+ mutex_unlock(&fuse_mutex);
+
+ kill_litter_super(sb);
+}
+
+static struct file_system_type fuse_ctl_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "fusectl",
+ .get_sb = fuse_ctl_get_sb,
+ .kill_sb = fuse_ctl_kill_sb,
+};
+
+int __init fuse_ctl_init(void)
+{
+ return register_filesystem(&fuse_ctl_fs_type);
+}
+
+void fuse_ctl_cleanup(void)
+{
+ unregister_filesystem(&fuse_ctl_fs_type);
+}
diff --git a/kernel/dev.c b/kernel/dev.c
index 80f662f..0231caa 100644
--- a/kernel/dev.c
+++ b/kernel/dev.c
@@ -36,6 +36,7 @@ static void fuse_request_init(struct fuse_req *req)
{
memset(req, 0, sizeof(*req));
INIT_LIST_HEAD(&req->list);
+ INIT_LIST_HEAD(&req->intr_entry);
init_waitqueue_head(&req->waitq);
atomic_set(&req->count, 1);
}
@@ -104,18 +105,6 @@ static void restore_sigs(sigset_t *oldset)
#endif
#endif
-/*
- * Reset request, so that it can be reused
- *
- * The caller must be _very_ careful to make sure, that it is holding
- * the only reference to req
- */
-void fuse_reset_request(struct fuse_req *req)
-{
- BUG_ON(atomic_read(&req->count) != 1);
- fuse_request_init(req);
-}
-
static void __fuse_get_request(struct fuse_req *req)
{
atomic_inc(&req->count);
@@ -128,6 +117,13 @@ static void __fuse_put_request(struct fuse_req *req)
atomic_dec(&req->count);
}
+static void fuse_req_init_context(struct fuse_req *req)
+{
+ req->in.h.uid = current->fsuid;
+ req->in.h.gid = current->fsgid;
+ req->in.h.pid = current->pid;
+}
+
struct fuse_req *fuse_get_req(struct fuse_conn *fc)
{
struct fuse_req *req;
@@ -143,14 +139,16 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc)
if (intr)
goto out;
+ err = -ENOTCONN;
+ if (!fc->connected)
+ goto out;
+
req = fuse_request_alloc();
err = -ENOMEM;
if (!req)
goto out;
- req->in.h.uid = current->fsuid;
- req->in.h.gid = current->fsgid;
- req->in.h.pid = current->pid;
+ fuse_req_init_context(req);
req->waiting = 1;
return req;
@@ -159,142 +157,183 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc)
return ERR_PTR(err);
}
-void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
+/*
+ * Return request in fuse_file->reserved_req. However that may
+ * currently be in use. If that is the case, wait for it to become
+ * available.
+ */
+static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
+ struct file *file)
{
- if (atomic_dec_and_test(&req->count)) {
- if (req->waiting)
- atomic_dec(&fc->num_waiting);
- fuse_request_free(req);
- }
+ struct fuse_req *req = NULL;
+ struct fuse_file *ff = file->private_data;
+
+ do {
+ wait_event(fc->blocked_waitq, ff->reserved_req);
+ spin_lock(&fc->lock);
+ if (ff->reserved_req) {
+ req = ff->reserved_req;
+ ff->reserved_req = NULL;
+ get_file(file);
+ req->stolen_file = file;
+ }
+ spin_unlock(&fc->lock);
+ } while (!req);
+
+ return req;
}
/*
- * Called with sbput_sem held for read (request_end) or write
- * (fuse_put_super). By the time fuse_put_super() is finished, all
- * inodes belonging to background requests must be released, so the
- * iputs have to be done within the locked region.
+ * Put stolen request back into fuse_file->reserved_req
*/
-void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req)
+static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
{
- iput(req->inode);
- iput(req->inode2);
+ struct file *file = req->stolen_file;
+ struct fuse_file *ff = file->private_data;
+
spin_lock(&fc->lock);
- list_del(&req->bg_entry);
- if (fc->num_background == FUSE_MAX_BACKGROUND) {
- fc->blocked = 0;
- wake_up_all(&fc->blocked_waitq);
- }
- fc->num_background--;
+ fuse_request_init(req);
+ BUG_ON(ff->reserved_req);
+ ff->reserved_req = req;
+ wake_up(&fc->blocked_waitq);
spin_unlock(&fc->lock);
+ fput(file);
}
/*
- * This function is called when a request is finished. Either a reply
- * has arrived or it was interrupted (and not yet sent) or some error
- * occurred during communication with userspace, or the device file
- * was closed. In case of a background request the reference to the
- * stored objects are released. The requester thread is woken up (if
- * still waiting), the 'end' callback is called if given, else the
- * reference to the request is released
+ * Gets a requests for a file operation, always succeeds
*
- * Releasing extra reference for foreground requests must be done
- * within the same locked region as setting state to finished. This
- * is because fuse_reset_request() may be called after request is
- * finished and it must be the sole possessor. If request is
- * interrupted and put in the background, it will return with an error
- * and hence never be reset and reused.
+ * This is used for sending the FLUSH request, which must get to
+ * userspace, due to POSIX locks which may need to be unlocked.
*
- * Called with fc->lock, unlocks it
+ * If allocation fails due to OOM, use the reserved request in
+ * fuse_file.
+ *
+ * This is very unlikely to deadlock accidentally, since the
+ * filesystem should not have it's own file open. If deadlock is
+ * intentional, it can still be broken by "aborting" the filesystem.
*/
-static void request_end(struct fuse_conn *fc, struct fuse_req *req)
+struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file)
{
- list_del(&req->list);
- req->state = FUSE_REQ_FINISHED;
- if (!req->background) {
- spin_unlock(&fc->lock);
- wake_up(&req->waitq);
- fuse_put_request(fc, req);
- } else {
- void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
- req->end = NULL;
- spin_unlock(&fc->lock);
- down_read(&fc->sbput_sem);
- if (fc->mounted)
- fuse_release_background(fc, req);
- up_read(&fc->sbput_sem);
+ struct fuse_req *req;
- /* fput must go outside sbput_sem, otherwise it can deadlock */
- if (req->file)
- fput(req->file);
+ atomic_inc(&fc->num_waiting);
+ wait_event(fc->blocked_waitq, !fc->blocked);
+ req = fuse_request_alloc();
+ if (!req)
+ req = get_reserved_req(fc, file);
- if (end)
- end(fc, req);
+ fuse_req_init_context(req);
+ req->waiting = 1;
+ return req;
+}
+
+void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
+{
+ if (atomic_dec_and_test(&req->count)) {
+ if (req->waiting)
+ atomic_dec(&fc->num_waiting);
+
+ if (req->stolen_file)
+ put_reserved_req(fc, req);
else
- fuse_put_request(fc, req);
+ fuse_request_free(req);
}
}
/*
- * Unfortunately request interruption not just solves the deadlock
- * problem, it causes problems too. These stem from the fact, that an
- * interrupted request is continued to be processed in userspace,
- * while all the locks and object references (inode and file) held
- * during the operation are released.
- *
- * To release the locks is exactly why there's a need to interrupt the
- * request, so there's not a lot that can be done about this, except
- * introduce additional locking in userspace.
- *
- * More important is to keep inode and file references until userspace
- * has replied, otherwise FORGET and RELEASE could be sent while the
- * inode/file is still used by the filesystem.
- *
- * For this reason the concept of "background" request is introduced.
- * An interrupted request is backgrounded if it has been already sent
- * to userspace. Backgrounding involves getting an extra reference to
- * inode(s) or file used in the request, and adding the request to
- * fc->background list. When a reply is received for a background
- * request, the object references are released, and the request is
- * removed from the list. If the filesystem is unmounted while there
- * are still background requests, the list is walked and references
- * are released as if a reply was received.
+ * This function is called when a request is finished. Either a reply
+ * has arrived or it was aborted (and not yet sent) or some error
+ * occurred during communication with userspace, or the device file
+ * was closed. The requester thread is woken up (if still waiting),
+ * the 'end' callback is called if given, else the reference to the
+ * request is released
*
- * There's one more use for a background request. The RELEASE message is
- * always sent as background, since it doesn't return an error or
- * data.
+ * Called with fc->lock, unlocks it
*/
-static void background_request(struct fuse_conn *fc, struct fuse_req *req)
-{
- req->background = 1;
- list_add(&req->bg_entry, &fc->background);
- fc->num_background++;
- if (fc->num_background == FUSE_MAX_BACKGROUND)
- fc->blocked = 1;
- if (req->inode)
- req->inode = igrab(req->inode);
- if (req->inode2)
- req->inode2 = igrab(req->inode2);
+static void request_end(struct fuse_conn *fc, struct fuse_req *req)
+{
+ void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
+ req->end = NULL;
+ list_del(&req->list);
+ list_del(&req->intr_entry);
+ req->state = FUSE_REQ_FINISHED;
+ if (req->background) {
+ if (fc->num_background == FUSE_MAX_BACKGROUND) {
+ fc->blocked = 0;
+ wake_up_all(&fc->blocked_waitq);
+ }
+ fc->num_background--;
+ }
+ spin_unlock(&fc->lock);
+ dput(req->dentry);
+ mntput(req->vfsmount);
if (req->file)
- get_file(req->file);
+ fput(req->file);
+ wake_up(&req->waitq);
+ if (end)
+ end(fc, req);
+ else
+ fuse_put_request(fc, req);
}
-/* Called with fc->lock held. Releases, and then reacquires it. */
-static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
+static void wait_answer_interruptible(struct fuse_conn *fc,
+ struct fuse_req *req)
{
- sigset_t oldset;
+ if (signal_pending(current))
+ return;
spin_unlock(&fc->lock);
- block_sigs(&oldset);
wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
- restore_sigs(&oldset);
spin_lock(&fc->lock);
- if (req->state == FUSE_REQ_FINISHED && !req->interrupted)
- return;
+}
+
+static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
+{
+ list_add_tail(&req->intr_entry, &fc->interrupts);
+ wake_up(&fc->waitq);
+ kill_fasync(&fc->fasync, SIGIO, POLL_IN);
+}
+
+/* Called with fc->lock held. Releases, and then reacquires it. */
+static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
+{
+ if (!fc->no_interrupt) {
+ /* Any signal may interrupt this */
+ wait_answer_interruptible(fc, req);
+
+ if (req->aborted)
+ goto aborted;
+ if (req->state == FUSE_REQ_FINISHED)
+ return;
- if (!req->interrupted) {
- req->out.h.error = -EINTR;
req->interrupted = 1;
+ if (req->state == FUSE_REQ_SENT)
+ queue_interrupt(fc, req);
+ }
+
+ if (req->force) {
+ spin_unlock(&fc->lock);
+ wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
+ spin_lock(&fc->lock);
+ } else {
+ sigset_t oldset;
+
+ /* Only fatal signals may interrupt this */
+ block_sigs(&oldset);
+ wait_answer_interruptible(fc, req);
+ restore_sigs(&oldset);
}
+
+ if (req->aborted)
+ goto aborted;
+ if (req->state == FUSE_REQ_FINISHED)
+ return;
+
+ req->out.h.error = -EINTR;
+ req->aborted = 1;
+
+ aborted:
if (req->locked) {
/* This is uninterruptible sleep, because data is
being copied to/from the buffers of req. During
@@ -308,8 +347,11 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
if (req->state == FUSE_REQ_PENDING) {
list_del(&req->list);
__fuse_put_request(req);
- } else if (req->state == FUSE_REQ_SENT)
- background_request(fc, req);
+ } else if (req->state == FUSE_REQ_SENT) {
+ spin_unlock(&fc->lock);
+ wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
+ spin_lock(&fc->lock);
+ }
}
static unsigned len_args(unsigned numargs, struct fuse_arg *args)
@@ -323,13 +365,19 @@ static unsigned len_args(unsigned numargs, struct fuse_arg *args)
return nbytes;
}
+static u64 fuse_get_unique(struct fuse_conn *fc)
+ {
+ fc->reqctr++;
+ /* zero is special */
+ if (fc->reqctr == 0)
+ fc->reqctr = 1;
+
+ return fc->reqctr;
+}
+
static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
{
- fc->reqctr++;
- /* zero is special */
- if (fc->reqctr == 0)
- fc->reqctr = 1;
- req->in.h.unique = fc->reqctr;
+ req->in.h.unique = fuse_get_unique(fc);
req->in.h.len = sizeof(struct fuse_in_header) +
len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
list_add_tail(&req->list, &fc->pending);
@@ -342,9 +390,6 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}
-/*
- * This can only be interrupted by a SIGKILL
- */
void request_send(struct fuse_conn *fc, struct fuse_req *req)
{
req->isreply = 1;
@@ -367,8 +412,12 @@ void request_send(struct fuse_conn *fc, struct fuse_req *req)
static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
{
spin_lock(&fc->lock);
- background_request(fc, req);
if (fc->connected) {
+ req->background = 1;
+ fc->num_background++;
+ if (fc->num_background == FUSE_MAX_BACKGROUND)
+ fc->blocked = 1;
+
queue_request(fc, req);
spin_unlock(&fc->lock);
} else {
@@ -392,14 +441,14 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
/*
* Lock the request. Up to the next unlock_request() there mustn't be
* anything that could cause a page-fault. If the request was already
- * interrupted bail out.
+ * aborted bail out.
*/
static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
{
int err = 0;
if (req) {
spin_lock(&fc->lock);
- if (req->interrupted)
+ if (req->aborted)
err = -ENOENT;
else
req->locked = 1;
@@ -409,7 +458,7 @@ static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
}
/*
- * Unlock request. If it was interrupted during being locked, the
+ * Unlock request. If it was aborted during being locked, the
* requester thread is currently waiting for it to be unlocked, so
* wake it up.
*/
@@ -418,7 +467,7 @@ static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
if (req) {
spin_lock(&fc->lock);
req->locked = 0;
- if (req->interrupted)
+ if (req->aborted)
wake_up(&req->waitq);
spin_unlock(&fc->lock);
}
@@ -597,13 +646,18 @@ static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
return err;
}
+static int request_pending(struct fuse_conn *fc)
+{
+ return !list_empty(&fc->pending) || !list_empty(&fc->interrupts);
+}
+
/* Wait until a request is available on the pending list */
static void request_wait(struct fuse_conn *fc)
{
DECLARE_WAITQUEUE(wait, current);
add_wait_queue_exclusive(&fc->waitq, &wait);
- while (fc->connected && list_empty(&fc->pending)) {
+ while (fc->connected && !request_pending(fc)) {
set_current_state(TASK_INTERRUPTIBLE);
if (signal_pending(current))
break;
@@ -628,11 +682,50 @@ static size_t iov_length(const struct iovec *iov, unsigned long nr_segs)
}
#endif
/*
+ * Transfer an interrupt request to userspace
+ *
+ * Unlike other requests this is assembled on demand, without a need
+ * to allocate a separate fuse_req structure.
+ *
+ * Called with fc->lock held, releases it
+ */
+static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_req *req,
+ const struct iovec *iov, unsigned long nr_segs)
+{
+ struct fuse_copy_state cs;
+ struct fuse_in_header ih;
+ struct fuse_interrupt_in arg;
+ unsigned reqsize = sizeof(ih) + sizeof(arg);
+ int err;
+
+ list_del_init(&req->intr_entry);
+ req->intr_unique = fuse_get_unique(fc);
+ memset(&ih, 0, sizeof(ih));
+ memset(&arg, 0, sizeof(arg));
+ ih.len = reqsize;
+ ih.opcode = FUSE_INTERRUPT;
+ ih.unique = req->intr_unique;
+ arg.unique = req->in.h.unique;
+
+ spin_unlock(&fc->lock);
+ if (iov_length(iov, nr_segs) < reqsize)
+ return -EINVAL;
+
+ fuse_copy_init(&cs, fc, 1, NULL, iov, nr_segs);
+ err = fuse_copy_one(&cs, &ih, sizeof(ih));
+ if (!err)
+ err = fuse_copy_one(&cs, &arg, sizeof(arg));
+ fuse_copy_finish(&cs);
+
+ return err ? err : reqsize;
+}
+
+/*
* Read a single request into the userspace filesystem's buffer. This
* function waits until a request is available, then removes it from
* the pending list and copies request data to userspace buffer. If
- * no reply is needed (FORGET) or request has been interrupted or
- * there was an error during the copying then it's finished by calling
+ * no reply is needed (FORGET) or request has been aborted or there
+ * was an error during the copying then it's finished by calling
* request_end(). Otherwise add it to the processing list, and set
* the 'sent' flag.
*/
@@ -652,7 +745,7 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
spin_lock(&fc->lock);
err = -EAGAIN;
if ((file->f_flags & O_NONBLOCK) && fc->connected &&
- list_empty(&fc->pending))
+ !request_pending(fc))
goto err_unlock;
request_wait(fc);
@@ -660,9 +753,15 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
if (!fc->connected)
goto err_unlock;
err = -ERESTARTSYS;
- if (list_empty(&fc->pending))
+ if (!request_pending(fc))
goto err_unlock;
+ if (!list_empty(&fc->interrupts)) {
+ req = list_entry(fc->interrupts.next, struct fuse_req,
+ intr_entry);
+ return fuse_read_interrupt(fc, req, iov, nr_segs);
+ }
+
req = list_entry(fc->pending.next, struct fuse_req, list);
req->state = FUSE_REQ_READING;
list_move(&req->list, &fc->io);
@@ -687,10 +786,10 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
fuse_copy_finish(&cs);
spin_lock(&fc->lock);
req->locked = 0;
- if (!err && req->interrupted)
+ if (!err && req->aborted)
err = -ENOENT;
if (err) {
- if (!req->interrupted)
+ if (!req->aborted)
req->out.h.error = -EIO;
request_end(fc, req);
return err;
@@ -700,6 +799,8 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
else {
req->state = FUSE_REQ_SENT;
list_move_tail(&req->list, &fc->processing);
+ if (req->interrupted)
+ queue_interrupt(fc, req);
spin_unlock(&fc->lock);
}
return reqsize;
@@ -726,7 +827,7 @@ static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
list_for_each(entry, &fc->processing) {
struct fuse_req *req;
req = list_entry(entry, struct fuse_req, list);
- if (req->in.h.unique == unique)
+ if (req->in.h.unique == unique || req->intr_unique == unique)
return req;
}
return NULL;
@@ -792,17 +893,33 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
goto err_unlock;
req = request_find(fc, oh.unique);
- err = -EINVAL;
if (!req)
goto err_unlock;
- if (req->interrupted) {
+ if (req->aborted) {
spin_unlock(&fc->lock);
fuse_copy_finish(&cs);
spin_lock(&fc->lock);
request_end(fc, req);
return -ENOENT;
}
+ /* Is it an interrupt reply? */
+ if (req->intr_unique == oh.unique) {
+ err = -EINVAL;
+ if (nbytes != sizeof(struct fuse_out_header))
+ goto err_unlock;
+
+ if (oh.error == -ENOSYS)
+ fc->no_interrupt = 1;
+ else if (oh.error == -EAGAIN)
+ queue_interrupt(fc, req);
+
+ spin_unlock(&fc->lock);
+ fuse_copy_finish(&cs);
+ return nbytes;
+ }
+
+ req->state = FUSE_REQ_WRITING;
list_move(&req->list, &fc->io);
req->out.h = oh;
req->locked = 1;
@@ -815,9 +932,9 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
spin_lock(&fc->lock);
req->locked = 0;
if (!err) {
- if (req->interrupted)
+ if (req->aborted)
err = -ENOENT;
- } else if (!req->interrupted)
+ } else if (!req->aborted)
req->out.h.error = -EIO;
request_end(fc, req);
@@ -851,7 +968,7 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
spin_lock(&fc->lock);
if (!fc->connected)
mask = POLLERR;
- else if (!list_empty(&fc->pending))
+ else if (request_pending(fc))
mask |= POLLIN | POLLRDNORM;
spin_unlock(&fc->lock);
@@ -877,7 +994,7 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head)
/*
* Abort requests under I/O
*
- * The requests are set to interrupted and finished, and the request
+ * The requests are set to aborted and finished, and the request
* waiter is woken up. This will make request_wait_answer() wait
* until the request is unlocked and then return.
*
@@ -892,7 +1009,7 @@ static void end_io_requests(struct fuse_conn *fc)
list_entry(fc->io.next, struct fuse_req, list);
void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
- req->interrupted = 1;
+ req->aborted = 1;
req->out.h.error = -ECONNABORTED;
req->state = FUSE_REQ_FINISHED;
list_del_init(&req->list);
@@ -925,19 +1042,20 @@ static void end_io_requests(struct fuse_conn *fc)
* onto the pending list is prevented by req->connected being false.
*
* Progression of requests under I/O to the processing list is
- * prevented by the req->interrupted flag being true for these
- * requests. For this reason requests on the io list must be aborted
- * first.
+ * prevented by the req->aborted flag being true for these requests.
+ * For this reason requests on the io list must be aborted first.
*/
void fuse_abort_conn(struct fuse_conn *fc)
{
spin_lock(&fc->lock);
if (fc->connected) {
fc->connected = 0;
+ fc->blocked = 0;
end_io_requests(fc);
end_requests(fc, &fc->pending);
end_requests(fc, &fc->processing);
wake_up_all(&fc->waitq);
+ wake_up_all(&fc->blocked_waitq);
kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}
spin_unlock(&fc->lock);
@@ -953,7 +1071,7 @@ static int fuse_dev_release(struct inode *inode, struct file *file)
end_requests(fc, &fc->processing);
spin_unlock(&fc->lock);
fasync_helper(-1, file, 0, &fc->fasync);
- kobject_put(&fc->kobj);
+ fuse_conn_put(fc);
}
return 0;
diff --git a/kernel/dir.c b/kernel/dir.c
index a2887b6..66dac79 100644
--- a/kernel/dir.c
+++ b/kernel/dir.c
@@ -20,6 +20,33 @@
#include <linux/namei.h>
#endif
+#if BITS_PER_LONG >= 64
+static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
+{
+ entry->d_time = time;
+}
+
+static inline u64 fuse_dentry_time(struct dentry *entry)
+{
+ return entry->d_time;
+}
+#else
+/*
+ * On 32 bit archs store the high 32 bits of time in d_fsdata
+ */
+static void fuse_dentry_settime(struct dentry *entry, u64 time)
+{
+ entry->d_time = time;
+ entry->d_fsdata = (void *) (unsigned long) (time >> 32);
+}
+
+static u64 fuse_dentry_time(struct dentry *entry)
+{
+ return (u64) entry->d_time +
+ ((u64) (unsigned long) entry->d_fsdata << 32);
+}
+#endif
+
/*
* FUSE caches dentries and attributes with separate timeout. The
* time in jiffies until the dentry/attributes are valid is stored in
@@ -29,10 +56,13 @@
/*
* Calculate the time in jiffies until a dentry/attributes are valid
*/
-static unsigned long time_to_jiffies(unsigned long sec, unsigned long nsec)
+static u64 time_to_jiffies(unsigned long sec, unsigned long nsec)
{
- struct timespec ts = {sec, nsec};
- return jiffies + timespec_to_jiffies(&ts);
+ if (sec || nsec) {
+ struct timespec ts = {sec, nsec};
+ return get_jiffies_64() + timespec_to_jiffies(&ts);
+ } else
+ return 0;
}
/*
@@ -41,7 +71,8 @@ static unsigned long time_to_jiffies(unsigned long sec, unsigned long nsec)
*/
static void fuse_change_timeout(struct dentry *entry, struct fuse_entry_out *o)
{
- entry->d_time = time_to_jiffies(o->entry_valid, o->entry_valid_nsec);
+ fuse_dentry_settime(entry,
+ time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
if (entry->d_inode)
get_fuse_inode(entry->d_inode)->i_time =
time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
@@ -53,7 +84,7 @@ static void fuse_change_timeout(struct dentry *entry, struct fuse_entry_out *o)
*/
void fuse_invalidate_attr(struct inode *inode)
{
- get_fuse_inode(inode)->i_time = jiffies - 1;
+ get_fuse_inode(inode)->i_time = 0;
}
/*
@@ -66,7 +97,7 @@ void fuse_invalidate_attr(struct inode *inode)
*/
static void fuse_invalidate_entry_cache(struct dentry *entry)
{
- entry->d_time = jiffies - 1;
+ fuse_dentry_settime(entry, 0);
}
/*
@@ -85,7 +116,6 @@ static void fuse_lookup_init(struct fuse_req *req, struct inode *dir,
{
req->in.h.opcode = FUSE_LOOKUP;
req->in.h.nodeid = get_node_id(dir);
- req->inode = dir;
req->in.numargs = 1;
req->in.args[0].size = entry->d_name.len + 1;
req->in.args[0].value = entry->d_name.name;
@@ -109,7 +139,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
if (inode && is_bad_inode(inode))
return 0;
- else if (time_after(jiffies, entry->d_time)) {
+ else if (fuse_dentry_time(entry) < get_jiffies_64()) {
int err;
struct fuse_entry_out outarg;
struct fuse_conn *fc;
@@ -262,6 +292,20 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
#ifdef HAVE_LOOKUP_INSTANTIATE_FILP
/*
+ * Synchronous release for the case when something goes wrong in CREATE_OPEN
+ */
+static void fuse_sync_release(struct fuse_conn *fc, struct fuse_file *ff,
+ u64 nodeid, int flags)
+{
+ struct fuse_req *req;
+
+ req = fuse_release_fill(ff, nodeid, flags, FUSE_RELEASE);
+ req->force = 1;
+ request_send(fc, req);
+ fuse_put_request(fc, req);
+}
+
+/*
* Atomic create+open operation
*
* If the filesystem doesn't support this, then fall back to separate
@@ -274,6 +318,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
struct inode *inode;
struct fuse_conn *fc = get_fuse_conn(dir);
struct fuse_req *req;
+ struct fuse_req *forget_req;
struct fuse_open_in inarg;
struct fuse_open_out outopen;
struct fuse_entry_out outentry;
@@ -284,9 +329,14 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
if (fc->no_create)
return -ENOSYS;
+ forget_req = fuse_get_req(fc);
+ if (IS_ERR(forget_req))
+ return PTR_ERR(forget_req);
+
req = fuse_get_req(fc);
+ err = PTR_ERR(req);
if (IS_ERR(req))
- return PTR_ERR(req);
+ goto out_put_forget_req;
err = -ENOMEM;
ff = fuse_file_alloc();
@@ -299,7 +349,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
inarg.mode = mode;
req->in.h.opcode = FUSE_CREATE;
req->in.h.nodeid = get_node_id(dir);
- req->inode = dir;
req->in.numargs = 2;
req->in.args[0].size = sizeof(inarg);
req->in.args[0].value = &inarg;
@@ -322,25 +371,23 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
goto out_free_ff;
+ fuse_put_request(fc, req);
inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
&outentry.attr);
- err = -ENOMEM;
if (!inode) {
flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
ff->fh = outopen.fh;
- /* Special release, with inode = NULL, this will
- trigger a 'forget' request when the release is
- complete */
- fuse_send_release(fc, ff, outentry.nodeid, NULL, flags, 0);
- goto out_put_request;
+ fuse_sync_release(fc, ff, outentry.nodeid, flags);
+ fuse_send_forget(fc, forget_req, outentry.nodeid, 1);
+ return -ENOMEM;
}
- fuse_put_request(fc, req);
+ fuse_put_request(fc, forget_req);
d_instantiate(entry, inode);
fuse_change_timeout(entry, &outentry);
file = lookup_instantiate_filp(nd, entry, generic_file_open);
if (IS_ERR(file)) {
ff->fh = outopen.fh;
- fuse_send_release(fc, ff, outentry.nodeid, inode, flags, 0);
+ fuse_sync_release(fc, ff, outentry.nodeid, flags);
return PTR_ERR(file);
}
fuse_finish_open(inode, file, ff, &outopen);
@@ -350,6 +397,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
fuse_file_free(ff);
out_put_request:
fuse_put_request(fc, req);
+ out_put_forget_req:
+ fuse_put_request(fc, forget_req);
return err;
}
#endif
@@ -366,7 +415,6 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
int err;
req->in.h.nodeid = get_node_id(dir);
- req->inode = dir;
req->out.numargs = 1;
req->out.args[0].size = sizeof(outarg);
req->out.args[0].value = &outarg;
@@ -488,7 +536,6 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
req->in.h.opcode = FUSE_UNLINK;
req->in.h.nodeid = get_node_id(dir);
- req->inode = dir;
req->in.numargs = 1;
req->in.args[0].size = entry->d_name.len + 1;
req->in.args[0].value = entry->d_name.name;
@@ -520,7 +567,6 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
req->in.h.opcode = FUSE_RMDIR;
req->in.h.nodeid = get_node_id(dir);
- req->inode = dir;
req->in.numargs = 1;
req->in.args[0].size = entry->d_name.len + 1;
req->in.args[0].value = entry->d_name.name;
@@ -550,8 +596,6 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent,
inarg.newdir = get_node_id(newdir);
req->in.h.opcode = FUSE_RENAME;
req->in.h.nodeid = get_node_id(olddir);
- req->inode = olddir;
- req->inode2 = newdir;
req->in.numargs = 3;
req->in.args[0].size = sizeof(inarg);
req->in.args[0].value = &inarg;
@@ -598,7 +642,6 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
memset(&inarg, 0, sizeof(inarg));
inarg.oldnodeid = get_node_id(inode);
req->in.h.opcode = FUSE_LINK;
- req->inode2 = inode;
req->in.numargs = 2;
req->in.args[0].size = sizeof(inarg);
req->in.args[0].value = &inarg;
@@ -627,7 +670,6 @@ int fuse_do_getattr(struct inode *inode)
req->in.h.opcode = FUSE_GETATTR;
req->in.h.nodeid = get_node_id(inode);
- req->inode = inode;
req->out.numargs = 1;
req->out.args[0].size = sizeof(arg);
req->out.args[0].value = &arg;
@@ -700,7 +742,7 @@ static int fuse_revalidate(struct dentry *entry)
if (!fuse_allow_task(fc, current))
return -EACCES;
if (get_node_id(inode) != FUSE_ROOT_ID &&
- time_before_eq(jiffies, fi->i_time))
+ fi->i_time >= get_jiffies_64())
return 0;
return fuse_do_getattr(inode);
@@ -725,7 +767,6 @@ static int fuse_access(struct inode *inode, int mask)
inarg.mask = mask;
req->in.h.opcode = FUSE_ACCESS;
req->in.h.nodeid = get_node_id(inode);
- req->inode = inode;
req->in.numargs = 1;
req->in.args[0].size = sizeof(inarg);
req->in.args[0].value = &inarg;
@@ -882,7 +923,6 @@ static char *read_link(struct dentry *dentry)
}
req->in.h.opcode = FUSE_READLINK;
req->in.h.nodeid = get_node_id(inode);
- req->inode = inode;
req->out.argvar = 1;
req->out.numargs = 1;
req->out.args[0].size = PAGE_SIZE - 1;
@@ -1048,7 +1088,6 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
iattr_to_fattr(attr, &inarg);
req->in.h.opcode = FUSE_SETATTR;
req->in.h.nodeid = get_node_id(inode);
- req->inode = inode;
req->in.numargs = 1;
req->in.args[0].size = sizeof(inarg);
req->in.args[0].value = &inarg;
@@ -1145,7 +1184,6 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
inarg.flags = flags;
req->in.h.opcode = FUSE_SETXATTR;
req->in.h.nodeid = get_node_id(inode);
- req->inode = inode;
req->in.numargs = 3;
req->in.args[0].size = sizeof(inarg);
req->in.args[0].value = &inarg;
@@ -1184,7 +1222,6 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
inarg.size = size;
req->in.h.opcode = FUSE_GETXATTR;
req->in.h.nodeid = get_node_id(inode);
- req->inode = inode;
req->in.numargs = 2;
req->in.args[0].size = sizeof(inarg);
req->in.args[0].value = &inarg;
@@ -1234,7 +1271,6 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
inarg.size = size;
req->in.h.opcode = FUSE_LISTXATTR;
req->in.h.nodeid = get_node_id(inode);
- req->inode = inode;
req->in.numargs = 1;
req->in.args[0].size = sizeof(inarg);
req->in.args[0].value = &inarg;
@@ -1278,7 +1314,6 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
req->in.h.opcode = FUSE_REMOVEXATTR;
req->in.h.nodeid = get_node_id(inode);
- req->inode = inode;
req->in.numargs = 1;
req->in.args[0].size = strlen(name) + 1;
req->in.args[0].value = name;
diff --git a/kernel/file.c b/kernel/file.c
index dd9758c..42e56f0 100644
--- a/kernel/file.c
+++ b/kernel/file.c
@@ -40,7 +40,6 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
req->in.h.nodeid = get_node_id(inode);
- req->inode = inode;
req->in.numargs = 1;
req->in.args[0].size = sizeof(inarg);
req->in.args[0].value = &inarg;
@@ -59,8 +58,8 @@ struct fuse_file *fuse_file_alloc(void)
struct fuse_file *ff;
ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL);
if (ff) {
- ff->release_req = fuse_request_alloc();
- if (!ff->release_req) {
+ ff->reserved_req = fuse_request_alloc();
+ if (!ff->reserved_req) {
kfree(ff);
ff = NULL;
}
@@ -70,7 +69,7 @@ struct fuse_file *fuse_file_alloc(void)
void fuse_file_free(struct fuse_file *ff)
{
- fuse_request_free(ff->release_req);
+ fuse_request_free(ff->reserved_req);
kfree(ff);
}
@@ -127,37 +126,22 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir)
return err;
}
-/* Special case for failed iget in CREATE */
-static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
+struct fuse_req *fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags,
+ int opcode)
{
- /* If called from end_io_requests(), req has more than one
- reference and fuse_reset_request() cannot work */
- if (fc->connected) {
- u64 nodeid = req->in.h.nodeid;
- fuse_reset_request(req);
- fuse_send_forget(fc, req, nodeid, 1);
- } else
- fuse_put_request(fc, req);
-}
-
-void fuse_send_release(struct fuse_conn *fc, struct fuse_file *ff,
- u64 nodeid, struct inode *inode, int flags, int isdir)
-{
- struct fuse_req * req = ff->release_req;
+ struct fuse_req *req = ff->reserved_req;
struct fuse_release_in *inarg = &req->misc.release_in;
inarg->fh = ff->fh;
inarg->flags = flags;
- req->in.h.opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;
+ req->in.h.opcode = opcode;
req->in.h.nodeid = nodeid;
- req->inode = inode;
req->in.numargs = 1;
req->in.args[0].size = sizeof(struct fuse_release_in);
req->in.args[0].value = inarg;
- request_send_background(fc, req);
- if (!inode)
- req->end = fuse_release_end;
kfree(ff);
+
+ return req;
}
int fuse_release_common(struct inode *inode, struct file *file, int isdir)
@@ -165,8 +149,15 @@ int fuse_release_common(struct inode *inode, struct file *file, int isdir)
struct fuse_file *ff = file->private_data;
if (ff) {
struct fuse_conn *fc = get_fuse_conn(inode);
- u64 nodeid = get_node_id(inode);
- fuse_send_release(fc, ff, nodeid, inode, file->f_flags, isdir);
+ struct fuse_req *req;
+
+ req = fuse_release_fill(ff, get_node_id(inode), file->f_flags,
+ isdir ? FUSE_RELEASEDIR : FUSE_RELEASE);
+
+ /* Hold vfsmount and dentry until release is finished */
+ req->vfsmount = mntget(file->f_vfsmnt);
+ req->dentry = dget(file->f_dentry);
+ request_send_background(fc, req);
}
/* Return value is ignored by VFS */
@@ -183,7 +174,33 @@ static int fuse_release(struct inode *inode, struct file *file)
return fuse_release_common(inode, file, 0);
}
+/*
+ * Scramble the ID space with XTEA, so that the value of the files_struct
+ * pointer is not exposed to userspace.
+ */
+static u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
+{
+ u32 *k = fc->scramble_key;
+ u64 v = (unsigned long) id;
+ u32 v0 = v;
+ u32 v1 = v >> 32;
+ u32 sum = 0;
+ int i;
+
+ for (i = 0; i < 32; i++) {
+ v0 += ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + k[sum & 3]);
+ sum += 0x9E3779B9;
+ v1 += ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + k[sum>>11 & 3]);
+ }
+
+ return (u64) v0 + ((u64) v1 << 32);
+}
+
+#ifdef KERNEL_2_6_18_PLUS
+static int fuse_flush(struct file *file, fl_owner_t id)
+#else
static int fuse_flush(struct file *file)
+#endif
{
struct inode *inode = file->f_dentry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
@@ -198,19 +215,20 @@ static int fuse_flush(struct file *file)
if (fc->no_flush)
return 0;
- req = fuse_get_req(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
+ req = fuse_get_req_nofail(fc, file);
memset(&inarg, 0, sizeof(inarg));
inarg.fh = ff->fh;
+#ifdef KERNEL_2_6_18_PLUS
+ inarg.lock_owner = fuse_lock_owner_id(fc, id);
+#else
+ inarg.lock_owner = fuse_lock_owner_id(fc, NULL);
+#endif
req->in.h.opcode = FUSE_FLUSH;
req->in.h.nodeid = get_node_id(inode);
- req->inode = inode;
- req->file = file;
req->in.numargs = 1;
req->in.args[0].size = sizeof(inarg);
req->in.args[0].value = &inarg;
+ req->force = 1;
request_send(fc, req);
err = req->out.h.error;
fuse_put_request(fc, req);
@@ -246,8 +264,6 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
inarg.fsync_flags = datasync ? 1 : 0;
req->in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC;
req->in.h.nodeid = get_node_id(inode);
- req->inode = inode;
- req->file = file;
req->in.numargs = 1;
req->in.args[0].size = sizeof(inarg);
req->in.args[0].value = &inarg;
@@ -280,8 +296,6 @@ void fuse_read_fill(struct fuse_req *req, struct file *file,
inarg->size = count;
req->in.h.opcode = opcode;
req->in.h.nodeid = get_node_id(inode);
- req->inode = inode;
- req->file = file;
req->in.numargs = 1;
req->in.args[0].size = sizeof(struct fuse_read_in);
req->in.args[0].value = inarg;
@@ -357,6 +371,8 @@ static void fuse_send_readpages(struct fuse_req *req, struct file *file,
req->out.page_zeroing = 1;
fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
if (fc->async_read) {
+ get_file(file);
+ req->file = file;
req->end = fuse_readpages_end;
request_send_background(fc, req);
} else {
@@ -534,8 +550,6 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file,
inarg.size = count;
req->in.h.opcode = FUSE_WRITE;
req->in.h.nodeid = get_node_id(inode);
- req->inode = inode;
- req->file = file;
req->in.argpages = 1;
req->in.numargs = 2;
req->in.args[0].size = sizeof(struct fuse_write_in);
@@ -763,6 +777,136 @@ static int fuse_set_page_dirty(struct page *page)
}
#endif
+static int convert_fuse_file_lock(const struct fuse_file_lock *ffl,
+ struct file_lock *fl)
+{
+ switch (ffl->type) {
+ case F_UNLCK:
+ break;
+
+ case F_RDLCK:
+ case F_WRLCK:
+ if (ffl->start > OFFSET_MAX || ffl->end > OFFSET_MAX ||
+ ffl->end < ffl->start)
+ return -EIO;
+
+ fl->fl_start = ffl->start;
+ fl->fl_end = ffl->end;
+ fl->fl_pid = ffl->pid;
+ break;
+
+ default:
+ return -EIO;
+ }
+ fl->fl_type = ffl->type;
+ return 0;
+}
+
+static void fuse_lk_fill(struct fuse_req *req, struct file *file,
+ const struct file_lock *fl, int opcode, pid_t pid)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_file *ff = file->private_data;
+ struct fuse_lk_in *arg = &req->misc.lk_in;
+
+ arg->fh = ff->fh;
+ arg->owner = fuse_lock_owner_id(fc, fl->fl_owner);
+ arg->lk.start = fl->fl_start;
+ arg->lk.end = fl->fl_end;
+ arg->lk.type = fl->fl_type;
+ arg->lk.pid = pid;
+ req->in.h.opcode = opcode;
+ req->in.h.nodeid = get_node_id(inode);
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(*arg);
+ req->in.args[0].value = arg;
+}
+
+static int fuse_getlk(struct file *file, struct file_lock *fl)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ struct fuse_lk_out outarg;
+ int err;
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ fuse_lk_fill(req, file, fl, FUSE_GETLK, 0);
+ req->out.numargs = 1;
+ req->out.args[0].size = sizeof(outarg);
+ req->out.args[0].value = &outarg;
+ request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (!err)
+ err = convert_fuse_file_lock(&outarg.lk, fl);
+
+ return err;
+}
+
+static int fuse_setlk(struct file *file, struct file_lock *fl)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK;
+ pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0;
+ int err;
+
+#ifdef KERNEL_2_6_18_PLUS
+ /* Unlock on close is handled by the flush method */
+ if (fl->fl_flags & FL_CLOSE)
+ return 0;
+#endif
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ fuse_lk_fill(req, file, fl, opcode, pid);
+ request_send(fc, req);
+ err = req->out.h.error;
+ /* locking is restartable */
+ if (err == -EINTR)
+ err = -ERESTARTSYS;
+ fuse_put_request(fc, req);
+ return err;
+}
+
+static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ int err;
+
+ if (cmd == F_GETLK) {
+ if (fc->no_lock) {
+#ifdef KERNEL_2_6_17_PLUS
+ if (!posix_test_lock(file, fl, fl))
+ fl->fl_type = F_UNLCK;
+#else
+ struct file_lock *cfl = posix_test_lock(file, fl);
+ if (!cfl)
+ fl->fl_type = F_UNLCK;
+ else
+ *fl = *cfl;
+#endif
+ err = 0;
+ } else
+ err = fuse_getlk(file, fl);
+ } else {
+ if (fc->no_lock)
+ err = posix_lock_file_wait(file, fl);
+ else
+ err = fuse_setlk(file, fl);
+ }
+ return err;
+}
+
static struct file_operations fuse_file_operations = {
.llseek = generic_file_llseek,
#ifdef KERNEL_2_6
@@ -777,6 +921,7 @@ static struct file_operations fuse_file_operations = {
.flush = fuse_flush,
.release = fuse_release,
.fsync = fuse_fsync,
+ .lock = fuse_file_lock,
#ifdef KERNEL_2_6
.sendfile = generic_file_sendfile,
#endif
@@ -790,6 +935,7 @@ static struct file_operations fuse_direct_io_file_operations = {
.flush = fuse_flush,
.release = fuse_release,
.fsync = fuse_fsync,
+ .lock = fuse_file_lock,
/* no mmap and sendfile */
};
diff --git a/kernel/fuse_i.h b/kernel/fuse_i.h
index 1693b05..5f718a2 100644
--- a/kernel/fuse_i.h
+++ b/kernel/fuse_i.h
@@ -43,6 +43,12 @@
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
# define KERNEL_2_6_16_PLUS
# endif
+# if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,17)
+# define KERNEL_2_6_17_PLUS
+# endif
+# if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,18)
+# define KERNEL_2_6_18_PLUS
+# endif
#endif
#include "config.h"
@@ -62,6 +68,7 @@
#endif /* KERNEL_2_6 */
#endif /* FUSE_MAINLINE */
#include <linux/fs.h>
+#include <linux/mount.h>
#include <linux/wait.h>
#include <linux/list.h>
#include <linux/spinlock.h>
@@ -69,7 +76,14 @@
#include <linux/mm.h>
#include <linux/backing-dev.h>
#endif
+#ifdef KERNEL_2_6_17_PLUS
+#include <linux/mutex.h>
+#else
#include <asm/semaphore.h>
+#define DEFINE_MUTEX(m) DECLARE_MUTEX(m)
+#define mutex_lock(m) down(m)
+#define mutex_unlock(m) up(m)
+#endif
#ifndef BUG_ON
#define BUG_ON(x)
@@ -90,22 +104,6 @@ static inline void set_page_dirty_lock(struct page *page)
set_page_dirty(page);
unlock_page(page);
}
-
-struct kobject {
- atomic_t count;
- void (*release) (struct kobject *);
-};
-
-static inline void kobject_get(struct kobject *kobj)
-{
- atomic_inc(&kobj->count);
-}
-
-static inline void kobject_put(struct kobject *kobj)
-{
- if (atomic_dec_and_test(&kobj->count))
- kobj->release(kobj);
-}
#endif
/** Max number of pages that can be used in a single read request */
#define FUSE_MAX_PAGES_PER_REQ 32
@@ -116,6 +114,9 @@ static inline void kobject_put(struct kobject *kobj)
/** It could be as large as PATH_MAX, but would that have any uses? */
#define FUSE_NAME_MAX 1024
+/** Number of dentries for each connection in the control filesystem */
+#define FUSE_CTL_NUM_DENTRIES 3
+
/** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem
module will check permissions based on the file mode. Otherwise no
permission checking is done in the kernel */
@@ -125,6 +126,15 @@ static inline void kobject_put(struct kobject *kobj)
doing the mount will be allowed to access the filesystem */
#define FUSE_ALLOW_OTHER (1 << 1)
+/** List of active connections */
+extern struct list_head fuse_conn_list;
+
+/** Global mutex protecting fuse_conn_list and the control filesystem */
+#ifdef KERNEL_2_6_17_PLUS
+extern struct mutex fuse_mutex;
+#else
+extern struct semaphore fuse_mutex;
+#endif
#ifndef KERNEL_2_6
/** Allow FUSE to combine reads into 64k chunks. This is useful if
the filesystem is better at handling large chunks */
@@ -147,13 +157,13 @@ struct fuse_inode {
struct fuse_req *forget_req;
/** Time in jiffies until the file attributes are valid */
- unsigned long i_time;
+ u64 i_time;
};
/** FUSE specific file data */
struct fuse_file {
/** Request reserved for flush and release */
- struct fuse_req *release_req;
+ struct fuse_req *reserved_req;
/** File handle used by userspace */
u64 fh;
@@ -219,6 +229,7 @@ enum fuse_req_state {
FUSE_REQ_PENDING,
FUSE_REQ_READING,
FUSE_REQ_SENT,
+ FUSE_REQ_WRITING,
FUSE_REQ_FINISHED
};
@@ -232,12 +243,15 @@ struct fuse_req {
fuse_conn */
struct list_head list;
- /** Entry on the background list */
- struct list_head bg_entry;
+ /** Entry on the interrupts list */
+ struct list_head intr_entry;
/** refcount */
atomic_t count;
+ /** Unique ID for the interrupt request */
+ u64 intr_unique;
+
/*
* The following bitfields are either set once before the
* request is queued or setting/clearing them is protected by
@@ -247,12 +261,18 @@ struct fuse_req {
/** True if the request has reply */
unsigned isreply:1;
- /** The request was interrupted */
- unsigned interrupted:1;
+ /** Force sending of the request even if interrupted */
+ unsigned force:1;
+
+ /** The request was aborted */
+ unsigned aborted:1;
/** Request is sent in the background */
unsigned background:1;
+ /** The request has been interrupted */
+ unsigned interrupted:1;
+
/** Data is being copied to/from the request */
unsigned locked:1;
@@ -278,6 +298,7 @@ struct fuse_req {
struct fuse_init_in init_in;
struct fuse_init_out init_out;
struct fuse_read_in read_in;
+ struct fuse_lk_in lk_in;
} misc;
/** page vector */
@@ -289,17 +310,20 @@ struct fuse_req {
/** offset of data on first page */
unsigned page_offset;
- /** Inode used in the request */
- struct inode *inode;
-
- /** Second inode used in the request (or NULL) */
- struct inode *inode2;
-
/** File used in the request (or NULL) */
struct file *file;
+ /** vfsmount used in release */
+ struct vfsmount *vfsmount;
+
+ /** dentry used in release */
+ struct dentry *dentry;
+
/** Request completion callback */
void (*end)(struct fuse_conn *, struct fuse_req *);
+
+ /** Request is stolen from fuse_file->reserved_req */
+ struct file *stolen_file;
};
/**
@@ -313,6 +337,9 @@ struct fuse_conn {
/** Lock protecting accessess to members of this structure */
spinlock_t lock;
+ /** Refcount */
+ atomic_t count;
+
/** The user id for this mount */
uid_t user_id;
@@ -340,13 +367,12 @@ struct fuse_conn {
/** The list of requests under I/O */
struct list_head io;
- /** Requests put in the background (RELEASE or any other
- interrupted request) */
- struct list_head background;
-
/** Number of requests currently in the background */
unsigned num_background;
+ /** Pending interrupts */
+ struct list_head interrupts;
+
/** Flag indicating if connection is blocked. This will be
the case before the INIT reply is received, and if there
are too many outstading backgrounds requests */
@@ -355,15 +381,9 @@ struct fuse_conn {
/** waitq for blocked connection */
wait_queue_head_t blocked_waitq;
- /** RW semaphore for exclusion with fuse_put_super() */
- struct rw_semaphore sbput_sem;
-
/** The next unique request id */
u64 reqctr;
- /** Mount is active */
- unsigned mounted;
-
/** Connection established, cleared on umount, connection
abort and device release */
unsigned connected;
@@ -402,12 +422,18 @@ struct fuse_conn {
/** Is removexattr not implemented by fs? */
unsigned no_removexattr : 1;
+ /** Are file locking primitives not implemented by fs? */
+ unsigned no_lock : 1;
+
/** Is access not implemented by fs? */
unsigned no_access : 1;
/** Is create not implemented by fs? */
unsigned no_create : 1;
+ /** Is interrupt not implemented by fs? */
+ unsigned no_interrupt : 1;
+
/** The number of requests waiting for completion */
atomic_t num_waiting;
@@ -419,11 +445,23 @@ struct fuse_conn {
struct backing_dev_info bdi;
#endif
- /** kobject */
- struct kobject kobj;
+ /** Entry on the fuse_conn_list */
+ struct list_head entry;
+
+ /** Unique ID */
+ u64 id;
+
+ /** Dentries in the control filesystem */
+ struct dentry *ctl_dentry[FUSE_CTL_NUM_DENTRIES];
+
+ /** number of dentries used in the above array */
+ int ctl_ndents;
/** O_ASYNC requests */
struct fasync_struct *fasync;
+
+ /** Key for lock owner ID scrambling */
+ u32 scramble_key[4];
};
static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
@@ -436,11 +474,6 @@ static inline struct fuse_conn *get_fuse_conn(struct inode *inode)
return get_fuse_conn_super(inode->i_sb);
}
-static inline struct fuse_conn *get_fuse_conn_kobj(struct kobject *obj)
-{
- return container_of(obj, struct fuse_conn, kobj);
-}
-
static inline struct fuse_inode *get_fuse_inode(struct inode *inode)
{
return container_of(inode, struct fuse_inode, inode);
@@ -482,12 +515,9 @@ void fuse_file_free(struct fuse_file *ff);
void fuse_finish_open(struct inode *inode, struct file *file,
struct fuse_file *ff, struct fuse_open_out *outarg);
-/**
- * Send a RELEASE request
- */
-void fuse_send_release(struct fuse_conn *fc, struct fuse_file *ff,
- u64 nodeid, struct inode *inode, int flags, int isdir);
-
+/** */
+struct fuse_req *fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags,
+ int opcode);
/**
* Send RELEASE or RELEASEDIR request
*/
@@ -534,6 +564,9 @@ int fuse_dev_init(void);
*/
void fuse_dev_cleanup(void);
+int fuse_ctl_init(void);
+void fuse_ctl_cleanup(void);
+
/**
* Allocate a request
*/
@@ -545,14 +578,14 @@ struct fuse_req *fuse_request_alloc(void);
void fuse_request_free(struct fuse_req *req);
/**
- * Reinitialize a request, the preallocated flag is left unmodified
+ * Get a request, may fail with -ENOMEM
*/
-void fuse_reset_request(struct fuse_req *req);
+struct fuse_req *fuse_get_req(struct fuse_conn *fc);
/**
- * Reserve a preallocated request
+ * Gets a requests for a file operation, always succeeds
*/
-struct fuse_req *fuse_get_req(struct fuse_conn *fc);
+struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file);
/**
* Decrement reference count of a request. If count goes to zero free
@@ -575,12 +608,7 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
*/
void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
-/**
- * Release inodes and file associated with background request
- */
-void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req);
-
-/** Abort all requests */
+/* Abort all requests */
void fuse_abort_conn(struct fuse_conn *fc);
/**
@@ -592,3 +620,23 @@ int fuse_do_getattr(struct inode *inode);
* Invalidate inode attributes
*/
void fuse_invalidate_attr(struct inode *inode);
+
+/**
+ * Acquire reference to fuse_conn
+ */
+struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
+
+/**
+ * Release reference to fuse_conn
+ */
+void fuse_conn_put(struct fuse_conn *fc);
+
+/**
+ * Add connection to control filesystem
+ */
+int fuse_ctl_add_conn(struct fuse_conn *fc);
+
+/**
+ * Remove connection from control filesystem
+ */
+void fuse_ctl_remove_conn(struct fuse_conn *fc);
diff --git a/kernel/inode.c b/kernel/inode.c
index a2f144c..0eb8ced 100644
--- a/kernel/inode.c
+++ b/kernel/inode.c
@@ -11,13 +11,13 @@
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/file.h>
-#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/module.h>
#ifdef KERNEL_2_6
#include <linux/parser.h>
#include <linux/statfs.h>
+#include <linux/random.h>
#else
#include "compat/parser.h"
#endif
@@ -29,15 +29,8 @@ MODULE_LICENSE("GPL");
#endif
static kmem_cache_t *fuse_inode_cachep;
-#ifdef KERNEL_2_6
-static struct subsystem connections_subsys;
-
-struct fuse_conn_attr {
- struct attribute attr;
- ssize_t (*show)(struct fuse_conn *, char *);
- ssize_t (*store)(struct fuse_conn *, const char *, size_t);
-};
-#endif
+struct list_head fuse_conn_list;
+DEFINE_MUTEX(fuse_mutex);
#define FUSE_SUPER_MAGIC 0x65735546
@@ -73,7 +66,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
inode->u.generic_ip = NULL;
#endif
fi = get_fuse_inode(inode);
- fi->i_time = jiffies - 1;
+ fi->i_time = 0;
fi->nodeid = 0;
fi->nlookup = 0;
fi->forget_req = fuse_request_alloc();
@@ -121,6 +114,14 @@ static void fuse_clear_inode(struct inode *inode)
}
}
+static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
+{
+ if (*flags & MS_MANDLOCK)
+ return -EINVAL;
+
+ return 0;
+}
+
void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr)
{
if (S_ISREG(inode->i_mode) && i_size_read(inode) != attr->size)
@@ -278,24 +279,19 @@ static void fuse_put_super(struct super_block *sb)
{
struct fuse_conn *fc = get_fuse_conn_super(sb);
- down_write(&fc->sbput_sem);
- while (!list_empty(&fc->background))
- fuse_release_background(fc,
- list_entry(fc->background.next,
- struct fuse_req, bg_entry));
-
spin_lock(&fc->lock);
- fc->mounted = 0;
fc->connected = 0;
+ fc->blocked = 0;
spin_unlock(&fc->lock);
- up_write(&fc->sbput_sem);
/* Flush all readers on this fs */
kill_fasync(&fc->fasync, SIGIO, POLL_IN);
wake_up_all(&fc->waitq);
-#ifdef KERNEL_2_6
- kobject_del(&fc->kobj);
-#endif
- kobject_put(&fc->kobj);
+ wake_up_all(&fc->blocked_waitq);
+ mutex_lock(&fuse_mutex);
+ list_del(&fc->entry);
+ fuse_ctl_remove_conn(fc);
+ mutex_unlock(&fuse_mutex);
+ fuse_conn_put(fc);
}
static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
@@ -314,8 +310,16 @@ static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr
/* fsid is left zero */
}
+#ifdef KERNEL_2_6_18_PLUS
+static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+#else
static int fuse_statfs(struct super_block *sb, struct kstatfs *buf)
+#endif
{
+#ifdef KERNEL_2_6_18_PLUS
+ struct super_block *sb = dentry->d_sb;
+#endif
struct fuse_conn *fc = get_fuse_conn_super(sb);
struct fuse_req *req;
struct fuse_statfs_out outarg;
@@ -328,6 +332,9 @@ static int fuse_statfs(struct super_block *sb, struct kstatfs *buf)
memset(&outarg, 0, sizeof(outarg));
req->in.numargs = 0;
req->in.h.opcode = FUSE_STATFS;
+#ifdef KERNEL_2_6_18_PLUS
+ req->in.h.nodeid = get_node_id(dentry->d_inode);
+#endif
req->out.numargs = 1;
req->out.args[0].size =
fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg);
@@ -461,11 +468,6 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
return 0;
}
-static void fuse_conn_release(struct kobject *kobj)
-{
- kfree(get_fuse_conn_kobj(kobj));
-}
-
#ifndef HAVE_KZALLOC
static void *kzalloc(size_t size, int flags)
{
@@ -482,20 +484,13 @@ static struct fuse_conn *new_conn(void)
fc = kzalloc(sizeof(*fc), GFP_KERNEL);
if (fc) {
spin_lock_init(&fc->lock);
+ atomic_set(&fc->count, 1);
init_waitqueue_head(&fc->waitq);
init_waitqueue_head(&fc->blocked_waitq);
INIT_LIST_HEAD(&fc->pending);
INIT_LIST_HEAD(&fc->processing);
INIT_LIST_HEAD(&fc->io);
- INIT_LIST_HEAD(&fc->background);
- init_rwsem(&fc->sbput_sem);
-#ifdef KERNEL_2_6
- kobj_set_kset_s(fc, connections_subsys);
- kobject_init(&fc->kobj);
-#else
- atomic_set(&fc->kobj.count, 1);
- fc->kobj.release = fuse_conn_release;
-#endif
+ INIT_LIST_HEAD(&fc->interrupts);
atomic_set(&fc->num_waiting, 0);
#ifdef KERNEL_2_6_6_PLUS
fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
@@ -503,10 +498,23 @@ static struct fuse_conn *new_conn(void)
#endif
fc->reqctr = 0;
fc->blocked = 1;
+ get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
}
return fc;
}
+void fuse_conn_put(struct fuse_conn *fc)
+{
+ if (atomic_dec_and_test(&fc->count))
+ kfree(fc);
+}
+
+struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
+{
+ atomic_inc(&fc->count);
+ return fc;
+}
+
static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
{
struct fuse_attr attr;
@@ -586,6 +594,7 @@ static struct super_operations fuse_super_operations = {
.destroy_inode = fuse_destroy_inode,
.read_inode = fuse_read_inode,
.clear_inode = fuse_clear_inode,
+ .remount_fs = fuse_remount_fs,
.put_super = fuse_put_super,
.umount_begin = fuse_umount_begin,
.statfs = fuse_statfs,
@@ -606,8 +615,12 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
ra_pages = arg->max_readahead / PAGE_CACHE_SIZE;
if (arg->flags & FUSE_ASYNC_READ)
fc->async_read = 1;
- } else
+ if (!(arg->flags & FUSE_POSIX_LOCKS))
+ fc->no_lock = 1;
+ } else {
ra_pages = fc->max_read / PAGE_CACHE_SIZE;
+ fc->no_lock = 1;
+ }
fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages);
#endif
@@ -627,7 +640,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
arg->minor = FUSE_KERNEL_MINOR_VERSION;
#ifdef KERNEL_2_6
arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
- arg->flags |= FUSE_ASYNC_READ;
+ arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS;
#endif
req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1;
@@ -644,14 +657,11 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
request_send_background(fc, req);
}
-#ifdef KERNEL_2_6
-static unsigned long long conn_id(void)
+static u64 conn_id(void)
{
- /* BKL is held for ->get_sb() */
- static unsigned long long ctr = 1;
+ static u64 ctr = 1;
return ctr++;
}
-#endif
static int fuse_fill_super(struct super_block *sb, void *data, int silent)
{
@@ -663,6 +673,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
struct fuse_req *init_req;
int err;
+ if (sb->s_flags & MS_MANDLOCK)
+ return -EINVAL;
+
if (!parse_fuse_opt((char *) data, &d))
return -EINVAL;
@@ -711,27 +724,21 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
if (!init_req)
goto err_put_root;
-#ifdef KERNEL_2_6
- err = kobject_set_name(&fc->kobj, "%llu", conn_id());
- if (err)
- goto err_free_req;
-
- err = kobject_add(&fc->kobj);
- if (err)
- goto err_free_req;
-#endif
-
- /* Setting file->private_data can't race with other mount()
- instances, since BKL is held for ->get_sb() */
+ mutex_lock(&fuse_mutex);
err = -EINVAL;
if (file->private_data)
- goto err_kobject_del;
+ goto err_unlock;
+
+ fc->id = conn_id();
+ err = fuse_ctl_add_conn(fc);
+ if (err)
+ goto err_unlock;
+ list_add_tail(&fc->entry, &fuse_conn_list);
sb->s_root = root_dentry;
- fc->mounted = 1;
fc->connected = 1;
- kobject_get(&fc->kobj);
- file->private_data = fc;
+ file->private_data = fuse_conn_get(fc);
+ mutex_unlock(&fuse_mutex);
/*
* atomic_dec_and_test() in fput() provides the necessary
* memory barrier for file->private_data to be visible on all
@@ -743,17 +750,14 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
return 0;
- err_kobject_del:
-#ifdef KERNEL_2_6
- kobject_del(&fc->kobj);
- err_free_req:
+ err_unlock:
+ mutex_unlock(&fuse_mutex);
fuse_request_free(init_req);
-#endif
err_put_root:
dput(root_dentry);
err:
fput(file);
- kobject_put(&fc->kobj);
+ fuse_conn_put(fc);
return err;
}
@@ -786,78 +790,11 @@ static DECLARE_FSTYPE(fuse_fs_type, "fuse", fuse_read_super_compat, 0);
#endif
#ifdef KERNEL_2_6
-static ssize_t fuse_conn_waiting_show(struct fuse_conn *fc, char *page)
-{
- return sprintf(page, "%i\n", atomic_read(&fc->num_waiting));
-}
-
-static ssize_t fuse_conn_abort_store(struct fuse_conn *fc, const char *page,
- size_t count)
-{
- fuse_abort_conn(fc);
- return count;
-}
-
-#ifndef __ATTR
-#define __ATTR(_name,_mode,_show,_store) { \
- .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \
- .show = _show, \
- .store = _store, \
-}
-#endif
-static struct fuse_conn_attr fuse_conn_waiting =
- __ATTR(waiting, 0400, fuse_conn_waiting_show, NULL);
-static struct fuse_conn_attr fuse_conn_abort =
- __ATTR(abort, 0600, NULL, fuse_conn_abort_store);
-
-static struct attribute *fuse_conn_attrs[] = {
- &fuse_conn_waiting.attr,
- &fuse_conn_abort.attr,
- NULL,
-};
-
-static ssize_t fuse_conn_attr_show(struct kobject *kobj,
- struct attribute *attr,
- char *page)
-{
- struct fuse_conn_attr *fca =
- container_of(attr, struct fuse_conn_attr, attr);
-
- if (fca->show)
- return fca->show(get_fuse_conn_kobj(kobj), page);
- else
- return -EACCES;
-}
-
-static ssize_t fuse_conn_attr_store(struct kobject *kobj,
- struct attribute *attr,
- const char *page, size_t count)
-{
- struct fuse_conn_attr *fca =
- container_of(attr, struct fuse_conn_attr, attr);
-
- if (fca->store)
- return fca->store(get_fuse_conn_kobj(kobj), page, count);
- else
- return -EACCES;
-}
-
-static struct sysfs_ops fuse_conn_sysfs_ops = {
- .show = &fuse_conn_attr_show,
- .store = &fuse_conn_attr_store,
-};
-
-static struct kobj_type ktype_fuse_conn = {
- .release = fuse_conn_release,
- .sysfs_ops = &fuse_conn_sysfs_ops,
- .default_attrs = fuse_conn_attrs,
-};
-
#ifndef HAVE_FS_SUBSYS
static decl_subsys(fs, NULL, NULL);
#endif
static decl_subsys(fuse, NULL, NULL);
-static decl_subsys(connections, &ktype_fuse_conn, NULL);
+static decl_subsys(connections, NULL, NULL);
#endif /* KERNEL_2_6 */
static void fuse_inode_init_once(void *foo, kmem_cache_t *cachep,
@@ -956,6 +893,7 @@ static int __init fuse_init(void)
printk("fuse distribution version: %s\n", FUSE_VERSION);
#endif
+ INIT_LIST_HEAD(&fuse_conn_list);
res = fuse_fs_init();
if (res)
goto err;
@@ -968,8 +906,14 @@ static int __init fuse_init(void)
if (res)
goto err_dev_cleanup;
+ res = fuse_ctl_init();
+ if (res)
+ goto err_sysfs_cleanup;
+
return 0;
+ err_sysfs_cleanup:
+ fuse_sysfs_cleanup();
err_dev_cleanup:
fuse_dev_cleanup();
err_fs_cleanup:
@@ -982,6 +926,7 @@ static void __exit fuse_exit(void)
{
printk(KERN_DEBUG "fuse exit\n");
+ fuse_ctl_cleanup();
fuse_sysfs_cleanup();
fuse_fs_cleanup();
fuse_dev_cleanup();