aboutsummaryrefslogtreecommitdiff
path: root/drivers/staging/lustre/lustre/llite/file.c
diff options
context:
space:
mode:
authorJinshan Xiong <jinshan.xiong@intel.com>2013-11-19 21:23:41 +0800
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2013-11-19 10:29:44 -0800
commitd3a8a4e2d5e42e80625d734bb2bc5f71ecd9ffc1 (patch)
tree901ee6e5acf43ec3159891f2a961b945f6c4641f /drivers/staging/lustre/lustre/llite/file.c
parent86c17c0b5eed0bb4e1e61d5889a80f56bf59c93f (diff)
staging/lustre/hsm: Implementation of exclusive open
Proposed way to do exclusive open: 0. First of all, we have to perform most of the work in kernel space; 1. Client exclusively opens the file with IT_RELEASE_OPEN. 1.1 exclusive open means the open will fail if the file is being opened by somebody else; 2. the MDT will handle IT_RELEASE as follows: 2.1 Revoke OPEN_LOCK on this file, just request EX mode of MDS_INODELOCK_OPEN lock and release it; 2.2 Acquire a rwsem, say open_rwsem, with write mode before trying to the file; for the normal open, it should acquire read mode of open_rwsem; 2.3 Check if the file is already being opened by others, if not return with -EBUSY; 2.4 Check if the file can be released; 2.5 Set a special flag in struct mdt_file_data to mark this open is exclusive; 2.5 Release open rwsem. >From now on, if the file is opened by others, it will mark mdt_file_data that the exclusive open is broken. 3. Client: if IT_RELEASE_OPEN is finished successfully, and do followings: 3.1 Acquire full PW or EX extent lock to flush dirty cache; 3.2 Pack the handle of layout lock, data version and other attars; 3.3 Close the file with IT_RELEASE_CLOSE. 4. Back to MDT to handle IT_RELEASE CLOSE: 4.1 Grab the open_rwsem 4.2 Check if the exclusive open has ever been broken, in that case, the RELEASE process will fail; 4.3 Verify the data version matches archive; 4.4 Grab EX layout lock 4.5 Swap the layout 4.6 Release EX layout lock 4.7 Close the exclusive open Basically we avoid granting EX layout lock back to client and introduce exclusive open so that we know it if the file has ever being accessed. I hope this can simplify things a little bit. Also, exclusive open can be used to implement file lease. In this patch, a framework of lease is implemented. However, only exclusive lease is supported right now. To apply a lease, MDS_OPEN_LEASE must be set to open the file, EX mode open lock is returned to the client side to hold a lease. From that time on, if this file is opened again by other processes, the open lock will be revoked so the client who holds the lease will know the lease is already broken by checking that open lock. To release a lease, normal close is used. The client will revoke the open lock before sending CLOSE request. Lease can be applied in two ways. ll_lease_open()/close() can be called directly if the lease holder is in kernel space; or if the lease holder lives in user space, it has to open the file first and then use ioctl() with command LL_IOC_SET_LEASE to apply a lease. The lease holder has to poll the lease status itself. Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-2919 Lustre-change: http://review.whamcloud.com/6730 Signed-off-by: Jinshan Xiong <jinshan.xiong@intel.com> Signed-off-by: John L. Hammond <john.hammond@intel.com> Reviewed-by: Andreas Dilger <andreas.dilger@intel.com> Reviewed-by: Oleg Drokin <oleg.drokin@intel.com> Signed-off-by: Peng Tao <bergwolf@gmail.com> Signed-off-by: Andreas Dilger <andreas.dilger@intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers/staging/lustre/lustre/llite/file.c')
-rw-r--r--drivers/staging/lustre/lustre/llite/file.c297
1 files changed, 296 insertions, 1 deletions
diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c
index 17064b4f617..971409ed52e 100644
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@ -241,6 +241,24 @@ int ll_md_close(struct obd_export *md_exp, struct inode *inode,
if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
+ if (fd->fd_lease_och != NULL) {
+ bool lease_broken;
+
+ /* Usually the lease is not released when the
+ * application crashed, we need to release here. */
+ rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
+ CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
+ PFID(&lli->lli_fid), rc, lease_broken);
+
+ fd->fd_lease_och = NULL;
+ }
+
+ if (fd->fd_och != NULL) {
+ rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och);
+ fd->fd_och = NULL;
+ GOTO(out, rc);
+ }
+
/* Let's see if we have good enough OPEN lock on the file and if
we can skip talking to MDS */
if (file->f_dentry->d_inode) { /* Can this ever be false? */
@@ -277,6 +295,7 @@ int ll_md_close(struct obd_export *md_exp, struct inode *inode,
file, file->f_dentry, file->f_dentry->d_name.name);
}
+out:
LUSTRE_FPRIVATE(file) = NULL;
ll_file_data_put(fd);
ll_capa_close(inode);
@@ -440,6 +459,7 @@ static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
och->och_fh = body->handle;
och->och_fid = body->fid1;
+ och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
och->och_flags = it->it_flags;
@@ -471,7 +491,7 @@ int ll_local_open(struct file *file, struct lookup_intent *it,
LUSTRE_FPRIVATE(file) = fd;
ll_readahead_init(inode, &fd->fd_ras);
- fd->fd_omode = it->it_flags;
+ fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
return 0;
}
@@ -673,6 +693,196 @@ out_openerr:
return rc;
}
+static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
+ struct ldlm_lock_desc *desc, void *data, int flag)
+{
+ int rc;
+ struct lustre_handle lockh;
+
+ switch (flag) {
+ case LDLM_CB_BLOCKING:
+ ldlm_lock2handle(lock, &lockh);
+ rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
+ if (rc < 0) {
+ CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
+ return rc;
+ }
+ break;
+ case LDLM_CB_CANCELING:
+ /* do nothing */
+ break;
+ }
+ return 0;
+}
+
+/**
+ * Acquire a lease and open the file.
+ */
+struct obd_client_handle *ll_lease_open(struct inode *inode, struct file *file,
+ fmode_t fmode)
+{
+ struct lookup_intent it = { .it_op = IT_OPEN };
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct md_op_data *op_data;
+ struct ptlrpc_request *req;
+ struct lustre_handle old_handle = { 0 };
+ struct obd_client_handle *och = NULL;
+ int rc;
+ int rc2;
+
+ if (fmode != FMODE_WRITE && fmode != FMODE_READ)
+ return ERR_PTR(-EINVAL);
+
+ if (file != NULL) {
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+ struct obd_client_handle **och_p;
+ __u64 *och_usecount;
+
+ if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
+ return ERR_PTR(-EPERM);
+
+ /* Get the openhandle of the file */
+ rc = -EBUSY;
+ mutex_lock(&lli->lli_och_mutex);
+ if (fd->fd_lease_och != NULL) {
+ mutex_unlock(&lli->lli_och_mutex);
+ return ERR_PTR(rc);
+ }
+
+ if (fd->fd_och == NULL) {
+ if (file->f_mode & FMODE_WRITE) {
+ LASSERT(lli->lli_mds_write_och != NULL);
+ och_p = &lli->lli_mds_write_och;
+ och_usecount = &lli->lli_open_fd_write_count;
+ } else {
+ LASSERT(lli->lli_mds_read_och != NULL);
+ och_p = &lli->lli_mds_read_och;
+ och_usecount = &lli->lli_open_fd_read_count;
+ }
+ if (*och_usecount == 1) {
+ fd->fd_och = *och_p;
+ *och_p = NULL;
+ *och_usecount = 0;
+ rc = 0;
+ }
+ }
+ mutex_unlock(&lli->lli_och_mutex);
+ if (rc < 0) /* more than 1 opener */
+ return ERR_PTR(rc);
+
+ LASSERT(fd->fd_och != NULL);
+ old_handle = fd->fd_och->och_fh;
+ }
+
+ OBD_ALLOC_PTR(och);
+ if (och == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
+ LUSTRE_OPC_ANY, NULL);
+ if (IS_ERR(op_data))
+ GOTO(out, rc = PTR_ERR(op_data));
+
+ /* To tell the MDT this openhandle is from the same owner */
+ op_data->op_handle = old_handle;
+
+ it.it_flags = fmode | MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
+ rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req,
+ ll_md_blocking_lease_ast,
+ /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
+ * it can be cancelled which may mislead applications that the lease is
+ * broken;
+ * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
+ * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
+ * doesn't deal with openhandle, so normal openhandle will be leaked. */
+ LDLM_FL_NO_LRU | LDLM_FL_EXCL);
+ ll_finish_md_op_data(op_data);
+ if (req != NULL) {
+ ptlrpc_req_finished(req);
+ it_clear_disposition(&it, DISP_ENQ_COMPLETE);
+ }
+ if (rc < 0)
+ GOTO(out_release_it, rc);
+
+ if (it_disposition(&it, DISP_LOOKUP_NEG))
+ GOTO(out_release_it, rc = -ENOENT);
+
+ rc = it_open_error(DISP_OPEN_OPEN, &it);
+ if (rc)
+ GOTO(out_release_it, rc);
+
+ LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
+ ll_och_fill(sbi->ll_md_exp, &it, och);
+
+ if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
+ GOTO(out_close, rc = -EOPNOTSUPP);
+
+ /* already get lease, handle lease lock */
+ ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
+ if (it.d.lustre.it_lock_mode == 0 ||
+ it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
+ /* open lock must return for lease */
+ CERROR(DFID "lease granted but no open lock, %d/%llu.\n",
+ PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
+ it.d.lustre.it_lock_bits);
+ GOTO(out_close, rc = -EPROTO);
+ }
+
+ ll_intent_release(&it);
+ return och;
+
+out_close:
+ rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och);
+ if (rc2)
+ CERROR("Close openhandle returned %d\n", rc2);
+
+ /* cancel open lock */
+ if (it.d.lustre.it_lock_mode != 0) {
+ ldlm_lock_decref_and_cancel(&och->och_lease_handle,
+ it.d.lustre.it_lock_mode);
+ it.d.lustre.it_lock_mode = 0;
+ }
+out_release_it:
+ ll_intent_release(&it);
+out:
+ OBD_FREE_PTR(och);
+ return ERR_PTR(rc);
+}
+EXPORT_SYMBOL(ll_lease_open);
+
+/**
+ * Release lease and close the file.
+ * It will check if the lease has ever broken.
+ */
+int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
+ bool *lease_broken)
+{
+ struct ldlm_lock *lock;
+ bool cancelled = true;
+ int rc;
+
+ lock = ldlm_handle2lock(&och->och_lease_handle);
+ if (lock != NULL) {
+ lock_res_and_lock(lock);
+ cancelled = ldlm_is_cancel(lock);
+ unlock_res_and_lock(lock);
+ ldlm_lock_put(lock);
+ }
+
+ CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
+ PFID(&ll_i2info(inode)->lli_fid), cancelled);
+
+ if (!cancelled)
+ ldlm_cli_cancel(&och->och_lease_handle, 0);
+ if (lease_broken != NULL)
+ *lease_broken = cancelled;
+
+ rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och);
+ return rc;
+}
+EXPORT_SYMBOL(ll_lease_close);
+
/* Fills the obdo with the attributes for the lsm */
static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
struct obd_capa *capa, struct obdo *obdo,
@@ -2066,6 +2276,91 @@ long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
OBD_FREE_PTR(hca);
return rc;
}
+ case LL_IOC_SET_LEASE: {
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct obd_client_handle *och = NULL;
+ bool lease_broken;
+ fmode_t mode = 0;
+
+ switch (arg) {
+ case F_WRLCK:
+ if (!(file->f_mode & FMODE_WRITE))
+ return -EPERM;
+ mode = FMODE_WRITE;
+ break;
+ case F_RDLCK:
+ if (!(file->f_mode & FMODE_READ))
+ return -EPERM;
+ mode = FMODE_READ;
+ break;
+ case F_UNLCK:
+ mutex_lock(&lli->lli_och_mutex);
+ if (fd->fd_lease_och != NULL) {
+ och = fd->fd_lease_och;
+ fd->fd_lease_och = NULL;
+ }
+ mutex_unlock(&lli->lli_och_mutex);
+
+ if (och != NULL) {
+ mode = och->och_flags &
+ (FMODE_READ|FMODE_WRITE);
+ rc = ll_lease_close(och, inode, &lease_broken);
+ if (rc == 0 && lease_broken)
+ mode = 0;
+ } else {
+ rc = -ENOLCK;
+ }
+
+ /* return the type of lease or error */
+ return rc < 0 ? rc : (int)mode;
+ default:
+ return -EINVAL;
+ }
+
+ CDEBUG(D_INODE, "Set lease with mode %d\n", mode);
+
+ /* apply for lease */
+ och = ll_lease_open(inode, file, mode);
+ if (IS_ERR(och))
+ return PTR_ERR(och);
+
+ rc = 0;
+ mutex_lock(&lli->lli_och_mutex);
+ if (fd->fd_lease_och == NULL) {
+ fd->fd_lease_och = och;
+ och = NULL;
+ }
+ mutex_unlock(&lli->lli_och_mutex);
+ if (och != NULL) {
+ /* impossible now that only excl is supported for now */
+ ll_lease_close(och, inode, &lease_broken);
+ rc = -EBUSY;
+ }
+ return rc;
+ }
+ case LL_IOC_GET_LEASE: {
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct ldlm_lock *lock = NULL;
+
+ rc = 0;
+ mutex_lock(&lli->lli_och_mutex);
+ if (fd->fd_lease_och != NULL) {
+ struct obd_client_handle *och = fd->fd_lease_och;
+
+ lock = ldlm_handle2lock(&och->och_lease_handle);
+ if (lock != NULL) {
+ lock_res_and_lock(lock);
+ if (!ldlm_is_cancel(lock))
+ rc = och->och_flags &
+ (FMODE_READ | FMODE_WRITE);
+ unlock_res_and_lock(lock);
+ ldlm_lock_put(lock);
+ }
+ }
+ mutex_unlock(&lli->lli_och_mutex);
+
+ return rc;
+ }
default: {
int err;