
Re: Fileassoc locking



On Saturday, Dec 26, 2009, at 1:11 AM, Elad Efrat wrote:

> Hi,
> 
> I'm trying to add locking to fileassoc. Attached is my first such
> attempt... I've tested it locally (with Veriexec) and it seems to
> be working (or at least not crashing :) but I'm not entirely sure
> it's right.
> 
> I'd appreciate if some of the more locking-fluent people on this
> list take a look and let me know if the thing is "getting there",
> or closer to "completely wrong."

RW locks have some performance problems and should not be used unless they're
really needed. I think the same thing can be accomplished with reference
counters and mutexes/cv variables; you can look at dm_dev_t, where I used the
same approach. RW locks have a very bad effect on CPU caches and can slow
things down on bigger SMP systems.
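
Roughly, the pattern I mean looks like this (a minimal sketch; "obj" and the
function names are made up for illustration, this is neither the dm nor the
fileassoc code):

#include <sys/mutex.h>
#include <sys/condvar.h>
#include <sys/kmem.h>

struct obj {
        kmutex_t        o_lock;         /* protects o_refcnt */
        kcondvar_t      o_cv;           /* signalled when o_refcnt drops */
        unsigned int    o_refcnt;       /* number of active users */
};

/* At creation: mutex_init(&o->o_lock, MUTEX_DEFAULT, IPL_NONE);
 * cv_init(&o->o_cv, "objref"); o->o_refcnt = 0; */

/* Take a reference before using the object. */
static void
obj_ref(struct obj *o)
{
        mutex_enter(&o->o_lock);
        o->o_refcnt++;
        mutex_exit(&o->o_lock);
}

/* Drop a reference; wake a destroyer waiting for the last one. */
static void
obj_unref(struct obj *o)
{
        mutex_enter(&o->o_lock);
        if (--o->o_refcnt == 0)
                cv_broadcast(&o->o_cv);
        mutex_exit(&o->o_lock);
}

/* Tear down: wait until all references are gone, then free. */
static void
obj_destroy(struct obj *o)
{
        mutex_enter(&o->o_lock);
        while (o->o_refcnt > 0)
                cv_wait(&o->o_cv, &o->o_lock);
        mutex_exit(&o->o_lock);
        cv_destroy(&o->o_cv);
        mutex_destroy(&o->o_lock);
        kmem_free(o, sizeof(*o));
}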

Also, it would be good to describe the locking protocol used in fileassoc in
a comment at the top of this file.
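
Something like this (only an example of the comment style; the actual rules
have to match whatever the final code does):

/*
 * Locking notes:
 *
 * fileassoc_list_lock
 *      Protects fileassoc_list.  Taken first.
 * assoc->assoc_lock
 *      Protects the assoc's state; taken after fileassoc_list_lock.
 * tbl->tbl_lock
 *      Protects the table's hash and counters.
 * faf->faf_lock
 *      Protects the file entry; taken after tbl->tbl_lock.
 */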

> Thanks,
> 
> -e.
> Index: kern_fileassoc.c
> ===================================================================
> RCS file: /cvsroot/src/sys/kern/kern_fileassoc.c,v
> retrieving revision 1.34
> diff -u -p -r1.34 kern_fileassoc.c
> --- kern_fileassoc.c  25 Dec 2009 20:07:18 -0000      1.34
> +++ kern_fileassoc.c  26 Dec 2009 00:04:49 -0000
> @@ -58,9 +58,11 @@ struct fileassoc {
>       const char *assoc_name;                         /* Name. */
>       fileassoc_cleanup_cb_t assoc_cleanup_cb;        /* Clear callback. */
>       specificdata_key_t assoc_key;
> +     krwlock_t assoc_lock;
> };
> 
> static LIST_HEAD(, fileassoc) fileassoc_list;
> +static krwlock_t fileassoc_list_lock;
> 
> /* An entry in the per-mount hash table. */
> struct fileassoc_file {
> @@ -68,6 +70,7 @@ struct fileassoc_file {
>       specificdata_reference faf_data;                /* Assoc data. */
>       u_int faf_nassocs;                              /* # of assocs. */
>       LIST_ENTRY(fileassoc_file) faf_list;            /* List pointer. */
> +     krwlock_t faf_lock;
> };
> 
> LIST_HEAD(fileassoc_hash_entry, fileassoc_file);
> @@ -78,6 +81,7 @@ struct fileassoc_table {
>       size_t tbl_nslots;                              /* Number of slots. */
>       size_t tbl_nused;                               /* # of used slots. */
>       specificdata_reference tbl_data;
> +     krwlock_t tbl_lock;
> };
> 
> /*
> @@ -88,6 +92,7 @@ struct fileassoc_table {
>       (hash32_buf((handle), FHANDLE_SIZE(handle), HASH32_BUF_INIT) \
>        & ((tbl)->tbl_mask))
> 
> +/* Expects faf and assoc reader-locked. */
> static void *
> file_getdata(struct fileassoc_file *faf, const struct fileassoc *assoc)
> {
> @@ -96,6 +101,7 @@ file_getdata(struct fileassoc_file *faf,
>           assoc->assoc_key);
> }
> 
> +/* Expects faf writer-locked and assoc reader-locked. */
> static void
> file_setdata(struct fileassoc_file *faf, const struct fileassoc *assoc,
>     void *data)
> @@ -105,6 +111,7 @@ file_setdata(struct fileassoc_file *faf,
>           assoc->assoc_key, data);
> }
> 
> +/* Expects faf and assoc reader-locked. */
> static void
> file_cleanup(struct fileassoc_file *faf, const struct fileassoc *assoc)
> {
> @@ -119,6 +126,7 @@ file_cleanup(struct fileassoc_file *faf,
>       (*cb)(data);
> }
> 
> +/* Expects faf writer-locked. */
> static void
> file_free(struct fileassoc_file *faf)
> {
> @@ -126,11 +134,19 @@ file_free(struct fileassoc_file *faf)
> 
>       LIST_REMOVE(faf, faf_list);
> 
> +     rw_enter(&fileassoc_list_lock, RW_READER);
>       LIST_FOREACH(assoc, &fileassoc_list, assoc_list) {
> +             rw_enter(&assoc->assoc_lock, RW_READER);
>               file_cleanup(faf, assoc);
> +             rw_exit(&assoc->assoc_lock);
>       }
> +     rw_exit(&fileassoc_list_lock);
> +
>       vfs_composefh_free(faf->faf_handle);
>       specificdata_fini(fileassoc_domain, &faf->faf_data);
> +
> +     rw_exit(&faf->faf_lock);
> +

Where was faf->faf_lock entered before this rw_exit()?

>       kmem_free(faf, sizeof(*faf));
> }
> 
> @@ -145,11 +161,11 @@ table_dtor(void *v)
>               struct fileassoc_file *faf;
> 
>               while ((faf = LIST_FIRST(&tbl->tbl_hash[i])) != NULL) {
> +                     rw_enter(&faf->faf_lock, RW_WRITER);
>                       file_free(faf);
>               }
>       }
> 
> -     /* Remove hash table and sysctl node */
>       hashdone(tbl->tbl_hash, HASH_LIST, tbl->tbl_mask);
>       specificdata_fini(fileassoc_domain, &tbl->tbl_data);
>       kmem_free(tbl, sizeof(*tbl));
> @@ -170,11 +186,15 @@ fileassoc_init(void)
>       }
>       fileassoc_domain = specificdata_domain_create();
> 
> +     rw_init(&fileassoc_list_lock);
> +
>       return 0;
> }
> 
> /*
>  * Register a new assoc.
> + *
> + * The new assoc is returned locked with a writer lock.
>  */
> int
> fileassoc_register(const char *name, fileassoc_cleanup_cb_t cleanup_cb,
> @@ -196,8 +216,16 @@ fileassoc_register(const char *name, fil
>       assoc->assoc_name = name;
>       assoc->assoc_cleanup_cb = cleanup_cb;
>       assoc->assoc_key = key;
> +     rw_init(&assoc->assoc_lock);
> 
> +     /* Lock it so no one can use this assoc until we're done. */
> +     rw_enter(&assoc->assoc_lock, RW_WRITER);
> +
> +     rw_enter(&fileassoc_list_lock, RW_WRITER);
>       LIST_INSERT_HEAD(&fileassoc_list, assoc, assoc_list);
> +     rw_exit(&fileassoc_list_lock);
> +
> +     rw_exit(&assoc->assoc_lock);
> 

With one mutex used for fileassoc_list_lock you don't need to do this dance;
you just lock the mutex, insert the head, and unlock.
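
I.e. (sketch, assuming fileassoc_list_lock becomes a kmutex_t):

        mutex_enter(&fileassoc_list_lock);
        LIST_INSERT_HEAD(&fileassoc_list, assoc, assoc_list);
        mutex_exit(&fileassoc_list_lock);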


>       *result = assoc;
> 
> @@ -211,8 +239,11 @@ int
> fileassoc_deregister(fileassoc_t assoc)
> {
> 
> +     rw_enter(&assoc->assoc_lock, RW_WRITER);
>       LIST_REMOVE(assoc, assoc_list);
>       specificdata_key_delete(fileassoc_domain, assoc->assoc_key);
> +     rw_exit(&assoc->assoc_lock);
> +
>       kmem_free(assoc, sizeof(*assoc));
> 
>       return 0;
> @@ -224,13 +255,20 @@ fileassoc_deregister(fileassoc_t assoc)
> static struct fileassoc_table *
> fileassoc_table_lookup(struct mount *mp)
> {
> +     struct fileassoc_table *tbl;
>       int error;
> 
>       error = RUN_ONCE(&control, fileassoc_init);
>       if (error) {
>               return NULL;
>       }
> -     return mount_getspecific(mp, fileassoc_mountspecific_key);
> +
> +     tbl = mount_getspecific(mp, fileassoc_mountspecific_key);
> +     if (tbl != NULL)
> +             rw_enter(&tbl->tbl_lock, RW_WRITER);
> +
> +     return tbl;
> +

If the function returns a locked tbl, that should be documented somewhere in this file.

> }
> 
> /*
> @@ -253,10 +291,15 @@ fileassoc_file_lookup(struct vnode *vp, 
>               return NULL;
>       }
> 
> +     /* We don't need a writer lock. */
> +     rw_downgrade(&tbl->tbl_lock);
> +
>       if (hint == NULL) {
>               error = vfs_composefh_alloc(vp, &th);
> -             if (error)
> +             if (error) {
> +                     rw_exit(&tbl->tbl_lock);
>                       return (NULL);
> +             }
>       } else {
>               th = hint;
>       }
> @@ -273,6 +316,11 @@ fileassoc_file_lookup(struct vnode *vp, 
>               }
>       }
> 
> +     if (faf != NULL)
> +             rw_enter(&faf->faf_lock, RW_WRITER);
> +
> +     rw_exit(&tbl->tbl_lock);
> +
>       if (hint == NULL)
>               vfs_composefh_free(th);
> 
> @@ -286,14 +334,22 @@ void *
> fileassoc_lookup(struct vnode *vp, fileassoc_t assoc)
> {
>       struct fileassoc_file *faf;
> +     void *data;
> 
>       faf = fileassoc_file_lookup(vp, NULL);
>       if (faf == NULL)
>               return (NULL);
> 
> -     return file_getdata(faf, assoc);
> +     rw_enter(&assoc->assoc_lock, RW_READER);
> +     data = file_getdata(faf, assoc);
> +     rw_exit(&assoc->assoc_lock);
> +
> +     rw_exit(&faf->faf_lock);
> +
> +     return data;

Here you return a pointer to the assoc data, but it can be released/freed by
another thread while the caller of this sleeps. With reference counting the
caller would hold a reference on the assoc/data, and no one could change or
free them until the caller drops the last reference.
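
On the caller side that would look roughly like this (sketch only;
faf_unref() is a hypothetical helper built on a per-faf mutex and reference
count like the one above):

        faf = fileassoc_file_lookup(vp, NULL); /* would take a reference */
        if (faf == NULL)
                return (NULL);

        data = file_getdata(faf, assoc);
        /* data stays valid here: faf can't be freed while we hold the ref */
        faf_unref(faf);                        /* hypothetical helper */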

> }
> 
> +/* Expects tbl writer-locked. */
> static struct fileassoc_table *
> fileassoc_table_resize(struct fileassoc_table *tbl)
> {
> @@ -312,6 +368,7 @@ fileassoc_table_resize(struct fileassoc_
>           true, &newtbl->tbl_mask);
>       newtbl->tbl_nused = 0;
>       specificdata_init(fileassoc_domain, &newtbl->tbl_data);
> +     rw_init(&newtbl->tbl_lock);
> 
>       /* XXX we need to make sure nothing uses fileassoc here! */
> 
> @@ -321,10 +378,12 @@ fileassoc_table_resize(struct fileassoc_
>               while ((faf = LIST_FIRST(&tbl->tbl_hash[i])) != NULL) {
>                       struct fileassoc_hash_entry *hash_entry;
>                       size_t indx;
> -
> +     
>                       LIST_REMOVE(faf, faf_list);
> 
> +                     rw_enter(&faf->faf_lock, RW_READER);
>                       indx = FILEASSOC_HASH(newtbl, faf->faf_handle);
> +                     rw_exit(&faf->faf_lock);
>                       hash_entry = &(newtbl->tbl_hash[indx]);
> 
>                       LIST_INSERT_HEAD(hash_entry, faf, faf_list);
> @@ -342,6 +401,8 @@ fileassoc_table_resize(struct fileassoc_
>       specificdata_fini(fileassoc_domain, &tbl->tbl_data);
>       kmem_free(tbl, sizeof(*tbl));
> 
> +     rw_enter(&newtbl->tbl_lock, RW_WRITER);
> +

Again, see the text above about returning a locked table.

>       return (newtbl);
> }
> 
> @@ -365,6 +426,9 @@ fileassoc_table_add(struct mount *mp)
>           &tbl->tbl_mask);
>       tbl->tbl_nused = 0;
>       specificdata_init(fileassoc_domain, &tbl->tbl_data);
> +     rw_init(&tbl->tbl_lock);
> +
> +     rw_enter(&tbl->tbl_lock, RW_WRITER);
> 
>       mount_setspecific(mp, fileassoc_mountspecific_key, tbl);
> 
> @@ -403,18 +467,30 @@ fileassoc_table_run(struct mount *mp, fi
>       if (tbl == NULL)
>               return (EEXIST);
> 
> +     rw_downgrade(&tbl->tbl_lock);
> +
> +     rw_enter(&assoc->assoc_lock, RW_READER);
> +
>       for (i = 0; i < tbl->tbl_nslots; i++) {
>               struct fileassoc_file *faf;
> 
>               LIST_FOREACH(faf, &tbl->tbl_hash[i], faf_list) {
>                       void *data;
> 
> +                     rw_enter(&faf->faf_lock, RW_READER);
> +
>                       data = file_getdata(faf, assoc);
>                       if (data != NULL)
>                               cb(data, cookie);
> +
> +                     rw_exit(&faf->faf_lock);
>               }
>       }
> 
> +     rw_exit(&assoc->assoc_lock);
> +
> +     rw_exit(&tbl->tbl_lock);
> +
>       return (0);
> }
> 
> @@ -431,15 +507,27 @@ fileassoc_table_clear(struct mount *mp, 
>       if (tbl == NULL)
>               return (EEXIST);
> 
> +     rw_downgrade(&tbl->tbl_lock);
> +
> +     rw_enter(&assoc->assoc_lock, RW_READER);
> +
>       for (i = 0; i < tbl->tbl_nslots; i++) {
>               struct fileassoc_file *faf;
> 
>               LIST_FOREACH(faf, &tbl->tbl_hash[i], faf_list) {
> +                     rw_enter(&faf->faf_lock, RW_WRITER);
> +
>                       file_cleanup(faf, assoc);
>                       file_setdata(faf, assoc, NULL);
> +
> +                     rw_exit(&faf->faf_lock);
>               }
>       }
> 
> +     rw_exit(&assoc->assoc_lock);
> +
> +     rw_exit(&tbl->tbl_lock);
> +
>       return (0);
> }
> 
> @@ -476,13 +564,12 @@ fileassoc_file_add(struct vnode *vp, fha
>               tbl = fileassoc_table_add(vp->v_mount);
>       }
> 
> -     indx = FILEASSOC_HASH(tbl, th);
> -     hash_entry = &(tbl->tbl_hash[indx]);
> -
>       faf = kmem_zalloc(sizeof(*faf), KM_SLEEP);
>       faf->faf_handle = th;
>       specificdata_init(fileassoc_domain, &faf->faf_data);
> -     LIST_INSERT_HEAD(hash_entry, faf, faf_list);
> +     rw_init(&faf->faf_lock);
> +
> +     rw_enter(&faf->faf_lock, RW_WRITER);
> 
>       /*
>        * This decides when we need to resize the table. For now,
> @@ -490,14 +577,23 @@ fileassoc_file_add(struct vnode *vp, fha
>        * has. That's not really true unless of course we had zero
>        * collisions. Think positive! :)
>        */
> -     if (++(tbl->tbl_nused) == tbl->tbl_nslots) { 
> +     if (((tbl->tbl_nused) + 1) == tbl->tbl_nslots) { 
>               struct fileassoc_table *newtbl;
> 
>               newtbl = fileassoc_table_resize(tbl);
>               mount_setspecific(vp->v_mount, fileassoc_mountspecific_key,
>                   newtbl);
> +
> +             tbl = newtbl;
>       }
> 
> +     indx = FILEASSOC_HASH(tbl, th);
> +     hash_entry = &(tbl->tbl_hash[indx]);
> +     LIST_INSERT_HEAD(hash_entry, faf, faf_list);
> +     ++(tbl->tbl_nused);
> +
> +     rw_exit(&tbl->tbl_lock);
> +
>       return (faf);
> }
> 
> @@ -523,6 +619,8 @@ fileassoc_file_delete(struct vnode *vp)
>       tbl = fileassoc_table_lookup(vp->v_mount);
>       --(tbl->tbl_nused); /* XXX gc? */
> 
> +     rw_exit(&tbl->tbl_lock);
> +
>       KERNEL_UNLOCK_ONE(NULL);
> 
>       return (0);
> @@ -544,14 +642,23 @@ fileassoc_add(struct vnode *vp, fileasso
>                       return (ENOTDIR);
>       }
> 
> +     rw_enter(&assoc->assoc_lock, RW_READER);
> +
>       olddata = file_getdata(faf, assoc);
> -     if (olddata != NULL)
> +     if (olddata != NULL) {
> +             rw_exit(&assoc->assoc_lock);
> +             rw_exit(&faf->faf_lock);
>               return (EEXIST);
> +     }
> 
>       file_setdata(faf, assoc, data);
> 
>       faf->faf_nassocs++;
> 
> +     rw_exit(&assoc->assoc_lock);
> +
> +     rw_exit(&faf->faf_lock);
> +
>       return (0);
> }
> 
> @@ -567,10 +674,16 @@ fileassoc_clear(struct vnode *vp, fileas
>       if (faf == NULL)
>               return (ENOENT);
> 
> +     rw_enter(&assoc->assoc_lock, RW_READER);
> +
>       file_cleanup(faf, assoc);
>       file_setdata(faf, assoc, NULL);
> 
> +     rw_exit(&assoc->assoc_lock);
> +
>       --(faf->faf_nassocs); /* XXX gc? */
> 
> +     rw_exit(&faf->faf_lock);
> +
>       return (0);
> }

After a quick look I think that using mutexes/cvs and reference counting on
the faf's and tbl's can do the same job with much nicer and more effective
code. Maybe you should post the whole file somewhere, because changes this
large are hard to review in diff form.


Regards

Adam.


