Source-Changes-D archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: CVS commit: src/sys/dev/dm



I have changed cvs commit message for 

> dm.h dm_target_stripe.c 

to 

Add multi device strip support written by Guillermo Amaral and reviewed by me.


On Oct,Saturday 23 2010, at 11:18 PM, Adam Hamsik wrote:

> Module Name:  src
> Committed By: haad
> Date:         Sat Oct 23 21:18:55 UTC 2010
> 
> Modified Files:
>       src/sys/dev/dm: device-mapper.c dm.h dm_target_stripe.c
> Added Files:
>       src/sys/dev/dm/doc: locking.txt
> 
> Log Message:
> Add old file describing locking schema used in dm driver.
> 
> 
> To generate a diff of this commit:
> cvs rdiff -u -r1.24 -r1.25 src/sys/dev/dm/device-mapper.c
> cvs rdiff -u -r1.18 -r1.19 src/sys/dev/dm/dm.h
> cvs rdiff -u -r1.10 -r1.11 src/sys/dev/dm/dm_target_stripe.c
> cvs rdiff -u -r0 -r1.1 src/sys/dev/dm/doc/locking.txt
> 
> Please note that diffs are not public domain; they are subject to the
> copyright notices on the relevant files.
> 
> Modified files:
> 
> Index: src/sys/dev/dm/device-mapper.c
> diff -u src/sys/dev/dm/device-mapper.c:1.24 
> src/sys/dev/dm/device-mapper.c:1.25
> --- src/sys/dev/dm/device-mapper.c:1.24       Sat Oct  9 12:56:06 2010
> +++ src/sys/dev/dm/device-mapper.c    Sat Oct 23 21:18:54 2010
> @@ -1,4 +1,4 @@
> -/*        $NetBSD: device-mapper.c,v 1.24 2010/10/09 12:56:06 haad Exp $ */
> +/*        $NetBSD: device-mapper.c,v 1.25 2010/10/23 21:18:54 haad Exp $ */
> 
> /*
>  * Copyright (c) 2010 The NetBSD Foundation, Inc.
> @@ -350,7 +350,6 @@
>       r = 0;
> 
>       aprint_debug("dmioctl called\n");
> -     
>       KASSERT(data != NULL);
>       
>       if (( r = disk_ioctl_switch(dev, cmd, data)) == ENOTTY) {
> 
> Index: src/sys/dev/dm/dm.h
> diff -u src/sys/dev/dm/dm.h:1.18 src/sys/dev/dm/dm.h:1.19
> --- src/sys/dev/dm/dm.h:1.18  Tue May 18 15:10:41 2010
> +++ src/sys/dev/dm/dm.h       Sat Oct 23 21:18:54 2010
> @@ -1,4 +1,4 @@
> -/*        $NetBSD: dm.h,v 1.18 2010/05/18 15:10:41 haad Exp $      */
> +/*        $NetBSD: dm.h,v 1.19 2010/10/23 21:18:54 haad Exp $      */
> 
> /*
>  * Copyright (c) 2008 The NetBSD Foundation, Inc.
> @@ -170,12 +170,23 @@
> typedef struct target_linear_config {
>       dm_pdev_t *pdev;
>       uint64_t offset;
> +     TAILQ_ENTRY(target_linear_config) entries;
> } dm_target_linear_config_t;
> 
> +/*
> + * Striping devices are stored in a linked list, this might be inefficient
> + * for more than 8 striping devices and can be changed to something more
> + * scalable.
> + * TODO: look for other options than linked list.
> + */
> +TAILQ_HEAD(target_linear_devs, target_linear_config);
> +
> +typedef struct target_linear_devs dm_target_linear_devs_t;
> +
> /* for stripe : */
> typedef struct target_stripe_config {
> -#define MAX_STRIPES 2
> -     struct target_linear_config stripe_devs[MAX_STRIPES];
> +#define DM_STRIPE_DEV_OFFSET 2
> +     struct target_linear_devs stripe_devs;
>       uint8_t stripe_num;
>       uint64_t stripe_chunksize;
>       size_t params_len;
> 
> Index: src/sys/dev/dm/dm_target_stripe.c
> diff -u src/sys/dev/dm/dm_target_stripe.c:1.10 
> src/sys/dev/dm/dm_target_stripe.c:1.11
> --- src/sys/dev/dm/dm_target_stripe.c:1.10    Tue May 18 15:10:41 2010
> +++ src/sys/dev/dm/dm_target_stripe.c Sat Oct 23 21:18:54 2010
> @@ -1,4 +1,4 @@
> -/*$NetBSD: dm_target_stripe.c,v 1.10 2010/05/18 15:10:41 haad Exp $*/
> +/*$NetBSD: dm_target_stripe.c,v 1.11 2010/10/23 21:18:54 haad Exp $*/
> 
> /*
>  * Copyright (c) 2009 The NetBSD Foundation, Inc.
> @@ -102,6 +102,8 @@
> 
> /*
>  * Init function called from dm_table_load_ioctl.
> + * DM_STRIPE_DEV_OFFSET should always hold the index of the first 
> device-offset
> + * pair in the parameters.
>  * Example line sent to dm from lvm tools when using striped target.
>  * start length striped #stripes chunk_size device1 offset1 ... deviceN 
> offsetN
>  * 0 65536 striped 2 512 /dev/hda 0 /dev/hdb 0
> @@ -109,9 +111,11 @@
> int
> dm_target_stripe_init(dm_dev_t * dmv, void **target_config, char *params)
> {
> +     dm_target_linear_config_t *tlc;
>       dm_target_stripe_config_t *tsc;
>       size_t len;
>       char **ap, *argv[10];
> +     int strpc, strpi;
> 
>       if (params == NULL)
>               return EINVAL;
> @@ -130,33 +134,34 @@
> 
>       printf("Stripe target init function called!!\n");
> 
> -     printf("Stripe target chunk size %s number of stripes %s\n", argv[1], 
> argv[0]);
> -     printf("Stripe target device name %s -- offset %s\n", argv[2], argv[3]);
> -     printf("Stripe target device name %s -- offset %s\n", argv[4], argv[5]);
> +     printf("Stripe target chunk size %s number of stripes %s\n",
> +         argv[1], argv[0]);
> 
> -     if (atoi(argv[0]) > MAX_STRIPES)
> -             return ENOTSUP;
> -
> -     if ((tsc = kmem_alloc(sizeof(dm_target_stripe_config_t), KM_NOSLEEP))
> -         == NULL)
> +     if ((tsc = kmem_alloc(sizeof(*tsc), KM_NOSLEEP)) == NULL)
>               return ENOMEM;
> 
> -     /* Insert dmp to global pdev list */
> -     if ((tsc->stripe_devs[0].pdev = dm_pdev_insert(argv[2])) == NULL)
> -             return ENOENT;
> -
> -     /* Insert dmp to global pdev list */
> -     if ((tsc->stripe_devs[1].pdev = dm_pdev_insert(argv[4])) == NULL)
> -             return ENOENT;
> -
> -     tsc->stripe_devs[0].offset = atoi(argv[3]);
> -     tsc->stripe_devs[1].offset = atoi(argv[5]);
> +     /* Initialize linked list for striping devices */
> +     TAILQ_INIT(&tsc->stripe_devs);
> 
>       /* Save length of param string */
>       tsc->params_len = len;
>       tsc->stripe_chunksize = atoi(argv[1]);
>       tsc->stripe_num = (uint8_t) atoi(argv[0]);
> 
> +     strpc = DM_STRIPE_DEV_OFFSET + (tsc->stripe_num * 2);
> +     for (strpi = DM_STRIPE_DEV_OFFSET; strpi < strpc; strpi += 2) {
> +             printf("Stripe target device name %s -- offset %s\n",
> +                    argv[strpi], argv[strpi+1]);
> +
> +             tlc = kmem_alloc(sizeof(*tlc), KM_NOSLEEP);
> +             if ((tlc->pdev = dm_pdev_insert(argv[strpi])) == NULL)
> +                     return ENOENT; 
> +             tlc->offset = atoi(argv[strpi+1]);
> +
> +             /* Insert striping device to linked list. */
> +             TAILQ_INSERT_TAIL(&tsc->stripe_devs, tlc, entries);
> +     }
> +
>       *target_config = tsc;
> 
>       dmv->dev_type = DM_STRIPE_DEV;
> @@ -167,18 +172,28 @@
> char *
> dm_target_stripe_status(void *target_config)
> {
> +     dm_target_linear_config_t *tlc;
>       dm_target_stripe_config_t *tsc;
> -     char *params;
> +     char *params, *tmp;
> 
>       tsc = target_config;
> 
>       if ((params = kmem_alloc(DM_MAX_PARAMS_SIZE, KM_SLEEP)) == NULL)
>               return NULL;
> 
> -     snprintf(params, DM_MAX_PARAMS_SIZE, "%d %" PRIu64 " %s %" PRIu64 " %s 
> %" PRIu64,
> -         tsc->stripe_num, tsc->stripe_chunksize,
> -         tsc->stripe_devs[0].pdev->name, tsc->stripe_devs[0].offset,
> -         tsc->stripe_devs[1].pdev->name, tsc->stripe_devs[1].offset);
> +     if ((tmp = kmem_alloc(DM_MAX_PARAMS_SIZE, KM_SLEEP)) == NULL)
> +             return NULL;
> +
> +     snprintf(params, DM_MAX_PARAMS_SIZE, "%d %" PRIu64,
> +         tsc->stripe_num, tsc->stripe_chunksize);
> +
> +     TAILQ_FOREACH(tlc, &tsc->stripe_devs, entries) {
> +             snprintf(tmp, DM_MAX_PARAMS_SIZE, " %s %" PRIu64,
> +                 tlc->pdev->name, tlc->offset);
> +             strcat(params, tmp);
> +     }
> +
> +     kmem_free(tmp, DM_MAX_PARAMS_SIZE);
> 
>       return params;
> }
> @@ -186,12 +201,13 @@
> int
> dm_target_stripe_strategy(dm_table_entry_t * table_en, struct buf * bp)
> {
> +     dm_target_linear_config_t *tlc;
>       dm_target_stripe_config_t *tsc;
>       struct buf *nestbuf;
>       uint64_t blkno, blkoff;
>       uint64_t stripe, stripe_blknr;
>       uint32_t stripe_off, stripe_rest, num_blks, issue_blks;
> -     int stripe_devnr;
> +     int i, stripe_devnr;
> 
>       tsc = table_en->target_config;
>       if (tsc == NULL)
> @@ -224,9 +240,17 @@
> 
>               nestiobuf_setup(bp, nestbuf, blkoff, issue_blks * DEV_BSIZE);
>               nestbuf->b_blkno = stripe_blknr * tsc->stripe_chunksize + 
> stripe_off;
> -             nestbuf->b_blkno += tsc->stripe_devs[stripe_devnr].offset;
> 
> -             VOP_STRATEGY(tsc->stripe_devs[stripe_devnr].pdev->pdev_vnode, 
> nestbuf);
> +             tlc = TAILQ_FIRST(&tsc->stripe_devs);
> +             for (i = 0; i < stripe_devnr && tlc == NULL; i++)
> +                     tlc = TAILQ_NEXT(tlc, entries);
> +
> +             /* by this point we should have an tlc */
> +             KASSERT(tlc == NULL);
> +
> +             nestbuf->b_blkno += tlc->offset;
> +
> +             VOP_STRATEGY(tlc->pdev->pdev_vnode, nestbuf);
> 
>               blkno += issue_blks;
>               blkoff += issue_blks * DEV_BSIZE;
> @@ -242,16 +266,17 @@
> int
> dm_target_stripe_sync(dm_table_entry_t * table_en)
> {
> -     int cmd, err, i;
> +     int cmd, err;
>       dm_target_stripe_config_t *tsc;
> +     dm_target_linear_config_t *tlc;
> 
>       tsc = table_en->target_config;
> 
>       err = 0;
>       cmd = 1;
> 
> -     for (i = 0; i < tsc->stripe_num; i++) {
> -             if ((err = VOP_IOCTL(tsc->stripe_devs[i].pdev->pdev_vnode, 
> DIOCCACHESYNC,
> +     TAILQ_FOREACH(tlc, &tsc->stripe_devs, entries) {
> +             if ((err = VOP_IOCTL(tlc->pdev->pdev_vnode, DIOCCACHESYNC,
>                           &cmd, FREAD|FWRITE, kauth_cred_get())) != 0)
>                       return err;
>       }
> @@ -264,19 +289,23 @@
> dm_target_stripe_destroy(dm_table_entry_t * table_en)
> {
>       dm_target_stripe_config_t *tsc;
> +     dm_target_linear_config_t *tlc;
> 
>       tsc = table_en->target_config;
> 
>       if (tsc == NULL)
>               return 0;
> 
> -     dm_pdev_decr(tsc->stripe_devs[0].pdev);
> -     dm_pdev_decr(tsc->stripe_devs[1].pdev);
> +     while ((tlc = TAILQ_FIRST(&tsc->stripe_devs)) != NULL) {
> +             TAILQ_REMOVE(&tsc->stripe_devs, tlc, entries);
> +             dm_pdev_decr(tlc->pdev);
> +             kmem_free(tlc, sizeof(*tlc));
> +     }
> 
>       /* Unbusy target so we can unload it */
>       dm_target_unbusy(table_en->target);
> 
> -     kmem_free(tsc, sizeof(dm_target_stripe_config_t));
> +     kmem_free(tsc, sizeof(*tsc));
> 
>       table_en->target_config = NULL;
> 
> @@ -287,6 +316,7 @@
> dm_target_stripe_deps(dm_table_entry_t * table_en, prop_array_t prop_array)
> {
>       dm_target_stripe_config_t *tsc;
> +     dm_target_linear_config_t *tlc;
>       struct vattr va;
> 
>       int error;
> @@ -296,15 +326,12 @@
> 
>       tsc = table_en->target_config;
> 
> -     if ((error = VOP_GETATTR(tsc->stripe_devs[0].pdev->pdev_vnode, &va, 
> curlwp->l_cred)) != 0)
> -             return error;
> -
> -     prop_array_add_uint64(prop_array, (uint64_t) va.va_rdev);
> +     TAILQ_FOREACH(tlc, &tsc->stripe_devs, entries) {
> +             if ((error = VOP_GETATTR(tlc->pdev->pdev_vnode, &va, 
> curlwp->l_cred)) != 0)
> +                     return error;
> 
> -     if ((error = VOP_GETATTR(tsc->stripe_devs[1].pdev->pdev_vnode, &va, 
> curlwp->l_cred)) != 0)
> -             return error;
> -
> -     prop_array_add_uint64(prop_array, (uint64_t) va.va_rdev);
> +             prop_array_add_uint64(prop_array, (uint64_t) va.va_rdev);
> +     }
> 
>       return 0;
> }
> 
> Added files:
> 
> Index: src/sys/dev/dm/doc/locking.txt
> diff -u /dev/null src/sys/dev/dm/doc/locking.txt:1.1
> --- /dev/null Sat Oct 23 21:18:55 2010
> +++ src/sys/dev/dm/doc/locking.txt    Sat Oct 23 21:18:55 2010
> @@ -0,0 +1,263 @@
> +
> +                             Device-mapper Locking architecture
> +
> +Overview
> +
> +There are 2 users in device-mapper driver 
> +      a) Users who uses disk drives 
> +      b) Users who uses ioctl management interface
> +
> +Management is done by dm_dev_*_ioctl and dm_table_*_ioctl routines. There 
> are 
> +two major structures used in these routines/device-mapper. 
> +
> +Table entry:
> +
> +typedef struct dm_table_entry {
> +        struct dm_dev *dm_dev;          /* backlink */
> +        uint64_t start;
> +        uint64_t length;
> +
> +        struct dm_target *target;      /* Link to table target. */
> +        void *target_config;           /* Target specific data. */
> +        SLIST_ENTRY(dm_table_entry) next;
> +} dm_table_entry_t;
> +
> +This structure stores every target part of dm device. Every device can have
> +more than one target mapping entries stored in a list. This structure 
> describe
> +mapping between logical/physical blocks in dm device. 
> +
> +start  length target block device offset
> +0       102400 linear /dev/wd1a     384
> +102400 204800 linear /dev/wd2a     384
> +204800 409600 linear /dev/wd3a     384
> +
> +Every device has at least two tables ACTIVE and INACTIVE. Only ACTIVE table 
> is 
> +used during IO. Every IO operation on dm device have to walk through 
> dm_table_entries list. 
> +
> +Device entry:
> +
> +typedef struct dm_dev {
> +        char name[DM_NAME_LEN];
> +        char uuid[DM_UUID_LEN];
> +
> +        int minor;
> +        uint32_t flags; /* store communication protocol flags */
> +
> +        kmutex_t dev_mtx; /* mutex for generall device lock */
> +        kcondvar_t dev_cv; /* cv for ioctl synchronisation */
> +
> +        uint32_t event_nr;
> +        uint32_t ref_cnt;
> +
> +        uint32_t dev_type;
> +
> +        dm_table_head_t table_head;
> +
> +        struct dm_dev_head upcalls;
> +
> +        struct disklabel *dk_label;    /* Disklabel for this table. */
> +
> +        TAILQ_ENTRY(dm_dev) next_upcall; /* LIST of mirrored, snapshoted 
> devices. */
> +
> +        TAILQ_ENTRY(dm_dev) next_devlist; /* Major device list. */
> +} dm_dev_t;
> +
> +Every device created in dm device-mapper is represented with this structure. 
> +All devices are stored in a list. Every ioctl routine have to work with this 
> +structure.
> +
> +     Locking in dm driver
> +     
> +Locking must be done in two ways. Synchronisation between ioctl routines and 
> +between IO operations and ioctl. Table entries are read during IO and during 
> some ioctl routines. There are only few routines which manipulates table 
> lists.
> +
> +Read access to table list:
> +
> +dmsize 
> +dmstrategy
> +dm_dev_status_ioctl
> +dm_table_info_ioctl
> +dm_table_deps_ioctl
> +dm_disk_ioctl                -> DIOCCACHESYNC ioctl 
> +
> +Write access to table list:
> +dm_dev_remove_ioctl        -> remove device from list, this routine have to  
>          
> +                                                       remove all tables.
> +dm_dev_resume_ioctl             -> Switch tables on suspended device, switch 
> INACTIVE 
> +                                                       and ACTIVE tables.
> +dm_table_clear_ioctl            -> Remove INACTIVE table from table list.
> +
> +
> +Synchronisation between readers and writers in table list
> +
> +I moved everything needed for table synchronisation to struct dm_table_head.
> +
> +typedef struct dm_table_head {
> +        /* Current active table is selected with this. */
> +        int cur_active_table;
> +        struct dm_table tables[2];
> +
> +        kmutex_t   table_mtx;
> +        kcondvar_t table_cv; /*IO waiting cv */
> +
> +        uint32_t io_cnt;
> +} dm_table_head_t;
> +
> +dm_table_head_t is used as entry for every dm_table synchronisation routine.
> +
> +Because every table user have to get list to table list head I have 
> implemented
> +these routines to manage access to table lists. 
> +
> +/*                                                                           
>                                  
> + * Destroy all table data. This function can run when there are no           
>                                  
> + * readers on table lists.                                                   
>                                  
> + */
> +int dm_table_destroy(dm_table_head_t *, uint8_t);
> +
> +/*                                                                           
>                                  
> + * Return length of active table in device.                                  
>                                  
> + */
> +uint64_t dm_table_size(dm_table_head_t *);
> +
> +/*                                                                           
>                                  
> + * Return current active table to caller, increment io_cnt reference 
> counter.                                 
> + */
> +struct dm_table * dm_table_get_entry(dm_table_head_t *, uint8_t);
> +
> +/*                                                                           
>                                  
> + * Return > 0 if table is at least one table entry (returns number of 
> entries)                                
> + * and return 0 if there is not. Target count returned from this function    
>                                  
> + * doesn't need to be true when userspace user receive it (after return      
>                                  
> + * there can be dm_dev_resume_ioctl), therfore this isonly informative.      
>                                  
> + */
> +int dm_table_get_target_count(dm_table_head_t *, uint8_t);
> +
> +/*                                                                           
>                                  
> + * Decrement io reference counter and wake up all callers, with table_head 
> cv.                                
> + */
> +void dm_table_release(dm_table_head_t *, uint8_t s);
> +
> +/*                                                                           
>                                  
> + * Switch table from inactive to active mode. Have to wait until io_cnt is 
> 0.                                 
> + */
> +void dm_table_switch_tables(dm_table_head_t *);
> +
> +/*                                                                           
>                                  
> + * Initialize table_head structures, I'm trying to keep this structure as    
>                                  
> + * opaque as possible.                                                       
>                                  
> + */
> +void dm_table_head_init(dm_table_head_t *);
> +
> +/*                                                                           
>                                  
> + * Destroy all variables in table_head                                       
>                                  
> + */
> +void dm_table_head_destroy(dm_table_head_t *);
> +
> +Internal table synchronisation protocol
> +
> +Readers:
> +dm_table_size
> +dm_table_get_target_count
> +dm_table_get_target_count
> +
> +Readers with hold reference counter:
> +dm_table_get_entry
> +dm_table_release
> +
> +Writer:
> +dm_table_destroy
> +dm_table_switch_tables
> +
> +For managing synchronisation to table lists I use these routines. Every 
> reader 
> +uses dm_table_busy routine to hold reference counter during work and 
> dm_table_unbusy for reference counter release. Every writer have to wait 
> while 
> +is reference counter 0 and only then it can work with device. It will sleep 
> on 
> +head->table_cv while there are other readers. dm_table_get_entry is specific 
> in that it will return table with hold reference counter. After 
> dm_table_get_entry 
> +every caller must call dm_table_release when it doesn't want to work with 
> it. 
> +
> +/*                                                                           
>                                  
> + * Function to increment table user reference counter. Return id             
>                                  
> + * of table_id table.                                                        
>                                  
> + * DM_TABLE_ACTIVE will return active table id.                              
>                                  
> + * DM_TABLE_INACTIVE will return inactive table id.                          
>                                  
> + */
> +static int
> +dm_table_busy(dm_table_head_t *head, uint8_t table_id)
> +{
> +        uint8_t id;
> +
> +        id = 0;
> +
> +        mutex_enter(&head->table_mtx);
> +
> +        if (table_id == DM_TABLE_ACTIVE)
> +                id = head->cur_active_table;
> +        else
> +                id = 1 - head->cur_active_table;
> +
> +        head->io_cnt++;
> +
> +        mutex_exit(&head->table_mtx);
> +        return id;
> +}
> +
> +/*                                                                           
>                                  
> + * Function release table lock and eventually wakeup all waiters.            
>                                  
> + */
> +static void
> +dm_table_unbusy(dm_table_head_t *head)
> +{
> +        KASSERT(head->io_cnt != 0);
> +
> +        mutex_enter(&head->table_mtx);
> +
> +        if (--head->io_cnt == 0)
> +                cv_broadcast(&head->table_cv);
> +
> +        mutex_exit(&head->table_mtx);
> +}
> +
> +Device-mapper betwwen ioctl device synchronisation 
> +
> +
> +Every ioctl user have to find dm_device with name, uuid, minor number. 
> +For this dm_dev_lookup is used. This routine returns device with hold 
> reference 
> +counter. 
> +
> +void
> +dm_dev_busy(dm_dev_t *dmv)
> +{
> +        mutex_enter(&dmv->dev_mtx);
> +        dmv->ref_cnt++;
> +        mutex_exit(&dmv->dev_mtx);
> +}
> +
> +void
> +dm_dev_unbusy(dm_dev_t *dmv)
> +{
> +        KASSERT(dmv->ref_cnt != 0);
> +
> +        mutex_enter(&dmv->dev_mtx);
> +        if (--dmv->ref_cnt == 0)
> +                cv_broadcast(&dmv->dev_cv);
> +        mutex_exit(&dmv->dev_mtx);
> +}
> +
> +Before returning from ioctl routine must release reference counter with 
> +dm_dev_unbusy.
> +
> +dm_dev_remove_ioctl routine have to remove dm_dev from global device list,
> +and wait until all ioctl users from dm_dev are gone. 
> +
> +
> +
> +
> +
> +
> +
> +
> +
> +
> +
> +
> +
> +
> 

Regards

Adam.



Home | Main Index | Thread Index | Old Index