tech-userlevel archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: Bad threading performance



Your program makes life somewhat difficult for the scheduler
because your jobs are short-lived.  It sees a number of threads in a
tight loop of communication together, running short tasks, and so will try to
run these on the same CPU.

When you signal and unlock each task queue, it's likely to preempt the
controlling thread and run the short job on the same CPU, so you get a sort
of serializing effect because the controlling thread is off the CPU and
can't signal the next worker thread to run.

As each worker thread consumes CPU time it will be penalised by having its
priority lowered below that of the controlling thread.  This cancels the
preemption effect, causing things to be queued up on the controlling CPU
thereby allowing remote CPUs to steal tasks from it and get a slice of
the action.

For some reason this distribution as a result of lowered priority is not
happening, perhaps because of this test in sched_takecpu():

    385                 /* If CPU of this thread is idling - run there */
    386                 if (ci_rq->r_count == 0) {

Can you try changing this to:

        /*
         * If CPU of this thread is idling - run there.
         * XXXAD Test for PL_PPWAIT and special case for vfork()??
         */
        if (ci->ci_data.cpu_onproc == ci->ci_data.cpu_idlelwp)

On Tue, Mar 08, 2011 at 05:19:35PM +0000, Sad Clouds wrote:

> On Mon, 7 Mar 2011 18:39:39 +0000
> Sad Clouds <cryintothebluesky%googlemail.com@localhost> wrote:
> 
> > Below are test results. Any ideas why concurrency on NetBSD is so bad
> > compared to Solaris? It seems as if on NetBSD threads are stuck on a
> > wait list for much longer.
> 
> OK this is a follow up on the issue I've raised previously. I have
> attached a test program, which has good concurrency on Linux and
> Solaris, however on NetBSD concurrency is very poor, i.e. CPU sits idle
> a lot of the time.
> 
> I'd be interested to hear from NetBSD developers as to what causes
> this, or at least if this is a known issue...

> /*
> Build with:
> gcc -O1 test_ptask.c -lpthread
> */
> 
> #include <stdio.h>
> #include <stdint.h>
> #include <stdlib.h>
> #include <pthread.h>
> 
> #define NTHREADS 4
> 
> /* Function call structure */
> struct fcall
> {
>       /* Function pointers */
>       void *(*fptr)(void *arg);
>       /* Pointer to function arguments structure */
>       void *arg;
> };
> 
> /* Function arguments */
> struct farg
> {
>       uint32_t n1;
>       uint32_t n2;
>       /* Pad to 64-byte boundary */
>       uint8_t pad[64 - (sizeof(uint32_t) * 2)];
> };
> 
> /* Parallel task */
> struct ptask
> {
>       pthread_mutex_t mutex;
>       pthread_cond_t cond;
>       uint8_t pad0[64];
> 
>       /* Array of function call parameters */
>       struct fcall fcalls[NTHREADS];
>       uint8_t pad1[64];
> 
>       /* Array of function arguments */
>       struct farg args[NTHREADS];
> 
>       uint32_t task_run_cnt; /* Counter of tasks to run */
> };
> 
> /* Thread instance */
> struct thread
> {
>       pthread_mutex_t mutex;
>       pthread_cond_t cond;
> 
>       struct fcall *fcall_ptr;
>       struct ptask *ptask_ptr;
> 
>       uint8_t pad[64];
> };
> 
> /* Thread pool */
> struct tpool
> {
>       struct thread threads_array[NTHREADS];
> };
> 
> /* Thread function passed to pthread_create() */
> void *thread_func(void *arg)
> {
>       struct thread *tptr = (struct thread *)arg;
>       struct fcall *fcall;
>       struct ptask *ptask;
> 
>       while (1)
>       {
>               if (pthread_mutex_lock(&(tptr->mutex)) != 0)
>                       abort();
> 
>               /* Sleep on a condition variable */
>               while (tptr->fcall_ptr == NULL)
>               {
>                       if (pthread_cond_wait(&(tptr->cond), &(tptr->mutex)) != 
> 0)
>                               abort();
>               }
> 
>               /* Copy pointers to local variables */
>               fcall = tptr->fcall_ptr;
>               ptask = tptr->ptask_ptr;
> 
>               /* Reset to null values */
>               tptr->fcall_ptr = NULL;
>               tptr->ptask_ptr = NULL;
> 
>               if (pthread_mutex_unlock(&(tptr->mutex)) != 0)
>                       abort();
> 
>               /* Run current task */
>               fcall->fptr(fcall->arg);
> 
>               if (pthread_mutex_lock(&(ptask->mutex)) != 0)
>                       abort();
> 
>               /* If this is last task, signal to waiting main thread */
>               if (--(ptask->task_run_cnt) == 0)
>               {
>                       if (pthread_cond_signal(&(ptask->cond)) != 0)
>                               abort();
>               }
> 
>               if (pthread_mutex_unlock(&(ptask->mutex)) != 0)
>                       abort();
>       } /* while (1) */
> }
> 
> void *test_func(void *arg)
> {
>       struct farg *farg = (struct farg *)arg;
>       int i;
> 
>       for (i = 0; i < 1000000; i++)
>       {
>               farg->n1++;
>               farg->n2++;
>       }
>       return NULL;
> }
> 
> static struct tpool tpool;
> 
> int main(void)
> {
>       int i, j;
>       struct ptask ptask;
>       pthread_t tid;
> 
>       /* Initialize ptask */
>       if (pthread_mutex_init(&(ptask.mutex), NULL) != 0)
>               abort();
>       if (pthread_cond_init(&(ptask.cond), NULL) != 0)
>               abort();
> 
>       /* Initialize threads */
>       for (j = 0; j < NTHREADS; j++)
>       {
>               if (pthread_mutex_init(&(tpool.threads_array[j].mutex), NULL) 
> != 0)
>                       abort();
> 
>               if (pthread_cond_init(&(tpool.threads_array[j].cond), NULL) != 
> 0)
>                       abort();
> 
>               tpool.threads_array[j].fcall_ptr = NULL;
>               tpool.threads_array[j].ptask_ptr = NULL;
> 
>               if (pthread_create(
>                       &tid, NULL, &thread_func, &(tpool.threads_array[j])) != 
> 0)
>               {
>                       abort();
>               }
>       }
> 
>       for (i = 0; i < 100000; i++)
>       {
>               /* Set function arguments */
>               for (j = 0; j < NTHREADS; j++)
>               {
>                       ptask.fcalls[j].fptr = &test_func;
>                       ptask.fcalls[j].arg = &(ptask.args[j]);
>                       ptask.args[j].n1 = j;
>                       ptask.args[j].n2 = j;
>               }
>               ptask.task_run_cnt = NTHREADS;
> 
>               /* Tell threads to execute functions */
>               for (j = 0; j < NTHREADS; j++)
>               {
>                       if (pthread_mutex_lock(&(tpool.threads_array[j].mutex)) 
> != 0)
>                               abort();
> 
>                       tpool.threads_array[j].fcall_ptr = &(ptask.fcalls[j]);
>                       tpool.threads_array[j].ptask_ptr = &ptask;
> 
>                       if (pthread_cond_signal(&(tpool.threads_array[j].cond)) 
> != 0)
>                               abort();
> 
>                       if 
> (pthread_mutex_unlock(&(tpool.threads_array[j].mutex)) != 0)
>                               abort();
>               }
> 
>               /* Wait for all threads to finish */
>               if (pthread_mutex_lock(&(ptask.mutex)) != 0)
>                       abort();
> 
>               while (ptask.task_run_cnt != 0)
>               {
>                       if (pthread_cond_wait(&(ptask.cond), &(ptask.mutex)) != 
> 0)
>                               abort();
>               }
> 
>               if (pthread_mutex_unlock(&(ptask.mutex)) != 0)
>                       abort();
>       }
> 
>       return 0;
> }



Home | Main Index | Thread Index | Old Index