Subject: kern/36865: opencrypto - deflate wastes time with useless data copying
To: None <kern-bug-people@netbsd.org, gnats-admin@netbsd.org,>
From: Wolfgang Stukenbrock <Wolfgang.Stukenbrock@nagler-company.com>
List: netbsd-bugs
Date: 08/30/2007 10:55:00
>Number:         36865
>Category:       kern
>Synopsis:       opencrypto - deflate wastes time with useless data copying
>Confidential:   no
>Severity:       non-critical
>Priority:       medium
>Responsible:    kern-bug-people
>State:          open
>Class:          change-request
>Submitter-Id:   net
>Arrival-Date:   Thu Aug 30 10:55:00 +0000 2007
>Originator:     Wolfgang Stukenbrock
>Release:        NetBSD 3.1
>Organization:
Dr. Nagler & company GmbH
	
>Environment:
	
	
System: NetBSD test-s0 3.1 NetBSD 3.1 (test-s0) #0: Tue Apr 3 11:33:43 CEST 2007 root@test-s0:/usr/src/sys/arch/i386/compile/test-s0 i386
Architecture: i386
Machine: i386
>Description:
	In /usr/src/sys/opencrypto/deflate.c there will be some buffers be
	allocated during operation.
	After operation is done, a new buffer is allocated and all resulting
	data is copyed into that one, even if there is only one result buffer.
	It would be faster to return the result-buffer directly if there
	is only on present.
	This routine is only used in crptosoft.c via xform.c from function
	swcr_compdec() and the returned buffer is freed there after moving
	the data out of it.
	A second subject to be enhanced is the structure component "flag" of
	struct deflate_buf.
	It is used to detect, if there is valid information in thsi structure
	in order to call FREE() only for valid information.
	The same checking can be done by using the variable i, that is used
	to determine the next insertion point in case of buffer expansion.
	The struct deflate_buf is only used inside of this file.
>How-To-Repeat:
	not relevant
>Fix:
	The follwoing patch will remove the structure component "flag" and
	avoid useless copy operations.
	remark: this patch assunes that patch from 36864 has not been applied yet!

*** deflate.h   2007/08/30 09:51:30     1.1
--- deflate.h   2007/08/30 09:52:06
***************
*** 51,57 ****
  struct deflate_buf {
  	u_int8_t *out;
  	u_int32_t size;
- 	int flag;
  };
  
  #endif /* _CRYPTO_DEFLATE_H_ */
--- 51,56 ----


*** deflate.c	2007/08/30 09:51:30	1.1
--- deflate.c	2007/08/30 10:47:16
***************
*** 81,92 ****
  	z_stream zbuf;
  	u_int8_t *output;
  	u_int32_t count, result;
! 	int error, i = 0, j;
  	struct deflate_buf buf[ZBUF];
  
  	bzero(&zbuf, sizeof(z_stream));
- 	for (j = 0; j < ZBUF; j++)
- 		buf[j].flag = 0;
  
  	zbuf.next_in = data;	/* data that is going to be processed */
  	zbuf.zalloc = ocf_zalloc;
--- 81,90 ----
  	z_stream zbuf;
  	u_int8_t *output;
  	u_int32_t count, result;
! 	int error, i, j;
  	struct deflate_buf buf[ZBUF];
  
  	bzero(&zbuf, sizeof(z_stream));
  
  	zbuf.next_in = data;	/* data that is going to be processed */
  	zbuf.zalloc = ocf_zalloc;
***************
*** 95,107 ****
  	zbuf.avail_in = size;	/* Total length of data to be processed */
  
  	if (!decomp) {
! 		MALLOC(buf[i].out, u_int8_t *, (u_long) size, M_CRYPTO_DATA,
! 		    M_NOWAIT);
! 		if (buf[i].out == NULL)
! 			goto bad;
! 		buf[i].size = size;
! 		buf[i].flag = 1;
! 		i++;
  	} else {
  		/*
  	 	 * Choose a buffer with 4x the size of the input buffer
--- 93,99 ----
  	zbuf.avail_in = size;	/* Total length of data to be processed */
  
  	if (!decomp) {
! 		buf[0].size = size;
  	} else {
  		/*
  	 	 * Choose a buffer with 4x the size of the input buffer
***************
*** 110,123 ****
  	 	 * updated while the decompression is going on
  	 	 */
  
! 		MALLOC(buf[i].out, u_int8_t *, (u_long) (size * 4),
! 		    M_CRYPTO_DATA, M_NOWAIT);
! 		if (buf[i].out == NULL)
! 			goto bad;
! 		buf[i].size = size * 4;
! 		buf[i].flag = 1;
! 		i++;
  	}
  
  	zbuf.next_out = buf[0].out;
  	zbuf.avail_out = buf[0].size;
--- 102,113 ----
  	 	 * updated while the decompression is going on
  	 	 */
  
! 		buf[0].size = size * 4;
  	}
+ 	MALLOC(buf[0].out, u_int8_t *, (u_long) buf[0].size, M_CRYPTO_DATA, M_NOWAIT);
+ 	if (buf[0].out == NULL)
+ 		goto bad;
+ 	i = 1;
  
  	zbuf.next_out = buf[0].out;
  	zbuf.avail_out = buf[0].size;
***************
*** 135,141 ****
  			goto bad;
  		else if (zbuf.avail_in == 0 && zbuf.avail_out != 0)
  			goto end;
! 		else if (zbuf.avail_out == 0 && i < (ZBUF - 1)) {
  			/* we need more output space, allocate size */
  			MALLOC(buf[i].out, u_int8_t *, (u_long) size,
  			    M_CRYPTO_DATA, M_NOWAIT);
--- 125,131 ----
  			goto bad;
  		else if (zbuf.avail_in == 0 && zbuf.avail_out != 0)
  			goto end;
! 		else if (zbuf.avail_out == 0 && i < ZBUF) {
  			/* we need more output space, allocate size */
  			MALLOC(buf[i].out, u_int8_t *, (u_long) size,
  			    M_CRYPTO_DATA, M_NOWAIT);
***************
*** 143,149 ****
  				goto bad;
  			zbuf.next_out = buf[i].out;
  			buf[i].size = size;
- 			buf[i].flag = 1;
  			zbuf.avail_out = buf[i].size;
  			i++;
  		} else
--- 133,138 ----
***************
*** 153,186 ****
  end:
  	result = count = zbuf.total_out;
  
- 	MALLOC(*out, u_int8_t *, (u_long) result, M_CRYPTO_DATA, M_NOWAIT);
- 	if (*out == NULL)
- 		goto bad;
  	if (decomp)
  		inflateEnd(&zbuf);
  	else
  		deflateEnd(&zbuf);
! 	output = *out;
! 	for (j = 0; buf[j].flag != 0; j++) {
! 		if (count > buf[j].size) {
! 			bcopy(buf[j].out, *out, buf[j].size);
! 			*out += buf[j].size;
! 			FREE(buf[j].out, M_CRYPTO_DATA);
! 			count -= buf[j].size;
! 		} else {
! 			/* it should be the last buffer */
! 			bcopy(buf[j].out, *out, count);
! 			*out += count;
! 			FREE(buf[j].out, M_CRYPTO_DATA);
! 			count = 0;
  		}
  	}
- 	*out = output;
  	return result;
  
  bad:
  	*out = NULL;
! 	for (j = 0; buf[j].flag != 0; j++)
  		FREE(buf[j].out, M_CRYPTO_DATA);
  	if (decomp)
  		inflateEnd(&zbuf);
--- 142,181 ----
  end:
  	result = count = zbuf.total_out;
  
  	if (decomp)
  		inflateEnd(&zbuf);
  	else
  		deflateEnd(&zbuf);
! 
! 	if (i != 1) { /* copy everything into one buffer */
! 		MALLOC(*out, u_int8_t *, (u_long) result, M_CRYPTO_DATA, M_NOWAIT);
! 		if (*out == NULL)
! 			goto bad;
! 		output = *out;
! 		for (j = 0; j < i; j++) {
! 			if (count > buf[j].size) {
! 				bcopy(buf[j].out, output, buf[j].size);
! 				output += buf[j].size;
! 				FREE(buf[j].out, M_CRYPTO_DATA);
! 				count -= buf[j].size;
! 			} else {
! 				/* it should be the last buffer */
! 				bcopy(buf[j].out, output, count);
! 				output += count;
! 				FREE(buf[j].out, M_CRYPTO_DATA);
! 				count = 0;
! 			}
  		}
+ 	} else { /* only one result buffer present - just return that one
+ 		  * do not wast time in with malloc/free and copying things around
+ 		  */
+ 		*out = buf[0].out;
  	}
  	return result;
  
  bad:
  	*out = NULL;
! 	for (j = 0; j < i; j++)
  		FREE(buf[j].out, M_CRYPTO_DATA);
  	if (decomp)
  		inflateEnd(&zbuf);

>Unformatted: