Subject: proposed
To: None <tech-kern@netbsd.org>
From: Luke Mewburn <lukem@wasabisystems.com>
List: tech-kern
Date: 11/30/2001 17:57:08
--IMjqdzrDRly81ofr
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

after the long discussions on tech-{kern,perform}@netbsd.org about
hash functions, here's a proposed <sys/hash.h>.

some notes:

- we provide two types of hash functions
	- binary blobs:
		hash32_buf()	given size
	- ascii strings:
		hash32_str()	NUL terminated
		hash32_strn()	NUL terminated or up to a given size
				(same hash implementation has hash32_str())

- it's ok if the hash functions for binary and ascii are different,
  but the different ascii variants must return the same result on the
  same data & number of bytes.

- we provide the ability to provide an MD <machine/hash.h> if
  <machine/types.h> (pulled in by <sys/types.h>) defines
  __HAVE_MACHINE_HASH_H which may provide MD overrides of one
  or both types of the hash functions

- the initial implementations use:
	- binary: "nemesi" (k=257, result hash*257)
	- ascii: "perl" (k=33, result hash+hash/32)
  based on various test results provided by ozan yigit, simon burge
  and others

- each of the functions has an initial hash value, to allow incremental
  hashes (suggested by Peter Wemm). e.g,
	hash = hash32_buf(cnp->cn_nameptr, cnp->cn_namelen, HASH32_BUF_INIT);
	hash = hash32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);


here's <sys/hash.h>, and a test program that tests if hash32_strn() DTRT.
the results of that test program on my i386 system are:
	str  brendan = 913f6f2d
	strn brend (5), = fa605f7
	strn brenda (6), = fc66c54b
	strn brendan (7), = 913f6f2d
	strn brendan (8), = 913f6f2d
	strn brendan (9), = 913f6f2d
	buf  brendan = d80aa6df
	str  brenda = fc66c54b

luke.

--IMjqdzrDRly81ofr
Content-Type: text/plain; charset=us-ascii
Content-Description: sys/hash.h
Content-Disposition: attachment; filename="hash.h"

/*	$NetBSD$	*/

#ifndef	_SYS_HASH_H_
#define	_SYS_HASH_H_

#include <sys/types.h>
#ifdef __HAVE_MACHINE_HASH_H
#include <machine/hash.h>
#endif


#ifndef __HAVE_HASH32_BUF			/* not overridden by MD hash */

#define	HASH32_BUF_INIT	5381

/*
 * uint32_t
 * hash32_buf(const void *buf, size_t len, uint32_t hash)
 *	return a 32 bit hash of the binary buffer buf (size len),
 *	seeded with an initial hash value of hash (usually HASH32_BUF_INIT).
 */
static __inline uint32_t
hash32_buf(const void *buf, size_t len, uint32_t hash)
{
	const uint8_t *s = buf;

	while (len-- != 0)			/* "nemesi": k=257, r*257 */
		hash = hash * 257 + *s++;
	return (hash * 257);
}
#endif	/* __HAVE_HASH32_BUF */


#ifndef __HAVE_HASH32_STR			/* not overridden by MD hash */

#define	HASH32_STR_INIT	5381
/*
 * uint32_t
 * hash32_str(const u_char *buf, uint32_t hash)
 *	return a 32 bit hash of NUL terminated ASCII string buf,
 *	seeded with an initial hash value of hash (usually HASH32_STR_INIT).
 */
static __inline uint32_t
hash32_str(const u_char *buf, uint32_t hash)
{
	const uint8_t *s = buf;
	uint8_t	c;

	while ((c = *s++) != 0)
		hash = hash * 33 + c;		/* "perl": k=33, r+r/32 */
	return (hash + (hash >> 5));
}

/*
 * uint32_t
 * hash32_strn(const u_char *buf, size_t len, uint32_t hash)
 *	return a 32 bit hash of NUL terminated ASCII string buf up to
 *	a maximum of len bytes,
 *	seeded with an initial hash value of hash (usually HASH32_STR_INIT).
 */
static __inline uint32_t
hash32_strn(const u_char *buf, size_t len, uint32_t hash)
{
	const uint8_t	*s = buf;
	uint8_t	c;

	while ((c = *s++) != 0 && len-- != 0)
		hash = hash * 33 + c;		/* "perl": k=33, r+r/32 */
	return (hash + (hash >> 5));
}
#endif	/* __HAVE_HASH32_STR */


#endif	/* !_SYS_HASH_H_ */

--IMjqdzrDRly81ofr
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="hashtest.c"

#include <sys/hash.h>

#include <stdio.h>

int
main(int argc, char *argv[])
{
	char str[] = "brendan";
	int i;

	printf("str  %s = %x\n", str, hash32_str(str, HASH32_STR_INIT));
	for (i = -3; i < 2; i++)
		printf("strn %.*s (%d), = %x\n",
		    sizeof(str)+i, str, sizeof(str)+i,
		    hash32_strn(str, sizeof(str)+i, HASH32_STR_INIT));
	printf("buf  %s = %x\n", str,
	    hash32_buf(str, sizeof(str)-1, HASH32_STR_INIT));
	str[sizeof(str)-2] = '\0';
	printf("str  %s = %x\n", str, hash32_str(str, HASH32_STR_INIT));
}

--IMjqdzrDRly81ofr--