tech-userlevel archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

RFC: Constant Database Support



Hi all,
attached is an implement of a constant database based on a minimal
perfect hash table. This is a very compact read-only database. Two
special design constraint is that more than one key can exist for a
value and that the key itself is not stored by the database. Most users
already do that one way or the other in the data, so it is just
redundant and with a proper content format not even cheaper.

A patch to modify services_mkdb for using the new format is attached as
well to demonstrate the size difference. For this use case, the
database size drops from 2.1MB (disk space, 4.2MB file size) to 320KB
(disk space, 306KB file size). Creation time drops from 1.9s to 0.23s
on AMD64.

I would like to integrate this into libc for the various purposes at
hand.

Joerg

Attachment: cdb.tar.gz
Description: application/tar-gz

Index: Makefile
===================================================================
RCS file: /home/joerg/repo/netbsd/src/usr.sbin/services_mkdb/Makefile,v
retrieving revision 1.6
diff -u -p -r1.6 Makefile
--- Makefile    22 Apr 2009 15:23:08 -0000      1.6
+++ Makefile    3 Mar 2010 23:55:00 -0000
@@ -4,7 +4,7 @@ PROG=   services_mkdb
 MAN=   services_mkdb.8
 SRCS=  services_mkdb.c uniq.c
 
-LDADD+=        -lutil
+LDADD+=        -lutil -lcdbw
 DPADD+=        ${LIBUTIL}
 #COPTS+=-g
 
Index: services_mkdb.c
===================================================================
RCS file: /home/joerg/repo/netbsd/src/usr.sbin/services_mkdb/services_mkdb.c,v
retrieving revision 1.14
diff -u -p -r1.14 services_mkdb.c
--- services_mkdb.c     28 Apr 2008 20:24:17 -0000      1.14
+++ services_mkdb.c     3 Mar 2010 23:54:31 -0000
@@ -37,7 +37,7 @@ __RCSID("$NetBSD: services_mkdb.c,v 1.14
 #include <sys/param.h>
 
 #include <assert.h>
-#include <db.h>
+#include <cdbw.h>
 #include <err.h>
 #include <fcntl.h>
 #include <netdb.h>
@@ -57,31 +57,21 @@ static char tname[MAXPATHLEN];
 
 extern void    uniq(const char *);
 
-static void    add(DB *, StringList *, size_t, const char *, size_t *, int);
+static void    add(StringList *, size_t, const char *, size_t *, int);
 static StringList ***parseservices(const char *, StringList *);
 static void    cleanup(void);
-static void    store(DB *, DBT *, DBT *, int);
-static void    killproto(DBT *);
 static char    *getstring(const char *, size_t, char **, const char *);
 static size_t  getprotoindex(StringList *, const char *);
 static const char *getprotostr(StringList *, size_t);
 static const char *mkaliases(StringList *, char *, size_t);
 static void    usage(void) __dead;
 
-const HASHINFO hinfo = {
-       .bsize = 256,
-       .ffactor = 4,
-       .nelem = 32768,
-       .cachesize = 1024,
-       .hash = NULL,
-       .lorder = 0
-};
-
+struct cdbw *cdbw;
 
 int
 main(int argc, char *argv[])
 {
-       DB      *db;
+       FILE    *output;
        int      ch;
        const char *fname = _PATH_SERVICES;
        const char *dbname = _PATH_SERVICES_DB;
@@ -129,11 +119,13 @@ main(int argc, char *argv[])
                err(1, "Cannot install exit handler");
 
        (void)snprintf(tname, sizeof(tname), "%s.tmp", dbname);
-       db = dbopen(tname, O_RDWR | O_CREAT | O_EXCL,
-           (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH), DB_HASH, &hinfo);
-       if (!db)
+       output = fopen(tname, "w");
+       if (output == NULL)
                err(1, "Error opening temporary database `%s'", tname);
 
+       cdbw = cdbw_open();
+       if (cdbw == NULL)
+               err(1, "Error opening cdb writer");
 
        for (port = 0; port < PMASK + 1; port++) {
                if (svc[port] == NULL)
@@ -143,7 +135,7 @@ main(int argc, char *argv[])
                        StringList *s;
                        if ((s = svc[port][proto]) == NULL)
                                continue;
-                       add(db, s, port, getprotostr(sl, proto), &cnt, warndup);
+                       add(s, port, getprotostr(sl, proto), &cnt, warndup);
                }
 
                free(svc[port]);
@@ -152,7 +144,13 @@ main(int argc, char *argv[])
        free(svc);
        sl_free(sl, 1);
 
-       if ((db->close)(db))
+       if (cdbw_output(cdbw, output, NULL)) {
+               err(1, "Error computing temporary database `%s'", tname);
+       }
+
+       cdbw_close(cdbw);
+
+       if (fclose(output))
                err(1, "Error closing temporary database `%s'", tname);
 
        if (rename(tname, dbname) == -1)
@@ -162,14 +160,14 @@ main(int argc, char *argv[])
 }
 
 static void
-add(DB *db, StringList *sl, size_t port, const char *proto, size_t *cnt,
+add(StringList *sl, size_t port, const char *proto, size_t *cnt,
     int warndup)
 {
+       char abuf[BUFSIZ];
+       char *data, *key;
+       size_t datalen, keylen;
+       uint32_t idx;
        size_t i;
-       char     keyb[BUFSIZ], datab[BUFSIZ], abuf[BUFSIZ];
-       DBT      data, key;
-       key.data = keyb;
-       data.data = datab;
 
 #ifdef DEBUG
        (void)printf("add %s %zu %s [ ", sl->sl_str[0], port, proto);
@@ -178,32 +176,31 @@ add(DB *db, StringList *sl, size_t port,
        (void)printf("]\n");
 #endif
 
-       /* key `indirect key', data `full line' */
-       data.size = snprintf(datab, sizeof(datab), "%zu", (*cnt)++) + 1;
-       key.size = snprintf(keyb, sizeof(keyb), "%s %zu/%s %s",
-           sl->sl_str[0], port, proto, mkaliases(sl, abuf, sizeof(abuf))) + 1;
-       store(db, &data, &key, warndup);
-
-       /* key `\377port/proto', data = `indirect key' */
-       key.size = snprintf(keyb, sizeof(keyb), "\377%zu/%s",
-           port, proto) + 1;
-       store(db, &key, &data, warndup);
-
-       /* key `\377port', data = `indirect key' */
-       killproto(&key);
-       store(db, &key, &data, warndup);
+       datalen = asprintf(&data, "%s %zu/%s %s",
+           sl->sl_str[0], port, proto, mkaliases(sl, abuf, sizeof(abuf)));
+
+       if (cdbw_put_data(cdbw, data, datalen, &idx))
+               err(1, "cdbw_put_data failed");
+
+       keylen = asprintf(&key, "\377%zu/%s", port, proto);
+       if (cdbw_put_key(cdbw, key, keylen, idx) && warndup)
+               warnx("Duplicate key: `%s'", key);
+
+       keylen = asprintf(&key, "\377%zu", port);
+       if (cdbw_put_key(cdbw, key, keylen, idx) && warndup)
+               warnx("Duplicate key: `%s'", key);
 
        /* add references for service and all aliases */
        for (i = 0; i < sl->sl_cur; i++) {
-               /* key `\376service/proto', data = `indirect key' */
-               key.size = snprintf(keyb, sizeof(keyb), "\376%s/%s",
-                   sl->sl_str[i], proto) + 1;
-               store(db, &key, &data, warndup);
-
-               /* key `\376service', data = `indirect key' */
-               killproto(&key);
-               store(db, &key, &data, warndup);
+               keylen = asprintf(&key, "\376%s/%s", sl->sl_str[i], proto);
+               if (cdbw_put_key(cdbw, key, keylen, idx) && warndup)
+                       warnx("Duplicate key: `%s'", key);
+
+               keylen = asprintf(&key, "\376%s", sl->sl_str[i]);
+               if (cdbw_put_key(cdbw, key, keylen, idx) && warndup)
+                       warnx("Duplicate key: `%s'", key);
        }
+
        sl_free(sl, 1);
 }
 
@@ -318,44 +315,6 @@ getstring(const char *fname, size_t line
        return str;
 }
 
-static void
-killproto(DBT *key)
-{
-       char *p, *d = key->data;
-
-       if ((p = strchr(d, '/')) == NULL)
-               abort();
-       *p++ = '\0';
-       key->size = p - d;
-}
-
-static void
-store(DB *db, DBT *key, DBT *data, int warndup)
-{
-#ifdef DEBUG
-       int k = key->size - 1;
-       int d = data->size - 1;
-       (void)printf("store [%*.*s] [%*.*s]\n",
-               k, k, (char *)key->data + 1,
-               d, d, (char *)data->data + 1);
-#endif
-       switch ((db->put)(db, key, data, R_NOOVERWRITE)) {
-       case 0:
-               break;
-       case 1:
-               if (warndup)
-                       warnx("duplicate service `%s'",
-                           &((char *)key->data)[1]);
-               break;
-       case -1:
-               err(1, "put");
-               break;
-       default:
-               abort();
-               break;
-       }
-}
-
 static size_t
 getprotoindex(StringList *sl, const char *str)
 {
Index: uniq.c
===================================================================
RCS file: /home/joerg/repo/netbsd/src/usr.sbin/services_mkdb/uniq.c,v
retrieving revision 1.4
diff -u -p -r1.4 uniq.c
--- uniq.c      28 Apr 2008 20:24:17 -0000      1.4
+++ uniq.c      3 Mar 2010 23:55:21 -0000
@@ -40,7 +40,14 @@ __RCSID("$NetBSD: uniq.c,v 1.4 2008/04/2
 #include <ctype.h>
 #include <fcntl.h>
 
-extern const HASHINFO hinfo;
+static const HASHINFO hinfo = {
+       .bsize = 256,
+       .ffactor = 4,
+       .nelem = 32768,
+       .cachesize = 1024,
+       .hash = NULL,
+       .lorder = 0
+};
 
 void uniq(const char *);
 static int comp(const char *, char **, size_t *);


Home | Main Index | Thread Index | Old Index