tech-userlevel archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

(De-)Compression support for makemandb



Hi all,
the attached patch allows makemandb to process compressed man pages.
All the normal formats (compress, gzip, bzip2, xz) are handled.

Joerg
Index: Makefile
===================================================================
RCS file: /home/joerg/repo/netbsd/src/usr.sbin/makemandb/Makefile,v
retrieving revision 1.1
diff -u -p -r1.1 Makefile
--- Makefile    7 Feb 2012 19:13:32 -0000       1.1
+++ Makefile    15 Feb 2012 14:20:45 -0000
@@ -22,8 +22,8 @@ CPPFLAGS+=-I${MDIST} -I${.OBJDIR}
 MDOCMLOBJDIR!= cd ${MDOCDIR}/lib/libmandoc && ${PRINTOBJDIR}
 MDOCMLLIB=     ${MDOCMLOBJDIR}/libmandoc.a
 
-DPADD.makemandb+=      ${MDOCMLLIB}
-LDADD.makemandb+=      -L${MDOCMLOBJDIR} -lmandoc
+DPADD.makemandb+=      ${MDOCMLLIB} ${LIBARCHIVE} ${LIBBZ2} ${LIBLZMA}
+LDADD.makemandb+=      -L${MDOCMLOBJDIR} -lmandoc -larchive -lbz2 -llzma
 DPADD+=                ${LIBSQLITE3} ${LIBM} ${LIBZ} ${LIBUTIL}
 LDADD+=                -lsqlite3 -lm -lz -lutil
 
Index: makemandb.c
===================================================================
RCS file: /home/joerg/repo/netbsd/src/usr.sbin/makemandb/makemandb.c,v
retrieving revision 1.2
diff -u -p -r1.2 makemandb.c
--- makemandb.c 7 Feb 2012 19:17:16 -0000       1.2
+++ makemandb.c 15 Feb 2012 15:42:06 -0000
@@ -26,6 +26,7 @@ __RCSID("$NetBSD: makemandb.c,v 1.2 2012
 #include <ctype.h>
 #include <dirent.h>
 #include <err.h>
+#include <archive.h>
 #include <md5.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -93,12 +94,13 @@ typedef struct mandb_rec {
 static void append(secbuff *sbuff, const char *src);
 static void init_secbuffs(mandb_rec *);
 static void free_secbuffs(mandb_rec *);
-static int check_md5(const char *, sqlite3 *, const char *, char **);
+static int check_md5(const char *, sqlite3 *, const char *, char **, void *, size_t);
 static void cleanup(mandb_rec *);
 static void set_section(const struct mdoc *, const struct man *, mandb_rec *);
 static void set_machine(const struct mdoc *, mandb_rec *);
 static int insert_into_db(sqlite3 *, mandb_rec *);
-static void begin_parse(const char *, struct mparse *, mandb_rec *);
+static void begin_parse(const char *, struct mparse *, mandb_rec *,
+                        const void *, size_t len);
 static void pmdoc_node(const struct mdoc_node *, mandb_rec *);
 static void pmdoc_Nm(const struct mdoc_node *, mandb_rec *);
 static void pmdoc_Nd(const struct mdoc_node *, mandb_rec *);
@@ -587,6 +589,59 @@ update_existing_entry(sqlite3 *db, const
        sqlite3_finalize(inner_stmt);
 }
 
+/* read_and_decompress --
+ *     Reads the given file into memory. If it is compressed, decompress
+ *     it before returning to the caller.
+ */
+static int
+read_and_decompress(const char *file, void **buf, size_t *len)
+{
+       size_t off;
+       ssize_t r;
+       struct archive *a;
+       struct archive_entry *ae;
+
+       if ((a = archive_read_new()) == NULL)
+               errx(EXIT_FAILURE, "memory allocation failed");
+
+       if (archive_read_support_compression_all(a) != ARCHIVE_OK ||
+           archive_read_support_format_raw(a) != ARCHIVE_OK ||
+           archive_read_open_filename(a, file, 65536) != ARCHIVE_OK ||
+           archive_read_next_header(a, &ae) != ARCHIVE_OK)
+               goto archive_error;
+       *len = 65536;
+       *buf = emalloc(*len);
+       off = 0;
+       for (;;) {
+               r = archive_read_data(a, (char *)*buf + off, *len - off);
+               if (r == ARCHIVE_OK) {
+                       archive_read_close(a);
+                       *len = off;
+                       return 0;
+               }
+               if (r <= 0) {
+                       free(*buf);
+                       break;
+               }
+               off += r;
+               if (off == *len) {
+                       *len *= 2;
+                       if (*len < off) {
+                               warnx("File too large: %s", file);
+                               free(*buf);
+                               archive_read_close(a);
+                               return -1;
+                       }
+                       *buf = erealloc(*buf, *len);
+               }
+       }
+
+archive_error:
+       warnx("Error while reading `%s': %s", file, archive_error_string(a));
+       archive_read_close(a);
+       return -1;
+}
+
 /* update_db --
  *     Does an incremental updation of the database by checking the file_cache.
  *     It parses and adds the pages which are present in file_cache,
@@ -601,7 +656,9 @@ update_db(sqlite3 *db, struct mparse *mp
        sqlite3_stmt *stmt = NULL;
        const char *file;
        char *errmsg = NULL;
-       char *buf = NULL;
+       char *md5sum;
+       void *buf;
+       size_t buflen;
        int new_count = 0;      /* Counter for newly indexed/updated pages */
        int total_count = 0;    /* Counter for total number of pages */
        int err_count = 0;      /* Counter for number of failed pages */
@@ -619,14 +676,21 @@ update_db(sqlite3 *db, struct mparse *mp
                errx(EXIT_FAILURE, "Could not query file cache");
        }
 
+       buf = NULL;
        while (sqlite3_step(stmt) == SQLITE_ROW) {
+               free(buf);
                total_count++;
                rec->device = sqlite3_column_int64(stmt, 0);
                rec->inode = sqlite3_column_int64(stmt, 1);
                rec->mtime = sqlite3_column_int64(stmt, 2);
                file = (const char *) sqlite3_column_text(stmt, 3);
-               md5_status = check_md5(file, db, "mandb_meta", &buf);
-               assert(buf != NULL);
+               if (read_and_decompress(file, &buf, &buflen)) {
+                       err_count++;
+                       buf = NULL;
+                       continue;
+               }
+               md5_status = check_md5(file, db, "mandb_meta", &md5sum, buf, buflen);
+               assert(md5sum != NULL);
                if (md5_status == -1) {
                        warnx("An error occurred in checking md5 value"
                              " for file %s", file);
@@ -642,13 +706,13 @@ update_db(sqlite3 *db, struct mparse *mp
                        struct stat sb;
                        stat(file, &sb);
                        if (S_ISLNK(sb.st_mode)) {
-                               free(buf);
+                               free(md5sum);
                                link_count++;
                                continue;
                        }
-                       update_existing_entry(db, file, buf, rec,
+                       update_existing_entry(db, file, md5sum, rec,
                            &new_count, &link_count, &err_count);
-                       free(buf);
+                       free(md5sum);
                        continue;
                }
 
@@ -660,10 +724,10 @@ update_db(sqlite3 *db, struct mparse *mp
                         */
                        if (mflags.verbosity > 1)
                                printf("Parsing: %s\n", file);
-                       rec->md5_hash = buf;
+                       rec->md5_hash = md5sum;
                        rec->file_path = estrdup(file);
                        // file_path is freed by insert_into_db itself.
-                       begin_parse(file, mp, rec);
+                       begin_parse(file, mp, rec, buf, buflen);
                        if (insert_into_db(db, rec) < 0) {
                                warnx("Error in indexing %s", file);
                                err_count++;
@@ -672,7 +736,8 @@ update_db(sqlite3 *db, struct mparse *mp
                        }
                }
        }
-       
+       free(buf);
+
        sqlite3_finalize(stmt);
        
        if (mflags.verbosity) {
@@ -711,7 +776,8 @@ update_db(sqlite3 *db, struct mparse *mp
  *  parses the man page using libmandoc
  */
 static void
-begin_parse(const char *file, struct mparse *mp, mandb_rec *rec)
+begin_parse(const char *file, struct mparse *mp, mandb_rec *rec,
+    const void *buf, size_t len)
 {
        struct mdoc *mdoc;
        struct man *man;
@@ -719,7 +785,7 @@ begin_parse(const char *file, struct mpa
 
        rec->xr_found = 0;
 
-       if (mparse_readfd(mp, -1, file) >= MANDOCLEVEL_FATAL) {
+       if (mparse_readmem(mp, buf, len, file) >= MANDOCLEVEL_FATAL) {
                warnx("%s: Parse failure", file);
                return;
        }
@@ -1675,7 +1741,8 @@ insert_into_db(sqlite3 *db, mandb_rec *r
  *  1: If the hash exists in the database.
  */
 static int
-check_md5(const char *file, sqlite3 *db, const char *table, char **buf)
+check_md5(const char *file, sqlite3 *db, const char *table, char **md5sum,
+    void *buf, size_t buflen)
 {
        int rc = 0;
        int idx = -1;
@@ -1683,8 +1750,8 @@ check_md5(const char *file, sqlite3 *db,
        sqlite3_stmt *stmt = NULL;
 
        assert(file != NULL);
-       *buf = MD5File(file, NULL);
-       if (*buf == NULL) {
+       *md5sum = MD5Data(buf, buflen, NULL);
+       if (*md5sum == NULL) {
                warn("md5 failed: %s", file);
                return -1;
        }
@@ -1694,19 +1761,19 @@ check_md5(const char *file, sqlite3 *db,
        rc = sqlite3_prepare_v2(db, sqlstr, -1, &stmt, NULL);
        if (rc != SQLITE_OK) {
                free(sqlstr);
-               free(*buf);
-               *buf = NULL;
+               free(*md5sum);
+               *md5sum = NULL;
                return -1;
        }
 
        idx = sqlite3_bind_parameter_index(stmt, ":md5_hash");
-       rc = sqlite3_bind_text(stmt, idx, *buf, -1, NULL);
+       rc = sqlite3_bind_text(stmt, idx, *md5sum, -1, NULL);
        if (rc != SQLITE_OK) {
                warnx("%s", sqlite3_errmsg(db));
                sqlite3_finalize(stmt);
                free(sqlstr);
-               free(*buf);
-               *buf = NULL;
+               free(*md5sum);
+               *md5sum = NULL;
                return -1;
        }
 
Index: mandoc.h
===================================================================
RCS file: /home/joerg/repo/netbsd/src/external/bsd/mdocml/dist/mandoc.h,v
retrieving revision 1.1.1.9
diff -u -p -r1.1.1.9 mandoc.h
--- mandoc.h    30 Jan 2012 16:44:19 -0000      1.1.1.9
+++ mandoc.h    15 Feb 2012 13:54:00 -0000
@@ -418,6 +418,8 @@ struct mparse        *mparse_alloc(enum mparse
 void             mparse_free(struct mparse *);
 void             mparse_keep(struct mparse *);
 enum mandoclevel  mparse_readfd(struct mparse *, int, const char *);
+enum mandoclevel  mparse_readmem(struct mparse *, const void *, size_t,
+                       const char *);
 void             mparse_reset(struct mparse *);
 void             mparse_result(struct mparse *, 
                        struct mdoc **, struct man **);
Index: read.c
===================================================================
RCS file: /home/joerg/repo/netbsd/src/external/bsd/mdocml/dist/read.c,v
retrieving revision 1.5
diff -u -p -r1.5 read.c
--- read.c      6 Feb 2012 10:42:44 -0000       1.5
+++ read.c      15 Feb 2012 13:55:55 -0000
@@ -28,6 +28,7 @@
 #include <ctype.h>
 #include <fcntl.h>
 #include <stdarg.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -37,6 +38,7 @@
 #include "libmandoc.h"
 #include "mdoc.h"
 #include "man.h"
+#include "main.h"
 
 #ifndef MAP_FILE
 #define        MAP_FILE        0
@@ -70,7 +72,6 @@ static        void      resize_buf(struct buf *, s
 static void      mparse_buf_r(struct mparse *, struct buf, int);
 static void      mparse_readfd_r(struct mparse *, int, const char *, int);
 static void      pset(const char *, int, struct mparse *);
-static void      pdesc(struct mparse *, const char *, int);
 static int       read_whole_file(const char *, int, struct buf *, int *);
 static void      mparse_end(struct mparse *);
 
@@ -547,38 +548,6 @@ rerun:
        free(ln.buf);
 }
 
-static void
-pdesc(struct mparse *curp, const char *file, int fd)
-{
-       struct buf       blk;
-       int              with_mmap;
-
-       /*
-        * Run for each opened file; may be called more than once for
-        * each full parse sequence if the opened file is nested (i.e.,
-        * from `so').  Simply sucks in the whole file and moves into
-        * the parse phase for the file.
-        */
-
-       if ( ! read_whole_file(file, fd, &blk, &with_mmap)) {
-               curp->file_status = MANDOCLEVEL_SYSERR;
-               return;
-       }
-
-       /* Line number is per-file. */
-
-       curp->line = 1;
-
-       mparse_buf_r(curp, blk, 1);
-
-#ifdef HAVE_MMAP
-       if (with_mmap)
-               munmap(blk.buf, blk.sz);
-       else
-#endif
-               free(blk.buf);
-}
-
 static int
 read_whole_file(const char *file, int fd, struct buf *fb, int *with_mmap)
 {
@@ -674,29 +643,72 @@ mparse_end(struct mparse *curp)
 }
 
 static void
-mparse_readfd_r(struct mparse *curp, int fd, const char *file, int re)
+mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file,
+               int re)
 {
        const char      *svfile;
 
+       /* Line number is per-file. */
+       svfile = curp->file;
+       curp->file = file;
+       curp->line = 1;
+
+       mparse_buf_r(curp, blk, 1);
+
+       if (0 == re && MANDOCLEVEL_FATAL > curp->file_status)
+               mparse_end(curp);
+
+       curp->file = svfile;
+}
+
+enum mandoclevel
+mparse_readmem(struct mparse *curp, const void *buf, size_t len,
+               const char *file)
+{
+       struct buf blk;
+
+       blk.buf = UNCONST(buf);
+       blk.sz = len;
+
+       mparse_parse_buffer(curp, blk, file, 0);
+       return(curp->file_status);
+}
+
+static void
+mparse_readfd_r(struct mparse *curp, int fd, const char *file, int re)
+{
+       struct buf       blk;
+       int              with_mmap;
+
        if (-1 == fd)
                if (-1 == (fd = open(file, O_RDONLY, 0))) {
                        perror(file);
                        curp->file_status = MANDOCLEVEL_SYSERR;
                        return;
                }
+       /*
+        * Run for each opened file; may be called more than once for
+        * each full parse sequence if the opened file is nested (i.e.,
+        * from `so').  Simply sucks in the whole file and moves into
+        * the parse phase for the file.
+        */
 
-       svfile = curp->file;
-       curp->file = file;
+       if ( ! read_whole_file(file, fd, &blk, &with_mmap)) {
+               curp->file_status = MANDOCLEVEL_SYSERR;
+               return;
+       }
 
-       pdesc(curp, file, fd);
+       mparse_parse_buffer(curp, blk, file, re);
 
-       if (0 == re && MANDOCLEVEL_FATAL > curp->file_status)
-               mparse_end(curp);
+#ifdef HAVE_MMAP
+       if (with_mmap)
+               munmap(blk.buf, blk.sz);
+       else
+#endif
+               free(blk.buf);
 
        if (STDIN_FILENO != fd && -1 == close(fd))
                perror(file);
-
-       curp->file = svfile;
 }
 
 enum mandoclevel


Home | Main Index | Thread Index | Old Index