tech-pkg archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Incremental pbulk-scan patch for testing



Hi all,
attached is a preliminary patch that allows pbulk-scan to reuse the
results of the last run if the files relevant to a directory haven't
changed. It hasn't received much testing yet and is still a bit rough
around the edges.

Joerg
Index: mk/pbulk/pbulk-index.mk
===================================================================
RCS file: /home/joerg/repo/netbsd/pkgsrc/mk/pbulk/pbulk-index.mk,v
retrieving revision 1.14
diff -u -p -r1.14 pbulk-index.mk
--- mk/pbulk/pbulk-index.mk     12 Nov 2011 15:21:53 -0000      1.14
+++ mk/pbulk/pbulk-index.mk     16 Nov 2012 00:25:19 -0000
@@ -102,6 +102,7 @@ pbulk-index-item:
        @echo "USE_DESTDIR="${_USE_DESTDIR:Q}
        @echo "BOOTSTRAP_PKG="${BOOTSTRAP_PKG}
        @echo "USERGROUP_PHASE="${USERGROUP_PHASE:Q}
+       @echo "SCAN_DEPENDS="${.MAKE.MAKEFILES:Q}
 .if defined(_PBULK_MULTI_NEEDED)
        @printf "MULTI_VERSION="
 .for _t in ${_PBULK_MULTI_NEEDED}
Index: pkgtools/pbulk/files/pbulk/lib/alloc.c
===================================================================
RCS file: 
/home/joerg/repo/netbsd/pkgsrc/pkgtools/pbulk/files/pbulk/lib/alloc.c,v
retrieving revision 1.2
diff -u -p -r1.2 alloc.c
--- pkgtools/pbulk/files/pbulk/lib/alloc.c      25 Jun 2007 21:38:43 -0000      
1.2
+++ pkgtools/pbulk/files/pbulk/lib/alloc.c      16 Nov 2012 00:25:19 -0000
@@ -95,3 +95,23 @@ xstrndup(const char *str, size_t len)
 
        return buf;
 }
+
+/*
+ * Hash a NUL-terminated string with the "times 33" recurrence, seeded
+ * with 5381.  Every byte is folded in as an unsigned char, so the value
+ * does not depend on whether plain char is signed.
+ */
+size_t
+djb_hash(const char *s)
+{
+       const unsigned char *p = (const unsigned char *)s;
+       size_t hash = 5381;
+
+       for (; *p != '\0'; ++p)
+               hash = hash * 33 + (size_t)*p;
+       return hash;
+}
+
+/*
+ * Same hash as djb_hash(), but over the byte range [s, e).
+ * Hashing stops at e, or earlier if a NUL byte is found inside the range.
+ * An empty range (s == e) yields the seed value 5381.
+ */
+size_t
+djb_hash2(const char *s, const char *e)
+{
+       size_t h = 5381;
+
+       /*
+        * Check the bound before dereferencing: e may point one past the
+        * end of the buffer, so the byte at e must never be read.
+        */
+       while (s < e && *s)
+               h = h * 33 + (size_t)(unsigned char)*s++;
+       return h;
+}
Index: pkgtools/pbulk/files/pbulk/lib/pbulk.h
===================================================================
RCS file: 
/home/joerg/repo/netbsd/pkgsrc/pkgtools/pbulk/files/pbulk/lib/pbulk.h,v
retrieving revision 1.3
diff -u -p -r1.3 pbulk.h
--- pkgtools/pbulk/files/pbulk/lib/pbulk.h      31 Jan 2009 23:25:38 -0000      
1.3
+++ pkgtools/pbulk/files/pbulk/lib/pbulk.h      16 Nov 2012 00:25:19 -0000
@@ -91,3 +91,6 @@ char          *xstrndup(const char *, size_t);
 
 int             pkg_match(const char *, const char *);
 const char     *pkg_order(const char *, const char *);
+
+size_t          djb_hash(const char *);
+size_t          djb_hash2(const char *, const char *);
Index: pkgtools/pbulk/files/pbulk/pbuild/jobs.c
===================================================================
RCS file: 
/home/joerg/repo/netbsd/pkgsrc/pkgtools/pbulk/files/pbulk/pbuild/jobs.c,v
retrieving revision 1.13
diff -u -p -r1.13 jobs.c
--- pkgtools/pbulk/files/pbulk/pbuild/jobs.c    27 Nov 2011 19:53:30 -0000      
1.13
+++ pkgtools/pbulk/files/pbulk/pbuild/jobs.c    16 Nov 2012 00:25:19 -0000
@@ -536,10 +536,7 @@ finish_build(const char *report_file)
 static size_t
 hash_item(const char *s, size_t len)
 {
-       size_t h = 5381;
-       while (len--)
-               h = h * 33 + *s++;
-       return h & (HASH_SIZE - 1);
+       /* Bucket index: djb_hash2() over [s, s + len), reduced modulo HASH_SIZE. */
+       return djb_hash2(s, s + len) % HASH_SIZE;
 }
 
 static struct buildhash hash_table[HASH_SIZE];
Index: pkgtools/pbulk/files/pbulk/pscan/jobs.c
===================================================================
RCS file: 
/home/joerg/repo/netbsd/pkgsrc/pkgtools/pbulk/files/pbulk/pscan/jobs.c,v
retrieving revision 1.5
diff -u -p -r1.5 jobs.c
--- pkgtools/pbulk/files/pbulk/pscan/jobs.c     15 Jan 2008 22:14:30 -0000      
1.5
+++ pkgtools/pbulk/files/pbulk/pscan/jobs.c     16 Nov 2012 00:25:19 -0000
@@ -46,6 +46,218 @@
 
 #define UNCONST(x) ((void *)(uintptr_t)(x))
 
+#define        STAT_HASH_SIZE  131072
+#define        HASH_SIZE 32768
+
+/*
+ * One memoized stat(2) result, chained into a bucket of stat_hash_table.
+ */
+struct stat_cache {
+       char *path;     /* private copy of the path that was looked up */
+       time_t mtime;   /* st_mtime of path, or -1 if stat(2) failed */
+       SLIST_ENTRY(stat_cache) hash_link;      /* next entry in the same bucket */
+};
+SLIST_HEAD(stat_cache_hash, stat_cache);
+
+/* Buckets are indexed by djb_hash(path) % STAT_HASH_SIZE, see stat_path(). */
+static struct stat_cache_hash stat_hash_table[STAT_HASH_SIZE];
+
+/*
+ * Return the modification time of path, or -1 if stat(2) fails.
+ * The answer (including a failure) is remembered in stat_hash_table, so
+ * a later call with the same path string is answered from the cache.
+ */
+static time_t
+stat_path(const char *path)
+{
+       struct stat_cache_hash *bucket;
+       struct stat_cache *entry;
+       struct stat st;
+
+       bucket = &stat_hash_table[djb_hash(path) % STAT_HASH_SIZE];
+       SLIST_FOREACH(entry, bucket, hash_link) {
+               if (strcmp(path, entry->path) == 0)
+                       return entry->mtime;
+       }
+
+       /* Not cached yet: record the result under a private copy of path. */
+       entry = xmalloc(sizeof(*entry));
+       entry->path = xstrdup(path);
+       entry->mtime = (stat(path, &st) == -1) ? (time_t)-1 : st.st_mtime;
+       SLIST_INSERT_HEAD(bucket, entry, hash_link);
+       return entry->mtime;
+}
+
+/*
+ * Result of a previous scan for one package location, as loaded by
+ * read_old_scan() and stored by add_entry().
+ */
+struct scan_entry {
+       char *location;         /* value of the PKG_LOCATION= line */
+       char *data;             /* entry text with the PKG_LOCATION= line removed */
+       char *scan_depends;     /* value of the SCAN_DEPENDS= line, or NULL */
+       SLIST_ENTRY(scan_entry) hash_link;      /* next entry in the same bucket */
+};
+SLIST_HEAD(scan_entry_hash, scan_entry);
+
+/* Old scan entries, bucketed by the hash of their location. */
+static struct scan_entry_hash hash_table[HASH_SIZE];
+/* st_mtime of the old scan file, set by read_old_scan(). */
+static time_t scan_mtime;
+
+/* Bucket index in hash_table for a NUL-terminated location string. */
+static size_t
+hash_entry(const char *path)
+{
+       size_t bucket = djb_hash(path);
+
+       bucket %= HASH_SIZE;
+       return bucket;
+}
+
+/* Bucket index in hash_table for the location held in [path, path_end). */
+static size_t
+hash_entry2(const char *path, const char *path_end)
+{
+       size_t bucket = djb_hash2(path, path_end);
+
+       bucket %= HASH_SIZE;
+       return bucket;
+}
+
+/*
+ * Store one entry of the old scan output in hash_table.
+ *
+ * [l_start, l_end) is the value of the entry's PKG_LOCATION= line,
+ * [s_start, s_end) the value of its SCAN_DEPENDS= line (an empty range if
+ * there is none) and [d_start, d_end) the complete text of the entry.
+ * All three ranges point into the caller's buffer; everything kept is
+ * copied.  The stored text omits the PKG_LOCATION= line.  If an entry
+ * with the same location already exists, the new text is appended to it
+ * and the existing scan_depends is left unchanged.
+ *
+ * Exits via errx() if the location range is empty.
+ */
+static void
+add_entry(const char *l_start, const char *l_end,
+         const char *s_start, const char *s_end,
+          const char *d_start, const char *d_end)
+{
+       struct scan_entry *e;
+       struct scan_entry_hash *h;
+
+       if (l_start == l_end)
+               errx(1, "Location entry missing");
+
+       h = &hash_table[hash_entry2(l_start, l_end)];
+       SLIST_FOREACH(e, h, hash_link) {
+               /* Exact match: same prefix and e->location ends right there. */
+               if (strncmp(e->location, l_start, l_end - l_start) == 0 &&
+                   e->location[l_end - l_start] == '\0') {
+                       size_t l1, l2, l3;
+                       l1 = strlen(e->data);
+                       l2 = d_end - d_start;
+                       /* Step back over "PKG_LOCATION=" (13 bytes) to the line start. */
+                       l_start -= 13;
+                       /* The caller leaves l_end on the newline; step past it. */
+                       ++l_end;
+                       /* l3 = length of the entry text before the PKG_LOCATION line. */
+                       l3 = l_start - d_start;
+                       e->data = xrealloc(e->data, l1 + l2 + 1);
+                       memcpy(e->data + l1, d_start, l3);
+                       memcpy(e->data + l1 + l3, l_end, d_end - l_end);
+                       e->data[l1 + l3 + d_end - l_end] = '\0';
+                       return;
+               }
+       }
+       e = xmalloc(sizeof(*e));
+       e->location = xstrndup(l_start, l_end - l_start);
+       e->data = xmalloc(d_end - d_start + 1);
+       /* As above: widen [l_start, l_end) to the whole PKG_LOCATION line. */
+       l_start -= 13;
+       ++l_end;
+       /* Copy the text before and after that line, then terminate it. */
+       memcpy(e->data, d_start, l_start - d_start);
+       memcpy(e->data + (l_start - d_start), l_end, d_end - l_end);
+       e->data[l_start - d_start + d_end - l_end] = '\0';
+
+       if (s_start != s_end)
+               e->scan_depends = xstrndup(s_start, s_end - s_start);
+       else
+               e->scan_depends = NULL;
+       SLIST_INSERT_HEAD(h, e, hash_link);
+}
+
+/*
+ * Load the output file of a previous scan into hash_table.
+ *
+ * The hash table is always (re)initialised.  If path is NULL or the file
+ * cannot be opened or stat'ed, the table is simply left empty.  Otherwise
+ * scan_mtime is set to the file's modification time and the content is
+ * split into entries: a line starting with "PKGNAME=" begins a new entry,
+ * and the values of the entry's "PKG_LOCATION=" and "SCAN_DEPENDS=" lines
+ * are handed to add_entry() together with the entry text.
+ *
+ * Exits via errx() if a line is not newline-terminated, or (through
+ * add_entry()) if an entry has no PKG_LOCATION= line.
+ */
+void
+read_old_scan(const char *path)
+{
+       size_t i;
+       int fd;
+       char *buf;
+       struct stat sb;
+       const char *entry_start;
+       const char *l_start, *l_end;
+       const char *s_start, *s_end;
+       const char *line, *eol;
+
+       for (i = 0; i < HASH_SIZE; ++i)
+               SLIST_INIT(&hash_table[i]);
+
+       if (path == NULL)
+               return;
+       if ((fd = open(path, O_RDONLY)) == -1)
+               return;
+       if (fstat(fd, &sb) == -1) {
+               close(fd);
+               return;
+       }
+       scan_mtime = sb.st_mtime;
+       /*
+        * NOTE(review): fd is not closed and buf is not released here;
+        * whether read_from_file() takes ownership of fd, and how buf must
+        * be freed, is not visible from this file -- confirm.
+        */
+       buf = read_from_file(fd);
+       entry_start = buf;
+       l_start = l_end = NULL;
+       /*
+        * Start every entry with an empty SCAN_DEPENDS range, so that an
+        * entry without such a line is stored with scan_depends == NULL
+        * instead of using indeterminate or stale pointers.
+        */
+       s_start = s_end = NULL;
+       for (line = buf; *line; line = eol) {
+               eol = strchr(line, '\n');
+               if (eol == NULL)
+                       errx(1, "Incomplete old scan");
+               ++eol;
+               if (strncmp(line, "PKGNAME=", 8) == 0) {
+                       /* The very first PKGNAME line has no entry before it. */
+                       if (line == buf)
+                               continue;
+                       add_entry(l_start, l_end, s_start, s_end,
+                           entry_start, line);
+                       l_start = l_end = NULL;
+                       s_start = s_end = NULL;
+                       entry_start = line;
+               } else if (strncmp(line, "PKG_LOCATION=", 13) == 0) {
+                       /* Value range excludes the prefix and the newline. */
+                       l_start = line + 13;
+                       l_end = eol - 1;
+               } else if (strncmp(line, "SCAN_DEPENDS=", 13) == 0) {
+                       s_start = line + 13;
+                       s_end = eol - 1;
+               }
+       }
+       /* Flush the last entry, which is not followed by a PKGNAME line. */
+       if (entry_start != line)
+               add_entry(l_start, l_end, s_start, s_end,
+                   entry_start, line);
+}
+
+/*
+ * Return the old scan entry for location if it can be reused, else NULL.
+ *
+ * An entry without recorded SCAN_DEPENDS is returned unconditionally.
+ * Otherwise every space-separated file in scan_depends must exist and
+ * have a modification time strictly older than the old scan file
+ * (scan_mtime).  Absolute paths are checked as given.  A "../../" path
+ * is resolved against pkgsrc_tree when location contains at most one
+ * '/'.  Every other path is checked as pkgsrc_tree/location/path.
+ */
+static struct scan_entry *
+find_old_scan(const char *location)
+{
+       struct scan_entry *e;
+       char *dep, *dep2, *path, *fullpath;
+       int is_current;
+       time_t mtime;
+
+       e = SLIST_FIRST(&hash_table[hash_entry(location)]);
+       while (e) {
+               if (strcmp(e->location, location) == 0)
+                       break;
+               e = SLIST_NEXT(e, hash_link);
+       }
+       if (e == NULL)
+               return NULL;
+
+       if (e->scan_depends == NULL)
+               return e;
+
+       is_current = 1;
+       /* strtok() modifies its argument, so tokenize a private copy. */
+       dep2 = dep = xstrdup(e->scan_depends);
+       while ((path = strtok(dep, " ")) != NULL) {
+               dep = NULL;
+               if (*path == '/') {
+                       mtime = stat_path(path);
+               } else {
+                       if (strncmp("../../", path, 6) == 0 &&
+                           strrchr(location, '/') == strchr(location, '/'))
+                               fullpath = xasprintf("%s/%s", pkgsrc_tree,
+                                   path + 6);
+                       else
+                               fullpath = xasprintf("%s/%s/%s", pkgsrc_tree,
+                                   location, path);
+                       mtime = stat_path(fullpath);
+                       /*
+                        * stat_path() keeps its own copy of the path, so
+                        * the formatted string can be released right away.
+                        * NOTE(review): assumes xasprintf() returns memory
+                        * from malloc, like asprintf(3) -- confirm.
+                        */
+                       free(fullpath);
+               }
+               if (mtime == -1 || mtime >= scan_mtime) {
+                       is_current = 0;
+                       break;
+               }
+       }
+       free(dep2);
+       return is_current ? e : NULL;
+}
+
 static struct scan_job *jobs;
 static size_t len_jobs, allocated_jobs, first_undone_job, done_jobs;
 
@@ -86,12 +298,21 @@ struct scan_job *
 get_job(void)
 {
        size_t i;
+       struct scan_entry *e;
+       struct scan_job * job;
 
        for (i = first_undone_job; i < len_jobs; ++i) {
-               if (jobs[i].state == JOB_OPEN) {
-                       jobs[i].state = JOB_IN_PROCESSING;
-                       return &jobs[i];
+               job = &jobs[i];
+               if (job->state != JOB_OPEN)
+                       continue;
+               /* Hand the job out only if no reusable old result exists. */
+               e = find_old_scan(job->pkg_location);
+               if (e == NULL) {
+                       job->state = JOB_IN_PROCESSING;
+                       return job;
                }
+               /* Reuse the old output and finish the job without scanning. */
+               job->scan_output = xstrdup(e->data);
+               process_job(job, JOB_DONE);
+               /*
+                * Restart from first_undone_job (the loop's ++i undoes the -1).
+                * NOTE(review): presumably process_job() can change
+                * first_undone_job and the job list -- confirm.
+                */
+               i = first_undone_job - 1;
        }
 
        return NULL;
@@ -139,8 +360,7 @@ pkgname_dup(const char *line)
        return xstrndup(pkgname, pkgname_len);
 }
 
-#define        HASH_SIZE 1024
-#define        HASH_ITEM(x) (((unsigned char)(x)[0] + (unsigned char)(x)[1] * 
257) & (HASH_SIZE - 1))
+#define        HASH_ITEM(x) (djb_hash(x) % HASH_SIZE)
 
 static struct pkgname_hash *pkgname_hash[HASH_SIZE];
 
Index: pkgtools/pbulk/files/pbulk/pscan/pscan.c
===================================================================
RCS file: 
/home/joerg/repo/netbsd/pkgsrc/pkgtools/pbulk/files/pbulk/pscan/pscan.c,v
retrieving revision 1.7
diff -u -p -r1.7 pscan.c
--- pkgtools/pbulk/files/pbulk/pscan/pscan.c    6 Mar 2011 02:23:32 -0000       
1.7
+++ pkgtools/pbulk/files/pbulk/pscan/pscan.c    16 Nov 2012 00:25:19 -0000
@@ -33,11 +33,13 @@
 
 #include <nbcompat.h>
 
+#include <sys/stat.h>
 #include <sys/uio.h>
 #include <nbcompat/err.h>
 #ifdef HAVE_INTTYPES_H
 #include <inttypes.h>
 #endif
+#include <fcntl.h>
 #include <nbcompat/limits.h>
 #include <signal.h>
 #include <nbcompat/stdio.h>
@@ -52,7 +54,7 @@ int verbosity;
 
 static const char *bmake_path;
 static const char *output_file;
-static const char *pkgsrc_tree;
+const char *pkgsrc_tree;
 
 static void    find_full_tree(void);
 static void    read_limited_list(void);
@@ -62,14 +64,16 @@ static void
 usage(void)
 {
        (void)fprintf(stderr, "usage: pbulk-scan -c <master> [ -v ] -M <make> <pkgsrc tree>\n");
-       (void)fprintf(stderr, "usage: pbulk-scan [ -I <start> ] [ -l ] [ -v ] [ -m <port> ] -M <make> <pksgrc tree> <output file>\n");
+       /* -L <old scan> names the previous scan output given to read_old_scan(). */
+       (void)fprintf(stderr, "usage: pbulk-scan [ -I <start> ] [ -L <old scan> ] [ -l ] [ -v ]\n"
+                             "                  [ -m <port> ] -M <make> <pkgsrc tree> <output file>\n");
        exit(1);
 }
 
 int
 main(int argc, char **argv)
 {
-       const char *client_port = NULL, *master_port = NULL, *start_script = 
NULL;
+       const char *client_port = NULL, *last_scan = NULL, *master_port = NULL;
+       const char *start_script = NULL;
        int ch, limited_scan;
        struct sigaction sa;
 
@@ -77,7 +81,7 @@ main(int argc, char **argv)
 
        limited_scan = 0;
 
-       while ((ch = getopt(argc, argv, "I:M:lc:m:v")) != -1) {
+       while ((ch = getopt(argc, argv, "I:M:L:lc:m:v")) != -1) {
                switch (ch) {
                case 'I':
                        start_script = optarg;
@@ -85,6 +89,9 @@ main(int argc, char **argv)
                case 'c':
                        client_port = optarg;
                        break;
+               case 'L':
+                       last_scan = optarg;
+                       break;
                case 'l':
                        limited_scan = 1;
                        break;
@@ -121,6 +128,9 @@ main(int argc, char **argv)
                usage();
        }
 
+       if (client_port == NULL)
+               read_old_scan(last_scan);
+
        if (client_port) {
                if (limited_scan != 0 || argc != 1)
                        usage();
Index: pkgtools/pbulk/files/pbulk/pscan/pscan.h
===================================================================
RCS file: 
/home/joerg/repo/netbsd/pkgsrc/pkgtools/pbulk/files/pbulk/pscan/pscan.h,v
retrieving revision 1.2
diff -u -p -r1.2 pscan.h
--- pkgtools/pbulk/files/pbulk/pscan/pscan.h    25 Jun 2007 21:38:46 -0000      
1.2
+++ pkgtools/pbulk/files/pbulk/pscan/pscan.h    16 Nov 2012 00:25:19 -0000
@@ -48,6 +48,7 @@ struct scan_job {
 };
 
 extern int      verbosity;
+extern const char *pkgsrc_tree;
 
 char           *scan_pkglocation(const char *);
 
@@ -60,3 +61,4 @@ struct scan_job       *get_job(void);
 void            process_job(struct scan_job *, enum job_state);
 void            write_jobs(const char *);
 
+void           read_old_scan(const char *);
Index: pkgtools/pbulk/files/pbulk/scripts/pkg-build
===================================================================
RCS file: 
/home/joerg/repo/netbsd/pkgsrc/pkgtools/pbulk/files/pbulk/scripts/pkg-build,v
retrieving revision 1.24
diff -u -p -r1.24 pkg-build
--- pkgtools/pbulk/files/pbulk/scripts/pkg-build        19 Jun 2012 13:40:07 
-0000      1.24
+++ pkgtools/pbulk/files/pbulk/scripts/pkg-build        16 Nov 2012 00:25:19 
-0000
@@ -193,7 +193,7 @@ run_make run_direct package  > ${bulklog
 if [ "${use_destdir}" != "no" ] && \
    [ -z "${is_bootstrap}" ]; then
        if ! ${pkg_add_cmd} ${pkgname} \
-           > ${bulklog}/${pkgname}/package.log 2>&1; then
+           >> ${bulklog}/${pkgname}/package.log 2>&1; then
                run_make run_direct package-clean
                cleanup
        fi
Index: pkgtools/pbulk/files/pbulk/scripts/pre-build
===================================================================
RCS file: 
/home/joerg/repo/netbsd/pkgsrc/pkgtools/pbulk/files/pbulk/scripts/pre-build,v
retrieving revision 1.10
diff -u -p -r1.10 pre-build
--- pkgtools/pbulk/files/pbulk/scripts/pre-build        16 Dec 2009 19:41:41 
-0000      1.10
+++ pkgtools/pbulk/files/pbulk/scripts/pre-build        16 Nov 2012 00:25:19 
-0000
@@ -1,4 +1,4 @@
-#!@SH@
+#!/bin/sh
 # $NetBSD: pre-build,v 1.10 2009/12/16 19:41:41 joerg Exp $
 #
 # Copyright (c) 2007 Joerg Sonnenberger <joerg%NetBSD.org@localhost>.
@@ -36,7 +36,7 @@ set -e
 
 if [ "${config_version}" != "@PBULK_CONFIG_VERSION@" ]; then
        echo "Your configuration has version ${config_version}."
-       echo "This version of pbulk expects version @PBULK_CONFIG_VERSION@."
+       echo "This version of pbulk expects version 0.34."
        exit 1
 fi
 
@@ -59,16 +59,20 @@ if [ "$cross_compile" != "no" ]; then
        fi
 fi
 
-if [ -d "${bulklog}/meta" ]; then
-       echo "Warning: All log files of the previous pbulk run will be"
-       echo "removed in 5 seconds. If you want to abort, press Ctrl-C."
-       sleep 5
+if [ -f "${bulklog}/meta/pscan" ]; then
+       echo "Reusing old scan results"
+       rm -rf "${bulklog}.old"
+       mv "${bulklog}" "${bulklog}.old"
+elif [ -d "${bulklog}/meta" ]; then
+       echo "Removing old scan results"
+       rm -rf "${bulklog}"/* || true
+else
+       rm -rf "${bulklog}"/* || true
 fi
 
-rm -rf "${bulklog}"/* || true
 mkdir -p "${bulklog}" "${loc}"
 
-@PREFIX@/libexec/pbulk/client-clean
+/bulk-data/pbulk/libexec/pbulk/client-clean
 
 # Log common settings...
 opsys=`cd ${pkgsrc}/pkgtools/pkg_install && ${make} show-var VARNAME=OPSYS`
Index: pkgtools/pbulk/files/pbulk/scripts/scan
===================================================================
RCS file: 
/home/joerg/repo/netbsd/pkgsrc/pkgtools/pbulk/files/pbulk/scripts/scan,v
retrieving revision 1.7
diff -u -p -r1.7 scan
--- pkgtools/pbulk/files/pbulk/scripts/scan     15 Jun 2010 21:11:43 -0000      
1.7
+++ pkgtools/pbulk/files/pbulk/scripts/scan     16 Nov 2012 00:25:19 -0000
@@ -1,4 +1,4 @@
-#!@SH@
+#!/bin/sh
 # $NetBSD: scan,v 1.7 2010/06/15 21:11:43 joerg Exp $
 #
 # Copyright (c) 2007 Joerg Sonnenberger <joerg%NetBSD.org@localhost>.
@@ -36,18 +36,23 @@ set -e
 
 if [ "${config_version}" != "@PBULK_CONFIG_VERSION@" ]; then
        echo "Your configuration has version ${config_version}."
-       echo "This version of pbulk expects version @PBULK_CONFIG_VERSION@."
+       echo "This version of pbulk expects version 0.34."
        exit 1
 fi
 
+if [ -f "${bulklog}.old/meta/pscan" ]; then
+       echo "Using old scan results from ${bulklog}.old/meta/pscan"
+       extra_pscan_args="-L ${bulklog}.old/meta/pscan"
+fi
+
 if [ -z "${limited_list}" ]; then
        echo "Scanning..."
        case "${master_mode}" in
        [nN][oO])
-               ${pscan} -v -M ${make} ${pkgsrc} ${loc}/pscan 2>> 
${loc}/pscan.stderr
+               ${pscan} -v -M ${make} ${extra_pscan_args} ${pkgsrc} 
${loc}/pscan 2>> ${loc}/pscan.stderr
                ;;
        [yY][eE][sS])
-               ${pscan} -v -I ${pscan_start_script} -m ${master_port_scan} -M 
${make} ${pkgsrc} ${loc}/pscan 2>> ${loc}/pscan.stderr
+               ${pscan} -v -I ${pscan_start_script} -m ${master_port_scan} -M 
${make} ${extra_pscan_args} ${pkgsrc} ${loc}/pscan 2>> ${loc}/pscan.stderr
                ;;
        *)
                echo "master_mode must be either yes or no."
Index: pkgtools/pbulk/files/pbulk/scripts/scan-client-start
===================================================================
RCS file: 
/home/joerg/repo/netbsd/pkgsrc/pkgtools/pbulk/files/pbulk/scripts/scan-client-start,v
retrieving revision 1.2
diff -u -p -r1.2 scan-client-start
--- pkgtools/pbulk/files/pbulk/scripts/scan-client-start        16 Sep 2008 
18:21:30 -0000      1.2
+++ pkgtools/pbulk/files/pbulk/scripts/scan-client-start        16 Nov 2012 
00:25:19 -0000
@@ -11,6 +11,10 @@ if [ "${config_version}" != "@PBULK_CONF
        exit 1
 fi
 
+if [ -f "${bulklog}.old/meta/pscan" ]; then
+       extra_pscan_args="-L ${bulklog}.old/meta/pscan"
+fi
+
 for client in ${scan_clients}; do
-       ssh $client "${pscan_prepare} && ${pscan} -c ${master_port_scan} -M 
${make} ${pkgsrc}" &
+       ssh $client "${pscan_prepare} && ${pscan} -c ${master_port_scan} -M 
${make} ${extra_pscan_args} ${pkgsrc}" &
 done


Home | Main Index | Thread Index | Old Index