tech-pkg archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

HTTP index caching for pkg_add



Hi all,
attached are patches for net/libfetch and pkgtools/pkg_install to
make pkg_add reuse the index.html it received from the server without
fetching it again when it has more patterns to match. This is somewhat
crude, as servers don't implement If-Modified-Since for dynamic
index pages. Please report any issues with these changes.

Joerg
Index: common.c
===================================================================
RCS file: /home/joerg/repo/netbsd/pkgsrc/net/libfetch/files/common.c,v
retrieving revision 1.20
diff -u -p -r1.20 common.c
--- common.c    16 Aug 2009 20:31:29 -0000      1.20
+++ common.c    6 Oct 2009 16:55:19 -0000
@@ -753,6 +753,56 @@ fetchInitURLList(struct url_list *ue)
        ue->urls = NULL;
 }
 
+/*
+ * Append a copy of every entry of src to the end of dst.
+ *
+ * Each entry is copied by struct assignment and its doc string is
+ * duplicated with strdup(), so the new entry does not share that pointer
+ * with src.
+ *
+ * Returns 0 on success.  Returns -1 after calling fetch_syserr() if
+ * memory cannot be allocated; dst->length is left unchanged in that case.
+ */
+int
+fetchAppendURLList(struct url_list *dst, const struct url_list *src)
+{
+       size_t i, j, len;
+
+       len = dst->length + src->length;
+       if (len > dst->alloc_size) {
+               struct url *tmp;
+
+               /* Keep dst->urls valid if realloc fails. */
+               tmp = realloc(dst->urls, len * sizeof(*tmp));
+               if (tmp == NULL) {
+                       errno = ENOMEM;
+                       fetch_syserr();
+                       return (-1);
+               }
+               dst->alloc_size = len;
+               dst->urls = tmp;
+       }
+
+       for (i = 0, j = dst->length; i < src->length; ++i, ++j) {
+               dst->urls[j] = src->urls[i];
+               dst->urls[j].doc = strdup(src->urls[i].doc);
+               if (dst->urls[j].doc == NULL) {
+                       /*
+                        * Walk j back together with i so every doc string
+                        * duplicated by this call is released.
+                        */
+                       while (i-- > 0)
+                               free(dst->urls[--j].doc);
+                       errno = ENOMEM;
+                       fetch_syserr();
+                       return -1;
+               }
+       }
+       dst->length = len;
+
+       return 0;
+}
+
 void
 fetchFreeURLList(struct url_list *ue)
 {
Index: fetch.3
===================================================================
RCS file: /home/joerg/repo/netbsd/pkgsrc/net/libfetch/files/fetch.3,v
retrieving revision 1.12
diff -u -p -r1.12 fetch.3
--- fetch.3     16 Mar 2009 18:11:39 -0000      1.12
+++ fetch.3     6 Oct 2009 16:10:49 -0000
@@ -131,6 +131,8 @@
 .Fn fetchListFTP "struct url_list *list" "struct url *u" "const char *flags"
 .Ft void
 .Fn fetchInitURLList "struct url_list *ul"
+.Ft int
+.Fn fetchAppendURLList "struct url_list *dst" "const struct url_list *src"
 .Ft void
 .Fn fetchFreeURLList "struct url_list *ul"
 .Ft char *
@@ -281,6 +283,13 @@ The list should be initialized by callin
 .Fn fetchInitURLList
 and the entries be freed by calling
 .Fn fetchFreeURLList .
+The function
+.Fn fetchAppendURLList
+can be used to append one URL list to another.
+If the
+.Ql c
+(cache result) flag is specified, the library is allowed to internally
+cache the result.
 .Pp
 .Fn fetchStringifyURL
 returns the URL as string.
Index: fetch.cat3
===================================================================
RCS file: /home/joerg/repo/netbsd/pkgsrc/net/libfetch/files/fetch.cat3,v
retrieving revision 1.12
diff -u -p -r1.12 fetch.cat3
--- fetch.cat3  22 Jun 2009 12:05:59 -0000      1.12
+++ fetch.cat3  6 Oct 2009 17:11:53 -0000
@@ -108,6 +108,9 @@ SSYYNNOOPPSSIISS
      _v_o_i_d
      ffeettcchhIInniittUURRLLLLiisstt(_s_t_r_u_c_t 
_u_r_l___l_i_s_t _*_u_l);
 
+     _i_n_t
+     ffeettcchhAAppppeennddUURRLLLLiisstt(_s_t_r_u_c_t 
_u_r_l___l_i_s_t _*_d_s_t, _c_o_n_s_t _s_t_r_u_c_t 
_u_r_l___l_i_s_t _*_s_r_c);
+
      _v_o_i_d
      ffeettcchhFFrreeeeUURRLLLLiisstt(_s_t_r_u_c_t 
_u_r_l___l_i_s_t _*_u_l);
 
@@ -204,7 +207,10 @@ DDEESSCCRRIIPPTTIIOONN
      };
 
      The list should be initialized by calling 
ffeettcchhIInniittUURRLLLLiisstt() and the
-     entries be freed by calling 
ffeettcchhFFrreeeeUURRLLLLiisstt().
+     entries be freed by calling 
ffeettcchhFFrreeeeUURRLLLLiisstt().  The function
+     ffeettcchhAAppppeennddUURRLLLLiisstt() can be used to 
append one URL list to another.  If
+     the `c' (cache result) flag is specified, the library is allowed to
+     internally cache the result.
 
      ffeettcchhSSttrriinnggiiffyyUURRLL() returns the URL as 
string.  ffeettcchhUUnnqquuootteePPaatthh()
      returns the path name part of the URL with any quoting undone.  Query
Index: fetch.h
===================================================================
RCS file: /home/joerg/repo/netbsd/pkgsrc/net/libfetch/files/fetch.h,v
retrieving revision 1.14
diff -u -p -r1.14 fetch.h
--- fetch.h     5 Feb 2009 22:45:25 -0000       1.14
+++ fetch.h     6 Oct 2009 16:11:00 -0000
@@ -153,6 +153,7 @@ void                 fetchFreeURL(struct url *);
 
 /* URL listening */
 void            fetchInitURLList(struct url_list *);
+int             fetchAppendURLList(struct url_list *, const struct url_list *);
 void            fetchFreeURLList(struct url_list *);
 char           *fetchUnquotePath(struct url *);
 char           *fetchUnquoteFilename(struct url *);
Index: file.c
===================================================================
RCS file: /home/joerg/repo/netbsd/pkgsrc/net/libfetch/files/file.c,v
retrieving revision 1.14
diff -u -p -r1.14 file.c
--- file.c      10 Mar 2009 00:33:38 -0000      1.14
+++ file.c      6 Oct 2009 17:06:22 -0000
@@ -234,6 +234,7 @@ fetchListFile(struct url_list *ue, struc
        char *path;
        struct dirent *de;
        DIR *dir;
+       int ret;
 
        if ((path = fetchUnquotePath(u)) == NULL) {
                fetch_syserr();
@@ -248,13 +249,17 @@ fetchListFile(struct url_list *ue, struc
                return -1;
        }
 
+       ret = 0;
+
        while ((de = readdir(dir)) != NULL) {
                if (pattern && fnmatch(pattern, de->d_name, 0) != 0)
                        continue;
-               fetch_add_entry(ue, u, de->d_name, 0);
+               ret = fetch_add_entry(ue, u, de->d_name, 0);
+               if (ret)
+                       break;
        }
 
        closedir(dir);
 
-       return 0;
+       return ret;
 }
Index: ftp.c
===================================================================
RCS file: /home/joerg/repo/netbsd/pkgsrc/net/libfetch/files/ftp.c,v
retrieving revision 1.29
diff -u -p -r1.29 ftp.c
--- ftp.c       16 Aug 2009 20:31:29 -0000      1.29
+++ ftp.c       6 Oct 2009 17:07:01 -0000
@@ -1248,6 +1248,7 @@ fetchListFTP(struct url_list *ue, struct
        char buf[2 * PATH_MAX], *eol, *eos;
        ssize_t len;
        size_t cur_off;
+       int ret;
 
        /* XXX What about proxies? */
        if (pattern == NULL || strcmp(pattern, "*") == 0)
@@ -1257,6 +1258,8 @@ fetchListFTP(struct url_list *ue, struct
                return -1;
 
        cur_off = 0;
+       ret = 0;
+
        while ((len = fetchIO_read(f, buf + cur_off, sizeof(buf) - cur_off)) > 
0) {
                cur_off += len;
                while ((eol = memchr(buf, '\n', cur_off)) != NULL) {
@@ -1268,11 +1271,15 @@ fetchListFTP(struct url_list *ue, struct
                                else
                                        eos = eol;
                                *eos = '\0';
-                               fetch_add_entry(ue, url, buf, 0);
+                               ret = fetch_add_entry(ue, url, buf, 0);
+                               if (ret)
+                                       break;
                                cur_off -= eol - buf + 1;
                                memmove(buf, eol + 1, cur_off);
                        }
                }
+               if (ret)
+                       break;
        }
        if (cur_off != 0 || len < 0) {
                /* Not RFC conform, bail out. */
@@ -1280,5 +1287,5 @@ fetchListFTP(struct url_list *ue, struct
                return -1;
        }
        fetchIO_close(f);
-       return 0;
+       return ret;
 }
Index: http.c
===================================================================
RCS file: /home/joerg/repo/netbsd/pkgsrc/net/libfetch/files/http.c,v
retrieving revision 1.24
diff -u -p -r1.24 http.c
--- http.c      5 Mar 2009 19:07:03 -0000       1.24
+++ http.c      6 Oct 2009 17:08:29 -0000
@@ -1223,7 +1223,7 @@ struct index_parser {
        enum http_states state;
 };
 
-static size_t
+static ssize_t
 parse_index(struct index_parser *parser, const char *buf, size_t len)
 {
        char *end_attr, p = *buf;
@@ -1352,12 +1352,21 @@ parse_index(struct index_parser *parser,
                        return 0;
                *end_attr = '\0';
                parser->state = ST_TAGA;
-               fetch_add_entry(parser->ue, parser->url, buf, 1);
+               if (fetch_add_entry(parser->ue, parser->url, buf, 1))
+                       return -1;
                return end_attr + 1 - buf;
        }
        abort();
 }
 
+struct http_index_cache {              /* one cached HTTP directory listing */
+       struct http_index_cache *next;  /* next entry in the index_cache list */
+       struct url *location;           /* copy of the URL that was listed */
+       struct url_list ue;             /* entries parsed from that URL's index */
+};
+
+static struct http_index_cache *index_cache;   /* head of the cache list */
+
 /*
  * List a directory
  */
@@ -1366,17 +1375,53 @@ fetchListHTTP(struct url_list *ue, struc
 {
        fetchIO *f;
        char buf[2 * PATH_MAX];
-       size_t buf_len, processed, sum_processed;
-       ssize_t read_len;
+       size_t buf_len, sum_processed;
+       ssize_t read_len, processed;
        struct index_parser state;
+       struct http_index_cache *cache = NULL;
+       int do_cache, ret;
 
-       state.url = url;
-       state.state = ST_NONE;
-       state.ue = ue;
+       do_cache = CHECK_FLAG('c');
+
+       if (do_cache) {
+               for (cache = index_cache; cache != NULL; cache = cache->next) {
+                       if (strcmp(cache->location->scheme, url->scheme))
+                               continue;
+                       if (strcmp(cache->location->user, url->user))
+                               continue;
+                       if (strcmp(cache->location->pwd, url->pwd))
+                               continue;
+                       if (strcmp(cache->location->host, url->host))
+                               continue;
+                       if (cache->location->port != url->port)
+                               continue;
+                       if (strcmp(cache->location->doc, url->doc))
+                               continue;
+                       return fetchAppendURLList(ue, &cache->ue);
+               }
+
+               cache = malloc(sizeof(*cache));
+               fetchInitURLList(&cache->ue);
+               cache->location = fetchCopyURL(url);
+       }
 
        f = fetchGetHTTP(url, flags);
-       if (f == NULL)
+       if (f == NULL) {
+               if (do_cache) {
+                       fetchFreeURLList(&cache->ue);
+                       fetchFreeURL(cache->location);
+                       free(cache);
+               }
                return -1;
+       }
+
+       state.url = url;
+       state.state = ST_NONE;
+       if (do_cache) {
+               state.ue = &cache->ue;
+       } else {
+               state.ue = ue;
+       }
 
        buf_len = 0;
 
@@ -1385,12 +1430,31 @@ fetchListHTTP(struct url_list *ue, struc
                sum_processed = 0;
                do {
                        processed = parse_index(&state, buf + sum_processed, 
buf_len);
+                       if (processed == -1)
+                               break;
                        buf_len -= processed;
                        sum_processed += processed;
                } while (processed != 0 && buf_len > 0);
+               if (processed == -1) {
+                       read_len = -1;
+                       break;
+               }
                memmove(buf, buf + sum_processed, buf_len);
        }
 
        fetchIO_close(f);
-       return read_len < 0 ? -1 : 0;
+
+       ret = read_len < 0 ? -1 : 0;
+
+       if (do_cache) {
+               if (ret == 0) {
+                       cache->next = index_cache;
+                       index_cache = cache;
+               }
+
+               if (fetchAppendURLList(ue, &cache->ue))
+                       ret = -1;
+       }
+
+       return ret;
 }
Index: lib/parse-config.c
===================================================================
RCS file: 
/home/joerg/repo/netbsd/pkgsrc/pkgtools/pkg_install/files/lib/parse-config.c,v
retrieving revision 1.10
diff -u -p -r1.10 parse-config.c
--- lib/parse-config.c  7 Oct 2009 12:53:27 -0000       1.10
+++ lib/parse-config.c  7 Oct 2009 12:59:42 -0000
@@ -54,6 +54,7 @@ char fetch_flags[10] = ""; /* Workaround
 static const char *active_ftp;
 static const char *verbose_netio;
 static const char *ignore_proxy;
+const char *cache_index = "yes";
 const char *cert_chain_file;
 const char *certs_packages;
 const char *certs_pkg_vulnerabilities;
@@ -79,6 +80,7 @@ static struct config_variable {
 } config_variables[] = {
        { "ACCEPTABLE_LICENSES", &acceptable_licenses },
        { "ACTIVE_FTP", &active_ftp },
+       { "CACHE_INDEX", &cache_index },
        { "CERTIFICATE_ANCHOR_PKGS", &certs_packages },
        { "CERTIFICATE_ANCHOR_PKGVULN", &certs_pkg_vulnerabilities },
        { "CERTIFICATE_CHAIN", &cert_chain_file },
@@ -152,6 +154,7 @@ parse_pkg_install_conf(void)
 void
 pkg_install_config(void)
 {
+       int do_cache_index;
        char *value;
        parse_pkg_install_conf();
 
@@ -175,7 +178,17 @@ pkg_install_config(void)
        if ((value = getenv("PKG_PATH")) != NULL)
                config_pkg_path = value;
 
-       snprintf(fetch_flags, sizeof(fetch_flags), "%s%s%s",
+       if (strcasecmp(cache_index, "yes") == 0)
+               do_cache_index = 1;
+       else {
+               if (strcasecmp(cache_index, "no"))
+                       warnx("Invalid value for configuration option "
+                           "CACHE_INDEX");
+               do_cache_index = 0;
+       }
+
+       snprintf(fetch_flags, sizeof(fetch_flags), "%s%s%s%s",
+           (do_cache_index) ? "c" : "",
            (verbose_netio && *verbose_netio) ? "v" : "",
            (active_ftp && *active_ftp) ? "a" : "",
            (ignore_proxy && *ignore_proxy) ? "d" : "");


Home | Main Index | Thread Index | Old Index