Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/usr.bin/gzip Add -l support for xz files



details:   https://anonhg.NetBSD.org/src/rev/3844450f44fb
branches:  trunk
changeset: 993876:3844450f44fb
user:      martin <martin%NetBSD.org@localhost>
date:      Sat Oct 06 16:36:45 2018 +0000

description:
Add -l support for xz files

diffstat:

 usr.bin/gzip/gzip.c |   18 ++-
 usr.bin/gzip/unxz.c |  323 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 334 insertions(+), 7 deletions(-)

diffs (truncated from 402 to 300 lines):

diff -r 6f88cd7ddd3e -r 3844450f44fb usr.bin/gzip/gzip.c
--- a/usr.bin/gzip/gzip.c       Sat Oct 06 16:28:21 2018 +0000
+++ b/usr.bin/gzip/gzip.c       Sat Oct 06 16:36:45 2018 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: gzip.c,v 1.113 2018/06/12 00:42:17 kamil Exp $ */
+/*     $NetBSD: gzip.c,v 1.114 2018/10/06 16:36:45 martin Exp $        */
 
 /*
  * Copyright (c) 1997, 1998, 2003, 2004, 2006, 2008, 2009, 2010, 2011, 2015, 2017
@@ -31,7 +31,7 @@
 #ifndef lint
 __COPYRIGHT("@(#) Copyright (c) 1997, 1998, 2003, 2004, 2006, 2008,\
  2009, 2010, 2011, 2015, 2017 Matthew R. Green.  All rights reserved.");
-__RCSID("$NetBSD: gzip.c,v 1.113 2018/06/12 00:42:17 kamil Exp $");
+__RCSID("$NetBSD: gzip.c,v 1.114 2018/10/06 16:36:45 martin Exp $");
 #endif /* not lint */
 
 /*
@@ -213,6 +213,7 @@
 static const suffixes_t *check_suffix(char *, int);
 static ssize_t read_retry(int, void *, size_t);
 static ssize_t write_retry(int, const void *, size_t);
+static void    print_list_out(off_t, off_t, const char*);
 
 #ifdef SMALL
 #define infile_set(f,t) infile_set(f)
@@ -256,6 +257,7 @@
 
 #ifndef NO_XZ_SUPPORT
 static off_t   unxz(int, int, char *, size_t, off_t *);
+static off_t   unxz_len(int);
 #endif
 
 #ifdef SMALL
@@ -1579,10 +1581,10 @@
 #ifndef NO_XZ_SUPPORT
        case FT_XZ:
                if (lflag) {
-                       maybe_warnx("no -l with xz files");
-                       goto lose;
+                       size = unxz_len(fd);
+                       print_list_out(in_size, size, file);
+                       return -1;
                }
-
                size = unxz(fd, zfd, NULL, 0, NULL);
                break;
 #endif
@@ -2147,6 +2149,12 @@
        in_tot += in;
        out_tot += out;
 #endif
+       print_list_out(out, in, outfile);
+}
+
+static void
+print_list_out(off_t out, off_t in, const char *outfile)
+{
        printf("%12llu %12llu ", (unsigned long long)out, (unsigned long long)in);
        print_ratio(in, out, stdout);
        printf(" %s\n", outfile);
diff -r 6f88cd7ddd3e -r 3844450f44fb usr.bin/gzip/unxz.c
--- a/usr.bin/gzip/unxz.c       Sat Oct 06 16:28:21 2018 +0000
+++ b/usr.bin/gzip/unxz.c       Sat Oct 06 16:36:45 2018 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: unxz.c,v 1.7 2017/08/04 07:27:08 mrg Exp $     */
+/*     $NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $  */
 
 /*-
  * Copyright (c) 2011 The NetBSD Foundation, Inc.
@@ -29,7 +29,7 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: unxz.c,v 1.7 2017/08/04 07:27:08 mrg Exp $");
+__RCSID("$NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $");
 
 #include <stdarg.h>
 #include <errno.h>
@@ -154,3 +154,322 @@
                }
        }
 }
+
+#include <stdbool.h>
+
+/*
+ * Various bits and pieces copied from the xz support code, or brute-force
+ * replacements for them.
+ */
+
+#define        my_min(A,B)     ((A)<(B)?(A):(B))
+
+// Some systems have suboptimal BUFSIZ. Use a bit bigger value on them.
+// IO_BUFFER_SIZE must also be a multiple of 8 (sizeof(uint64_t)).
+#if BUFSIZ <= 1024
+#       define IO_BUFFER_SIZE 8192
+#else
+#       define IO_BUFFER_SIZE (BUFSIZ & ~7U)
+#endif
+
+/// is_sparse() accesses the buffer as uint64_t for maximum speed.
+/// Use a union to make sure that the buffer is properly aligned.
+typedef union {
+        uint8_t u8[IO_BUFFER_SIZE];
+        uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)];
+        uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)];
+} io_buf;
+
+
+static bool
+io_pread(int fd, io_buf *buf, size_t size, off_t pos)
+{
+       // Using lseek() and read() is more portable than pread() and
+       // for us it is as good as real pread().
+       if (lseek(fd, pos, SEEK_SET) != pos) {
+               return true;
+       }
+
+       const size_t amount = read(fd, buf, size);
+       if (amount == SIZE_MAX)
+               return true;
+
+       if (amount != size) {
+               return true;
+       }
+
+       return false;
+}
+
+/*
+ * Most of the following is copied (mostly verbatim) from the xz
+ * distribution, from file src/xz/list.c
+ */
+
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       list.c
+/// \brief      Listing information about .xz files
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+/// Information about a .xz file
+typedef struct {
+       /// Combined Index of all Streams in the file
+       lzma_index *idx;
+
+       /// Total amount of Stream Padding
+       uint64_t stream_padding;
+
+       /// Highest memory usage so far
+       uint64_t memusage_max;
+
+       /// True if all Blocks so far have Compressed Size and
+       /// Uncompressed Size fields
+       bool all_have_sizes;
+
+       /// Oldest XZ Utils version that will decompress the file
+       uint32_t min_version;
+
+} xz_file_info;
+
+#define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 }
+
+
+/// \brief      Parse the Index(es) from the given .xz file
+///
+/// \param      xfi     Pointer to structure where the decoded information
+///                     is stored.
+/// \param      src_fd  File descriptor of the input file
+///
+/// \return     On success, false is returned. On error, true is returned.
+///
+// TODO: This function is pretty big. liblzma should have a function that
+// takes a callback function to parse the Index(es) from a .xz file to make
+// it easy for applications.
+static bool
+parse_indexes(xz_file_info *xfi, int src_fd)
+{
+       struct stat st;
+
+       fstat(src_fd, &st);
+       if (st.st_size <= 0) {
+               return true;
+       }
+
+       if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) {
+               return true;
+       }
+
+       io_buf buf;
+       lzma_stream_flags header_flags;
+       lzma_stream_flags footer_flags;
+       lzma_ret ret;
+
+       // lzma_stream for the Index decoder
+       lzma_stream strm = LZMA_STREAM_INIT;
+
+       // All Indexes decoded so far
+       lzma_index *combined_index = NULL;
+
+       // The Index currently being decoded
+       lzma_index *this_index = NULL;
+
+       // Current position in the file. We parse the file backwards so
+       // initialize it to point to the end of the file.
+       off_t pos = st.st_size;
+
+       // Each loop iteration decodes one Index.
+       do {
+               // Check that there is enough data left to contain at least
+               // the Stream Header and Stream Footer. This check cannot
+               // fail in the first pass of this loop.
+               if (pos < 2 * LZMA_STREAM_HEADER_SIZE) {
+                       goto error;
+               }
+
+               pos -= LZMA_STREAM_HEADER_SIZE;
+               lzma_vli stream_padding = 0;
+
+               // Locate the Stream Footer. There may be Stream Padding which
+               // we must skip when reading backwards.
+               while (true) {
+                       if (pos < LZMA_STREAM_HEADER_SIZE) {
+                               goto error;
+                       }
+
+                       if (io_pread(src_fd, &buf,
+                                       LZMA_STREAM_HEADER_SIZE, pos))
+                               goto error;
+
+                       // Stream Padding is always a multiple of four bytes.
+                       int i = 2;
+                       if (buf.u32[i] != 0)
+                               break;
+
+                       // To avoid calling io_pread() for every four bytes
+                       // of Stream Padding, take advantage that we read
+                       // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and
+                       // check them too before calling io_pread() again.
+                       do {
+                               stream_padding += 4;
+                               pos -= 4;
+                               --i;
+                       } while (i >= 0 && buf.u32[i] == 0);
+               }
+
+               // Decode the Stream Footer.
+               ret = lzma_stream_footer_decode(&footer_flags, buf.u8);
+               if (ret != LZMA_OK) {
+                       goto error;
+               }
+
+               // Check that the Stream Footer doesn't specify something
+               // that we don't support. This can only happen if the xz
+               // version is older than liblzma and liblzma supports
+               // something new.
+               //
+               // It is enough to check Stream Footer. Stream Header must
+               // match when it is compared against Stream Footer with
+               // lzma_stream_flags_compare().
+               if (footer_flags.version != 0) {
+                       goto error;
+               }
+
+               // Check that the size of the Index field looks sane.
+               lzma_vli index_size = footer_flags.backward_size;
+               if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) {
+                       goto error;
+               }
+
+               // Set pos to the beginning of the Index.
+               pos -= index_size;
+
+               // Decode the Index.
+               ret = lzma_index_decoder(&strm, &this_index, UINT64_MAX);
+               if (ret != LZMA_OK) {
+                       goto error;
+               }
+
+               do {
+                       // Don't give the decoder more input than the
+                       // Index size.
+                       strm.avail_in = my_min(IO_BUFFER_SIZE, index_size);
+                       if (io_pread(src_fd, &buf, strm.avail_in, pos))
+                               goto error;
+
+                       pos += strm.avail_in;
+                       index_size -= strm.avail_in;
+
+                       strm.next_in = buf.u8;
+                       ret = lzma_code(&strm, LZMA_RUN);
+
+               } while (ret == LZMA_OK);



Home | Main Index | Thread Index | Old Index