tech-userlevel archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: Encoding non-alphanumeric characters in manpage filenames



Below is a proof of concept that encodes some of our actual filenames.

The salient lines of code are:

    static const char hex[] = "0123456789ABCDEF";
    if (isalnum(ch) || (ch == '_') || (ch == '-')) {
    *out++ = '%';

These all use C's idea of the character set.

Here's the full program:

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Build the string prefix + encoded(page) + suffix in freshly allocated
 * memory.
 *
 * Only `page` is encoded: bytes for which isalnum() is true, plus '_' and
 * '-', are copied unchanged; every other byte becomes '%' followed by two
 * uppercase hex digits.  `prefix` and `suffix` are copied verbatim.
 *
 * Returns NULL if the allocation fails.  Otherwise the caller owns the
 * returned string and must free() it.
 */
char *escape(const char *prefix, const char *page, const char *suffix)
{
    static const char hex[] = "0123456789ABCDEF";
    const size_t prefix_len = strlen(prefix);
    const size_t suffix_len = strlen(suffix);
    const char *in;
    char *out;
    char *result;
    size_t size;
    int ch;

    size = prefix_len + suffix_len + 1;
    size += 3 * strlen(page);  /* Worst case: every char hex-escaped. */
    if ((result = calloc(1, size)) == NULL) {
        return NULL;
    }
    out = result;
    memcpy(out, prefix, prefix_len);
    out += prefix_len;
    /*
     * Read each byte as unsigned char.  Where plain char is signed, a byte
     * >= 0x80 would otherwise be negative: passing that to isalnum() is
     * undefined, and ch / 16 and ch % 16 would index before hex[].
     */
    for (in = page; (ch = (unsigned char)*in); in++) {
        if (isalnum(ch) || (ch == '_') || (ch == '-')) {
            *out++ = *in;
        } else {
            *out++ = '%';
            *out++ = hex[ch / 16];
            *out++ = hex[ch % 16];
        }
    }
    memcpy(out, suffix, suffix_len);
    out += suffix_len;
    *out = 0;
    return result;
}

/*
 * Print "<page><suffix> -> <encoded name>" for one manual page name, using
 * an empty prefix and the fixed suffix ".3scm.gz".
 *
 * escape() returns heap memory (or NULL on allocation failure), so the
 * result is checked before it is printed and released afterwards.
 */
static void demo(const char *page)
{
    const char suffix[] = ".3scm.gz";
    char *escaped = escape("", page, suffix);

    if (escaped == NULL) {
        fprintf(stderr, "out of memory\n");
        exit(EXIT_FAILURE);
    }
    printf("%s%s -> %s\n", page, suffix, escaped);
    free(escaped);
}

/* Run the demo over a few page names that contain non-alphanumerics. */
int main(void)
{
    static const char *const pages[] = {
        "&assertion",
        "call/cc",
        "hash-table?",
        "string->number",
    };
    size_t i;

    for (i = 0; i < sizeof pages / sizeof pages[0]; i++) {
        demo(pages[i]);
    }
    return EXIT_SUCCESS;
}

Here's how you could look for the files (not tested):

/*
 * Open `path` read-only and free `path` whether or not the open succeeds.
 *
 * A NULL path is reported through out_of_memory() (defined elsewhere;
 * presumably it does not return -- confirm).  Returns the descriptor from
 * open(), or -1 with errno holding the value open() set.
 */
int try_open(char *path)
{
    int fd, open_errno;

    if (path == NULL) {
        out_of_memory();
    }
    fd = open(path, O_RDONLY);
    open_errno = errno;  /* Capture before free() has a chance to change it. */
    free(path);
    if (fd == -1) {
        errno = open_errno;
    }
    return fd;
}

/*
 * Look for a manual page file: try the literal name first, and only if that
 * cannot be opened, try the name with `page` percent-encoded by escape().
 *
 * Returns an open descriptor, or -1 if neither attempt succeeded.
 */
int find(const char *mandir, const char *page, const char *suffix)
{
    int fd = try_open(concat_3_strings(mandir, page, suffix));

    if (fd == -1) {
        fd = try_open(escape(mandir, page, suffix));
    }
    return fd;
}


Home | Main Index | Thread Index | Old Index