Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/usr.bin/indent indent: replace direct access to the input buffer



details:   https://anonhg.NetBSD.org/src/rev/7ad7941bd719
branches:  trunk
changeset: 1026329:7ad7941bd719
user:      rillig <rillig%NetBSD.org@localhost>
date:      Fri Nov 19 17:11:46 2021 +0000

description:
indent: replace direct access to the input buffer

This is a preparation for abstracting away all the low-level details of
handling the input.  The goal is to fix the current bugs in line number
counting and out-of-bounds memory access, and to make the generally
unreadable code easier to follow.

No functional change.

diffstat:

 usr.bin/indent/indent.c     |  22 +++++++-------
 usr.bin/indent/indent.h     |   4 +-
 usr.bin/indent/lexi.c       |  68 ++++++++++++++++++++++----------------------
 usr.bin/indent/pr_comment.c |  28 +++++++++---------
 4 files changed, 62 insertions(+), 60 deletions(-)

diffs (truncated from 411 to 300 lines):

diff -r b1df7889206e -r 7ad7941bd719 usr.bin/indent/indent.c
--- a/usr.bin/indent/indent.c   Fri Nov 19 15:34:25 2021 +0000
+++ b/usr.bin/indent/indent.c   Fri Nov 19 17:11:46 2021 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: indent.c,v 1.221 2021/11/19 15:34:25 rillig Exp $      */
+/*     $NetBSD: indent.c,v 1.222 2021/11/19 17:11:46 rillig Exp $      */
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
@@ -43,7 +43,7 @@
 
 #include <sys/cdefs.h>
 #if defined(__NetBSD__)
-__RCSID("$NetBSD: indent.c,v 1.221 2021/11/19 15:34:25 rillig Exp $");
+__RCSID("$NetBSD: indent.c,v 1.222 2021/11/19 17:11:46 rillig Exp $");
 #elif defined(__FreeBSD__)
 __FBSDID("$FreeBSD: head/usr.bin/indent/indent.c 340138 2018-11-04 19:24:49Z oshogbo $");
 #endif
@@ -344,9 +344,9 @@
         * will be moved into "the else's line", so if there was a newline
         * resulting from the "{" before, it must be scanned now and ignored.
         */
-       while (isspace((unsigned char)*inbuf.inp.s)) {
+       while (isspace((unsigned char)inp_peek())) {
            inp_skip();
-           if (*inbuf.inp.s == '\n')
+           if (inp_peek() == '\n')
                break;
        }
        debug_inbuf(__func__);
@@ -443,7 +443,7 @@
      * into the buffer so that the later lexi() call will read them.
      */
     if (inbuf.save_com_e != NULL) {
-       while (ch_isblank(*inbuf.inp.s))
+       while (ch_isblank(inp_peek()))
            save_com_add_char(inp_next());
        debug_inbuf(__func__);
     }
@@ -1246,10 +1246,10 @@
     state = PLAIN;
     int com_start = 0, com_end = 0;
 
-    while (ch_isblank(*inbuf.inp.s))
+    while (ch_isblank(inp_peek()))
        inp_skip();
 
-    while (*inbuf.inp.s != '\n' || (state == COMM && !had_eof)) {
+    while (inp_peek() != '\n' || (state == COMM && !had_eof)) {
        buf_reserve(&lab, 2);
        *lab.e++ = inp_next();
        switch (lab.e[-1]) {
@@ -1258,9 +1258,9 @@
                *lab.e++ = inp_next();
            break;
        case '/':
-           if (*inbuf.inp.s == '*' && state == PLAIN) {
+           if (inp_peek() == '*' && state == PLAIN) {
                state = COMM;
-               *lab.e++ = *inbuf.inp.s++;
+               *lab.e++ = inp_next();
                com_start = (int)buf_len(&lab) - 2;
            }
            break;
@@ -1277,9 +1277,9 @@
                state = CHR;
            break;
        case '*':
-           if (*inbuf.inp.s == '/' && state == COMM) {
+           if (inp_peek() == '/' && state == COMM) {
                state = PLAIN;
-               *lab.e++ = *inbuf.inp.s++;
+               *lab.e++ = inp_next();
                com_end = (int)buf_len(&lab);
            }
            break;
diff -r b1df7889206e -r 7ad7941bd719 usr.bin/indent/indent.h
--- a/usr.bin/indent/indent.h   Fri Nov 19 15:34:25 2021 +0000
+++ b/usr.bin/indent/indent.h   Fri Nov 19 17:11:46 2021 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: indent.h,v 1.88 2021/11/19 15:32:13 rillig Exp $       */
+/*     $NetBSD: indent.h,v 1.89 2021/11/19 17:11:46 rillig Exp $       */
 
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
@@ -377,8 +377,10 @@
 int compute_label_indent(void);
 int ind_add(int, const char *, const char *);
 
+char inp_peek(void);
 void inp_skip(void);
 char inp_next(void);
+
 lexer_symbol lexi(void);
 void diag(int, const char *, ...)__printflike(2, 3);
 void dump_line(void);
diff -r b1df7889206e -r 7ad7941bd719 usr.bin/indent/lexi.c
--- a/usr.bin/indent/lexi.c     Fri Nov 19 15:34:25 2021 +0000
+++ b/usr.bin/indent/lexi.c     Fri Nov 19 17:11:46 2021 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: lexi.c,v 1.140 2021/11/19 15:28:32 rillig Exp $        */
+/*     $NetBSD: lexi.c,v 1.141 2021/11/19 17:11:46 rillig Exp $        */
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
@@ -43,7 +43,7 @@
 
 #include <sys/cdefs.h>
 #if defined(__NetBSD__)
-__RCSID("$NetBSD: lexi.c,v 1.140 2021/11/19 15:28:32 rillig Exp $");
+__RCSID("$NetBSD: lexi.c,v 1.141 2021/11/19 17:11:46 rillig Exp $");
 #elif defined(__FreeBSD__)
 __FBSDID("$FreeBSD: head/usr.bin/indent/lexi.c 337862 2018-08-15 18:19:45Z pstef $");
 #endif
@@ -178,7 +178,7 @@
     ['.'] = 15,
 };
 
-static char
+char
 inp_peek(void)
 {
     return *inbuf.inp.s;
@@ -352,7 +352,7 @@
 lex_number(void)
 {
     for (unsigned char s = 'A'; s != 'f' && s != 'i' && s != 'u';) {
-       unsigned char ch = (unsigned char)*inbuf.inp.s;
+       unsigned char ch = (unsigned char)inp_peek();
        if (ch >= array_length(lex_number_row) || lex_number_row[ch] == 0)
            break;
 
@@ -373,11 +373,11 @@
 static void
 lex_word(void)
 {
-    while (isalnum((unsigned char)*inbuf.inp.s) ||
-           *inbuf.inp.s == '\\' ||
-           *inbuf.inp.s == '_' || *inbuf.inp.s == '$') {
+    while (isalnum((unsigned char)inp_peek()) ||
+           inp_peek() == '\\' ||
+           inp_peek() == '_' || inp_peek() == '$') {
 
-       if (*inbuf.inp.s == '\\') {
+       if (inp_peek() == '\\') {
            if (inbuf.inp.s[1] == '\n') {
                inbuf.inp.s += 2;
                if (inbuf.inp.s >= inbuf.inp.e)
@@ -394,7 +394,7 @@
 lex_char_or_string(void)
 {
     for (char delim = token.e[-1];;) {
-       if (*inbuf.inp.s == '\n') {
+       if (inp_peek() == '\n') {
            diag(1, "Unterminated literal");
            return;
        }
@@ -404,7 +404,7 @@
            return;
 
        if (token.e[-1] == '\\') {
-           if (*inbuf.inp.s == '\n')
+           if (inp_peek() == '\n')
                ++line_no;
            token_add_char(inp_next());
        }
@@ -419,7 +419,7 @@
        return false;
     if (inbuf.inp.s[0] == '*' && inbuf.inp.s[1] != '=')
        goto maybe;
-    if (isalpha((unsigned char)*inbuf.inp.s))
+    if (isalpha((unsigned char)inp_peek()))
        goto maybe;
     return false;
 maybe:
@@ -468,11 +468,11 @@
 static lexer_symbol
 lexi_alnum(void)
 {
-    if (isdigit((unsigned char)*inbuf.inp.s) ||
+    if (isdigit((unsigned char)inp_peek()) ||
            (inbuf.inp.s[0] == '.' && isdigit((unsigned char)inbuf.inp.s[1]))) {
        lex_number();
-    } else if (isalnum((unsigned char)*inbuf.inp.s) ||
-           *inbuf.inp.s == '_' || *inbuf.inp.s == '$') {
+    } else if (isalnum((unsigned char)inp_peek()) ||
+           inp_peek() == '_' || inp_peek() == '$') {
        lex_word();
     } else
        return lsym_eof;        /* just as a placeholder */
@@ -480,7 +480,7 @@
     *token.e = '\0';
 
     if (token.s[0] == 'L' && token.s[1] == '\0' &&
-           (*inbuf.inp.s == '"' || *inbuf.inp.s == '\''))
+           (inp_peek() == '"' || inp_peek() == '\''))
        return lsym_string_prefix;
 
     while (ch_isblank(inp_peek()))
@@ -523,7 +523,7 @@
        }
     }
 
-    if (*inbuf.inp.s == '(' && ps.tos <= 1 && ps.ind_level == 0 &&
+    if (inp_peek() == '(' && ps.tos <= 1 && ps.ind_level == 0 &&
        !ps.in_parameter_declaration && !ps.block_init) {
 
        for (const char *p = inbuf.inp.s; p < inbuf.inp.e;)
@@ -552,7 +552,7 @@
     ps.curr_col_1 = ps.next_col_1;
     ps.next_col_1 = false;
 
-    while (ch_isblank(*inbuf.inp.s)) {
+    while (ch_isblank(inp_peek())) {
        ps.curr_col_1 = false;
        inp_skip();
     }
@@ -647,19 +647,19 @@
        lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;
        unary_delim = true;
 
-       if (*inbuf.inp.s == token.e[-1]) {      /* ++, -- */
-           *token.e++ = *inbuf.inp.s++;
+       if (inp_peek() == token.e[-1]) {        /* ++, -- */
+           *token.e++ = inp_next();
            if (ps.prev_token == lsym_word ||
                    ps.prev_token == lsym_rparen_or_rbracket) {
                lsym = ps.next_unary ? lsym_unary_op : lsym_postfix_op;
                unary_delim = false;
            }
 
-       } else if (*inbuf.inp.s == '=') {       /* += */
-           *token.e++ = *inbuf.inp.s++;
+       } else if (inp_peek() == '=') { /* += */
+           *token.e++ = inp_next();
 
-       } else if (*inbuf.inp.s == '>') {       /* -> */
-           *token.e++ = *inbuf.inp.s++;
+       } else if (inp_peek() == '>') { /* -> */
+           *token.e++ = inp_next();
            unary_delim = false;
            lsym = lsym_unary_op;
            ps.want_blank = false;
@@ -669,8 +669,8 @@
     case '=':
        if (ps.init_or_struct)
            ps.block_init = true;
-       if (*inbuf.inp.s == '=') {      /* == */
-           *token.e++ = *inbuf.inp.s++;
+       if (inp_peek() == '=') {        /* == */
+           *token.e++ = inp_next();
            *token.e = '\0';
        }
        lsym = lsym_binary_op;
@@ -680,10 +680,10 @@
     case '>':
     case '<':
     case '!':                  /* ops like <, <<, <=, !=, etc */
-       if (*inbuf.inp.s == '>' || *inbuf.inp.s == '<' || *inbuf.inp.s == '=')
+       if (inp_peek() == '>' || inp_peek() == '<' || inp_peek() == '=')
            *token.e++ = inp_next();
-       if (*inbuf.inp.s == '=')
-           *token.e++ = *inbuf.inp.s++;
+       if (inp_peek() == '=')
+           *token.e++ = inp_next();
        lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;
        unary_delim = true;
        break;
@@ -691,14 +691,14 @@
     case '*':
        unary_delim = true;
        if (!ps.next_unary) {
-           if (*inbuf.inp.s == '=')
-               *token.e++ = *inbuf.inp.s++;
+           if (inp_peek() == '=')
+               *token.e++ = inp_next();
            lsym = lsym_binary_op;
            break;
        }
 
-       while (*inbuf.inp.s == '*' || isspace((unsigned char)*inbuf.inp.s)) {
-           if (*inbuf.inp.s == '*')
+       while (inp_peek() == '*' || isspace((unsigned char)inp_peek())) {
+           if (inp_peek() == '*')
                token_add_char('*');
            inp_skip();
        }
@@ -723,7 +723,7 @@
        break;
 
     default:
-       if (token.e[-1] == '/' && (*inbuf.inp.s == '*' || *inbuf.inp.s == '/')) {
+       if (token.e[-1] == '/' && (inp_peek() == '*' || inp_peek() == '/')) {
            *token.e++ = inp_next();
            lsym = lsym_comment;
            unary_delim = ps.next_unary;
@@ -731,7 +731,7 @@
        }
 
        /* handle '||', '&&', etc., and also things as in 'int *****i' */



Home | Main Index | Thread Index | Old Index