[src/trunk]: src/usr.bin/indent indent: distinguish between typename in parentheses and other words

To: source-changes-hg%NetBSD.org@localhost
Subject: [src/trunk]: src/usr.bin/indent indent: distinguish between typename in parentheses and other words
From: rillig <rillig%NetBSD.org@localhost>
Date: Mon, 08 Nov 2021 15:54:14 +0000
details:   https://anonhg.NetBSD.org/src/rev/d2a180ed1a19
branches:  trunk
changeset: 1024821:d2a180ed1a19
user:      rillig <rillig%NetBSD.org@localhost>
date:      Sun Nov 07 07:35:06 2021 +0000

description:
indent: distinguish between typename in parentheses and other words

This gets rid of two members of parser_state, prev_is_type and
curr_is_type. No functional change for well-formed programs. The
sequences '++int' and '--size_t' may be formatted differently than
before, but no program is expected to contain them.

Rename lsym_ident to lsym_word since 'ident' was too specific. This
token type is used for constants and string literals as well. Strictly
speaking, a string literal is not a word, but at least it's better than
before.

diffstat:

 usr.bin/indent/indent.c |  11 ++++++-----
 usr.bin/indent/indent.h |   7 +++----
 usr.bin/indent/lexi.c   |  35 ++++++++++++++++-------------------
 3 files changed, 25 insertions(+), 28 deletions(-)

diffs (219 lines):

diff -r 83cbf8e045b8 -r d2a180ed1a19 usr.bin/indent/indent.c
--- a/usr.bin/indent/indent.c   Sun Nov 07 07:06:00 2021 +0000
+++ b/usr.bin/indent/indent.c   Sun Nov 07 07:35:06 2021 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: indent.c,v 1.210 2021/11/07 07:06:00 rillig Exp $      */
+/*     $NetBSD: indent.c,v 1.211 2021/11/07 07:35:06 rillig Exp $      */
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
@@ -43,7 +43,7 @@
 
 #include <sys/cdefs.h>
 #if defined(__NetBSD__)
-__RCSID("$NetBSD: indent.c,v 1.210 2021/11/07 07:06:00 rillig Exp $");
+__RCSID("$NetBSD: indent.c,v 1.211 2021/11/07 07:35:06 rillig Exp $");
 #elif defined(__FreeBSD__)
 __FBSDID("$FreeBSD: head/usr.bin/indent/indent.c 340138 2018-11-04 19:24:49Z oshogbo $");
 #endif
@@ -747,8 +747,8 @@
        return false;
     if (ps.prev_token == lsym_sizeof)
        return opt.blank_after_sizeof;
-    if (ps.prev_token == lsym_ident || ps.prev_token == lsym_funcname)
-       return ps.prev_is_type;
+    if (ps.prev_token == lsym_word || ps.prev_token == lsym_funcname)
+       return false;
     return true;
 }
 
@@ -1511,9 +1511,10 @@
            process_type(&decl_ind, &tabs_to_var);
            goto copy_token;
 
+       case lsym_type_in_parentheses:
        case lsym_offsetof:
        case lsym_sizeof:
-       case lsym_ident:
+       case lsym_word:
        case lsym_funcname:
        case lsym_return:
            process_ident(lsym, decl_ind, tabs_to_var, &spaced_expr,
diff -r 83cbf8e045b8 -r d2a180ed1a19 usr.bin/indent/indent.h
--- a/usr.bin/indent/indent.h   Sun Nov 07 07:06:00 2021 +0000
+++ b/usr.bin/indent/indent.h   Sun Nov 07 07:35:06 2021 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: indent.h,v 1.79 2021/11/07 07:06:00 rillig Exp $       */
+/*     $NetBSD: indent.h,v 1.80 2021/11/07 07:35:06 rillig Exp $       */
 
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
@@ -92,12 +92,13 @@
     lsym_typedef,
     lsym_storage_class,
     lsym_type_at_paren_level_0,
+    lsym_type_in_parentheses,
     lsym_tag,                  /* 'struct', 'union' or 'enum' */
     lsym_case_label,           /* 'case' or 'default' */
     lsym_string_prefix,                /* 'L' */
     lsym_sizeof,
     lsym_offsetof,
-    lsym_ident,                        /* identifier, constant or string */
+    lsym_word,                 /* identifier, constant or string */
     lsym_funcname,
     lsym_do,
     lsym_else,
@@ -261,8 +262,6 @@
 
 extern struct parser_state {
     lexer_symbol prev_token;
-    bool prev_is_type;
-    bool curr_is_type;
     bool curr_col_1;           /* whether the current token started in column
                                 * 1 of the unformatted input */
     bool next_col_1;
diff -r 83cbf8e045b8 -r d2a180ed1a19 usr.bin/indent/lexi.c
--- a/usr.bin/indent/lexi.c     Sun Nov 07 07:06:00 2021 +0000
+++ b/usr.bin/indent/lexi.c     Sun Nov 07 07:35:06 2021 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: lexi.c,v 1.133 2021/11/07 07:06:00 rillig Exp $        */
+/*     $NetBSD: lexi.c,v 1.134 2021/11/07 07:35:06 rillig Exp $        */
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
@@ -43,7 +43,7 @@
 
 #include <sys/cdefs.h>
 #if defined(__NetBSD__)
-__RCSID("$NetBSD: lexi.c,v 1.133 2021/11/07 07:06:00 rillig Exp $");
+__RCSID("$NetBSD: lexi.c,v 1.134 2021/11/07 07:35:06 rillig Exp $");
 #elif defined(__FreeBSD__)
 __FBSDID("$FreeBSD: head/usr.bin/indent/lexi.c 337862 2018-08-15 18:19:45Z pstef $");
 #endif
@@ -70,12 +70,12 @@
     {"_Imaginary", lsym_type},
     {"auto", lsym_storage_class},
     {"bool", lsym_type},
-    {"break", lsym_ident},
+    {"break", lsym_word},
     {"case", lsym_case_label},
     {"char", lsym_type},
     {"complex", lsym_type},
     {"const", lsym_type},
-    {"continue", lsym_ident},
+    {"continue", lsym_word},
     {"default", lsym_case_label},
     {"do", lsym_do},
     {"double", lsym_type},
@@ -84,15 +84,15 @@
     {"extern", lsym_storage_class},
     {"float", lsym_type},
     {"for", lsym_for},
-    {"goto", lsym_ident},
+    {"goto", lsym_word},
     {"if", lsym_if},
     {"imaginary", lsym_type},
-    {"inline", lsym_ident},
+    {"inline", lsym_word},
     {"int", lsym_type},
     {"long", lsym_type},
     {"offsetof", lsym_offsetof},
     {"register", lsym_storage_class},
-    {"restrict", lsym_ident},
+    {"restrict", lsym_word},
     {"return", lsym_return},
     {"short", lsym_type},
     {"signed", lsym_type},
@@ -239,12 +239,13 @@
        "typedef",
        "storage_class",
        "type_at_paren_level_0",
+       "type_in_parentheses",
        "tag",
        "case_label",
        "string_prefix",
        "sizeof",
        "offsetof",
-       "ident",
+       "word",
        "funcname",
        "do",
        "else",
@@ -284,8 +285,7 @@
     static struct parser_state prev_ps;
 
     debug_println("");
-    debug_printf("line %d: %s%s", line_no, lsym_name(lsym),
-       ps.curr_is_type ? " type" : "");
+    debug_printf("line %d: %s", line_no, lsym_name(lsym));
     debug_vis_range(" \"", token.s, token.e, "\"\n");
 
     debug_print_buf("label", &lab);
@@ -293,7 +293,6 @@
     debug_print_buf("comment", &com);
 
     debug_println("    ps.prev_token = %s", lsym_name(ps.prev_token));
-    debug_ps_bool(prev_is_type);
     debug_ps_bool(next_col_1);
     debug_ps_bool(curr_col_1);
     debug_ps_bool(next_unary);
@@ -499,15 +498,16 @@
 
     const struct keyword *kw = bsearch(token.s, keywords,
        array_length(keywords), sizeof(keywords[0]), cmp_keyword_by_name);
+    bool is_type = false;
     if (kw == NULL) {
        if (is_typename()) {
-           ps.curr_is_type = true;
+           is_type = true;
            ps.next_unary = true;
            goto found_typename;
        }
 
     } else {                   /* we have a keyword */
-       ps.curr_is_type = kw->lsym == lsym_type;
+       is_type = kw->lsym == lsym_type;
        ps.next_unary = true;
        if (kw->lsym != lsym_tag && kw->lsym != lsym_type)
            return kw->lsym;
@@ -539,12 +539,11 @@
 no_function_definition:;
 
     } else if (probably_typename()) {
-       ps.curr_is_type = true;
        ps.next_unary = true;
        return lsym_type_at_paren_level_0;
     }
 
-    return lsym_ident;         /* the ident is not in the list */
+    return is_type ? lsym_type_in_parentheses : lsym_word;
 }
 
 /* Reads the next token, placing it in the global variable "token". */
@@ -554,8 +553,6 @@
     token.e = token.s;
     ps.curr_col_1 = ps.next_col_1;
     ps.next_col_1 = false;
-    ps.prev_is_type = ps.curr_is_type;
-    ps.curr_is_type = false;
 
     while (ch_isblank(*inp.s)) {
        ps.curr_col_1 = false;
@@ -587,7 +584,7 @@
     case '\'':
     case '"':
        lex_char_or_string();
-       lsym = lsym_ident;
+       lsym = lsym_word;
        break;
 
     case '(':
@@ -654,7 +651,7 @@
 
        if (*inp.s == token.e[-1]) {    /* ++, -- */
            *token.e++ = *inp.s++;
-           if (ps.prev_token == lsym_ident ||
+           if (ps.prev_token == lsym_word ||
                    ps.prev_token == lsym_rparen_or_rbracket) {
                lsym = ps.next_unary ? lsym_unary_op : lsym_postfix_op;
                unary_delim = false;
Prev by Date: [src/trunk]: src/usr.bin/indent indent: rename 'inbuf' functions to 'inp'
Next by Date: [src/trunk]: src/usr.bin/indent indent: rename type_at_paren_level_0 to type_...
Previous by Thread: [src/trunk]: src/usr.bin/indent indent: rename 'inbuf' functions to 'inp'
Next by Thread: [src/trunk]: src/usr.bin/indent indent: rename type_at_paren_level_0 to type_...
Indexes:
Home | Main Index | Thread Index | Old Index