[src/trunk]: src/usr.bin/indent indent: split type token_type into 3 separate...

To: source-changes-hg%NetBSD.org@localhost
Subject: [src/trunk]: src/usr.bin/indent indent: split type token_type into 3 separate...
From: rillig <rillig%NetBSD.org@localhost>
Date: Mon, 08 Nov 2021 15:49:17 +0000
details:   https://anonhg.NetBSD.org/src/rev/cadcbd237e7e
branches:  trunk
changeset: 1024470:cadcbd237e7e
user:      rillig <rillig%NetBSD.org@localhost>
date:      Mon Oct 25 00:54:37 2021 +0000

description:
indent: split type token_type into 3 separate types

Previously, token_type was used for 3 different purposes:

1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements

Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, or the inner structure of expressions.

Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch'; in the parser stack, it represented
'switch (expr)'; and it was also used for a statement head.

The only overlaps between the lexer symbols and the parser symbols are
'{' and '}' and the keywords 'do' and 'else'. To add to the confusion,
the constants of the previous token_type were in apparently random
order, and before 2021 they had cryptic, highly abbreviated names.

No functional change.

diffstat:

 usr.bin/indent/indent.c     |  220 +++++++++++++++++++++----------------------
 usr.bin/indent/indent.h     |  114 +++++++++++++---------
 usr.bin/indent/lexi.c       |  175 +++++++++++++++++++---------------
 usr.bin/indent/parse.c      |  158 ++++++++++++++++++++-----------
 usr.bin/indent/pr_comment.c |    7 +-
 5 files changed, 375 insertions(+), 299 deletions(-)

diffs (truncated from 1527 to 300 lines):

diff -r a47cd3ca18f9 -r cadcbd237e7e usr.bin/indent/indent.c
--- a/usr.bin/indent/indent.c   Sun Oct 24 22:44:13 2021 +0000
+++ b/usr.bin/indent/indent.c   Mon Oct 25 00:54:37 2021 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: indent.c,v 1.155 2021/10/24 22:44:13 rillig Exp $      */
+/*     $NetBSD: indent.c,v 1.156 2021/10/25 00:54:37 rillig Exp $      */
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
@@ -43,7 +43,7 @@
 
 #include <sys/cdefs.h>
 #if defined(__NetBSD__)
-__RCSID("$NetBSD: indent.c,v 1.155 2021/10/24 22:44:13 rillig Exp $");
+__RCSID("$NetBSD: indent.c,v 1.156 2021/10/25 00:54:37 rillig Exp $");
 #elif defined(__FreeBSD__)
 __FBSDID("$FreeBSD: head/usr.bin/indent/indent.c 340138 2018-11-04 19:24:49Z oshogbo $");
 #endif
@@ -239,16 +239,16 @@
 }
 
 static bool
-search_brace_other(token_type ttype, bool *force_nl,
+search_brace_other(lexer_symbol lsym, bool *force_nl,
     bool comment_buffered, bool last_else)
 {
     bool remove_newlines;
 
     remove_newlines =
            /* "} else" */
-           (ttype == tt_lex_else && code.e != code.s && code.e[-1] == '}')
+           (lsym == lsym_else && code.e != code.s && code.e[-1] == '}')
            /* "else if" */
-           || (ttype == tt_lex_if && last_else && opt.else_if);
+           || (lsym == lsym_if && last_else && opt.else_if);
     if (remove_newlines)
        *force_nl = false;
 
@@ -301,9 +301,9 @@
 }
 
 static void
-search_brace_lookahead(token_type *ttype)
+search_brace_lookahead(lexer_symbol *lsym)
 {
-    if (*ttype == end_of_file)
+    if (*lsym == lsym_eof)
        return;
 
     /*
@@ -335,39 +335,39 @@
 
     struct parser_state transient_state;
     transient_state = ps;
-    *ttype = lexi(&transient_state);   /* read another token */
-    if (*ttype != newline && *ttype != tt_lex_form_feed &&
-       *ttype != comment && !transient_state.search_brace) {
+    *lsym = lexi(&transient_state);    /* read another token */
+    if (*lsym != lsym_newline && *lsym != lsym_form_feed &&
+       *lsym != lsym_comment && !transient_state.search_brace) {
        ps = transient_state;
     }
 }
 
 static void
-search_brace(token_type *ttype, bool *force_nl,
+search_brace(lexer_symbol *lsym, bool *force_nl,
     bool *comment_buffered, bool *last_else)
 {
     while (ps.search_brace) {
-       switch (*ttype) {
-       case newline:
+       switch (*lsym) {
+       case lsym_newline:
            search_brace_newline(force_nl);
            break;
-       case tt_lex_form_feed:
+       case lsym_form_feed:
            break;
-       case comment:
+       case lsym_comment:
            search_brace_comment(comment_buffered);
            break;
-       case lbrace:
+       case lsym_lbrace:
            if (search_brace_lbrace())
                goto switch_buffer;
            /* FALLTHROUGH */
        default:                /* it is the start of a normal statement */
-           if (!search_brace_other(*ttype, force_nl,
+           if (!search_brace_other(*lsym, force_nl,
                    *comment_buffered, *last_else))
                return;
     switch_buffer:
            switch_buffer();
        }
-       search_brace_lookahead(ttype);
+       search_brace_lookahead(lsym);
     }
 
     *last_else = false;
@@ -443,9 +443,9 @@
 {
     found_err = false;
 
-    ps.s_ttype[0] = stmt;
+    ps.s_sym[0] = psym_stmt;
     ps.last_nl = true;
-    ps.last_token = semicolon;
+    ps.last_token = lsym_semicolon;
     buf_init(&com);
     buf_init(&lab);
     buf_init(&code);
@@ -581,7 +581,7 @@
 {
     inbuf_read_line();
 
-    parse(semicolon);
+    parse(psym_semicolon);
 
     int ind = 0;
     for (const char *p = inp.s;; p++) {
@@ -647,11 +647,11 @@
 }
 
 static void
-process_comment_in_code(token_type ttype, bool *force_nl)
+process_comment_in_code(lexer_symbol lsym, bool *force_nl)
 {
     if (*force_nl &&
-       ttype != semicolon &&
-       (ttype != lbrace || !opt.brace_same_line)) {
+       lsym != lsym_semicolon &&
+       (lsym != lsym_lbrace || !opt.brace_same_line)) {
 
        /* we should force a broken line here */
        if (opt.verbose)
@@ -684,7 +684,7 @@
 static void
 process_newline(void)
 {
-    if (ps.last_token == comma && ps.p_l_follow == 0 && !ps.block_init &&
+    if (ps.last_token == lsym_comma && ps.p_l_follow == 0 && !ps.block_init &&
        !opt.break_after_comma && break_comma &&
        com.s == com.e)
        goto stay_in_line;
@@ -701,9 +701,9 @@
 {
     if (!ps.want_blank)
        return false;
-    if (ps.last_token == rparen_or_rbracket)
+    if (ps.last_token == lsym_rparen_or_rbracket)
        return false;
-    if (ps.last_token != ident && ps.last_token != funcname)
+    if (ps.last_token != lsym_ident && ps.last_token != lsym_funcname)
        return true;
     if (opt.proc_calls_space)
        return true;
@@ -748,7 +748,7 @@
         * this is a kluge to make sure that declarations will be aligned
         * right if proc decl has an explicit type on it, i.e. "int a(x) {..."
         */
-       parse(semicolon);       /* I said this was a kluge... */
+       parse(psym_semicolon);  /* I said this was a kluge... */
        ps.init_or_struct = false;
     }
 
@@ -758,8 +758,7 @@
 }
 
 static void
-process_rparen_or_rbracket(bool *sp_sw, bool *force_nl,
-    token_type hd_type)
+process_rparen_or_rbracket(bool *sp_sw, bool *force_nl, stmt_head hd)
 {
     if ((ps.cast_mask & (1 << ps.p_l_follow) & ~ps.not_cast_mask) != 0) {
        ps.next_unary = true;
@@ -786,7 +785,7 @@
        ps.next_unary = true;
        ps.in_stmt = false;     /* don't use stmt continuation indentation */
 
-       parse(hd_type);         /* let parser worry about if, or whatever */
+       parse_hd(hd);           /* let parser worry about if, or whatever */
     }
 
     /*
@@ -870,15 +869,13 @@
 
 static void
 process_semicolon(bool *seen_case, int *quest_level, int decl_ind,
-    bool tabs_to_var, bool *sp_sw,
-    token_type hd_type,
-    bool *force_nl)
+    bool tabs_to_var, bool *sp_sw, stmt_head hd, bool *force_nl)
 {
     if (ps.decl_nest == 0)
        ps.init_or_struct = false;
     *seen_case = false;                /* these will only need resetting in an error */
     *quest_level = 0;
-    if (ps.last_token == rparen_or_rbracket)
+    if (ps.last_token == lsym_rparen_or_rbracket)
        ps.in_parameter_declaration = false;
     ps.cast_mask = 0;
     ps.not_cast_mask = 0;
@@ -897,7 +894,7 @@
                                         * structure declaration, we aren't
                                         * anymore */
 
-    if ((!*sp_sw || hd_type != for_exprs) && ps.p_l_follow > 0) {
+    if ((!*sp_sw || hd != hd_for) && ps.p_l_follow > 0) {
 
        /*
         * There were unbalanced parens in the statement. It is a bit
@@ -908,7 +905,7 @@
        if (*sp_sw) {           /* this is a check for an if, while, etc. with
                                 * unbalanced parens */
            *sp_sw = false;
-           parse(hd_type);     /* don't lose the 'if', or whatever */
+           parse_hd(hd);       /* don't lose the 'if', or whatever */
        }
     }
     *code.e++ = ';';
@@ -917,13 +914,13 @@
                                         * stmt */
 
     if (!*sp_sw) {             /* if not if for (;;) */
-       parse(semicolon);       /* let parser know about end of stmt */
+       parse(psym_semicolon);  /* let parser know about end of stmt */
        *force_nl = true;       /* force newline after an end of stmt */
     }
 }
 
 static void
-process_lbrace(bool *force_nl, bool *sp_sw, token_type hd_type,
+process_lbrace(bool *force_nl, bool *sp_sw, stmt_head hd,
     int *di_stack, int di_stack_cap, int *decl_ind)
 {
     ps.in_stmt = false;                /* don't indent the {} */
@@ -959,7 +956,7 @@
        ps.p_l_follow = 0;
        if (*sp_sw) {           /* check for unclosed if, for, etc. */
            *sp_sw = false;
-           parse(hd_type);
+           parse_hd(hd);
            ps.ind_level = ps.ind_level_follow;
        }
     }
@@ -984,7 +981,7 @@
     }
 
     *decl_ind = 0;
-    parse(lbrace);
+    parse(psym_lbrace);
     if (ps.want_blank)
        *code.e++ = ' ';
     ps.want_blank = false;
@@ -995,10 +992,10 @@
 static void
 process_rbrace(bool *sp_sw, int *decl_ind, const int *di_stack)
 {
-    if (ps.s_ttype[ps.tos] == decl && !ps.block_init)  /* semicolons can be
-                                                        * omitted in
-                                                        * declarations */
-       parse(semicolon);
+    if (ps.s_sym[ps.tos] == psym_decl && !ps.block_init) {
+       /* semicolons can be omitted in declarations */
+       parse(psym_semicolon);
+    }
 
     if (ps.p_l_follow != 0) {  /* check for unclosed if, for, else. */
        diag(1, "Unbalanced parens");
@@ -1030,9 +1027,9 @@
     }
 
     blank_line_before = false;
-    parse(rbrace);             /* let parser know about this */
+    parse(psym_rbrace);
     ps.search_brace = opt.cuddle_else
-       && ps.s_ttype[ps.tos] == if_expr_stmt
+       && ps.s_sym[ps.tos] == psym_if_expr_stmt
        && ps.s_ind_level[ps.tos] >= ps.ind_level;
 
     if (ps.tos <= 1 && opt.blanklines_after_procs && ps.decl_nest <= 0)
@@ -1053,7 +1050,7 @@
 
     *force_nl = true;          /* following stuff must go onto new line */
     *last_else = false;
-    parse(tt_ps_do);
+    parse(psym_do);
 }
 
 static void
@@ -1070,15 +1067,15 @@
 
     *force_nl = true;          /* following stuff must go onto new line */
     *last_else = true;
-    parse(tt_ps_else);
+    parse(psym_else);
 }
Prev by Date: [src/trunk]: src/usr.bin/indent indent: rename form_feed to tt_lex_form_feed
Next by Date: [src/trunk]: src/usr.bin/indent indent: rename local variable sp_sw to spaced...
Previous by Thread: [src/trunk]: src/usr.bin/indent indent: rename form_feed to tt_lex_form_feed
Next by Thread: [src/trunk]: src/usr.bin/indent indent: rename local variable sp_sw to spaced...
Indexes:
Home | Main Index | Thread Index | Old Index