Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/usr.bin/indent indent: clean up and document input handling



details:   https://anonhg.NetBSD.org/src/rev/65cda4ad7e68
branches:  trunk
changeset: 1026545:65cda4ad7e68
user:      rillig <rillig%NetBSD.org@localhost>
date:      Sun Nov 28 11:49:10 2021 +0000

description:
indent: clean up and document input handling

The transformation that moves comments from after an 'if (expr)' to after
the following brace has a large implementation cost (about 300 lines of
code) and makes input handling quite complicated. Document the overall
idea to save future readers some time.

No functional change.

diffstat:

 usr.bin/indent/indent.c |   6 ++--
 usr.bin/indent/indent.h |   4 +-
 usr.bin/indent/io.c     |  69 +++++++++++++++++++++++++++++++++++++-----------
 3 files changed, 58 insertions(+), 21 deletions(-)

diffs (209 lines):

diff -r 29dcecd4c638 -r 65cda4ad7e68 usr.bin/indent/indent.c
--- a/usr.bin/indent/indent.c   Sun Nov 28 10:11:15 2021 +0000
+++ b/usr.bin/indent/indent.c   Sun Nov 28 11:49:10 2021 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: indent.c,v 1.237 2021/11/27 21:15:58 rillig Exp $      */
+/*     $NetBSD: indent.c,v 1.238 2021/11/28 11:49:10 rillig Exp $      */
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
@@ -43,7 +43,7 @@
 
 #include <sys/cdefs.h>
 #if defined(__NetBSD__)
-__RCSID("$NetBSD: indent.c,v 1.237 2021/11/27 21:15:58 rillig Exp $");
+__RCSID("$NetBSD: indent.c,v 1.238 2021/11/28 11:49:10 rillig Exp $");
 #elif defined(__FreeBSD__)
 __FBSDID("$FreeBSD: head/usr.bin/indent/indent.c 340138 2018-11-04 19:24:49Z oshogbo $");
 #endif
@@ -322,7 +322,7 @@
     }
 
     debug_inp(__func__);
-    inp_comment_rtrim();
+    inp_comment_rtrim_blank();
 
     if (opt.swallow_optional_blanklines ||
        (!comment_buffered && remove_newlines)) {
diff -r 29dcecd4c638 -r 65cda4ad7e68 usr.bin/indent/indent.h
--- a/usr.bin/indent/indent.h   Sun Nov 28 10:11:15 2021 +0000
+++ b/usr.bin/indent/indent.h   Sun Nov 28 11:49:10 2021 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: indent.h,v 1.105 2021/11/27 21:15:58 rillig Exp $      */
+/*     $NetBSD: indent.h,v 1.106 2021/11/28 11:49:10 rillig Exp $      */
 
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
@@ -377,7 +377,7 @@
 void inp_comment_add_range(const char *, const char *);
 bool inp_comment_complete_block(void);
 bool inp_comment_seen(void);
-void inp_comment_rtrim(void);
+void inp_comment_rtrim_blank(void);
 void inp_comment_rtrim_newline(void);
 void inp_comment_insert_lbrace(void);
 
diff -r 29dcecd4c638 -r 65cda4ad7e68 usr.bin/indent/io.c
--- a/usr.bin/indent/io.c       Sun Nov 28 10:11:15 2021 +0000
+++ b/usr.bin/indent/io.c       Sun Nov 28 11:49:10 2021 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: io.c,v 1.142 2021/11/27 21:15:58 rillig Exp $  */
+/*     $NetBSD: io.c,v 1.143 2021/11/28 11:49:10 rillig Exp $  */
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
@@ -43,7 +43,7 @@
 
 #include <sys/cdefs.h>
 #if defined(__NetBSD__)
-__RCSID("$NetBSD: io.c,v 1.142 2021/11/27 21:15:58 rillig Exp $");
+__RCSID("$NetBSD: io.c,v 1.143 2021/11/28 11:49:10 rillig Exp $");
 #elif defined(__FreeBSD__)
 __FBSDID("$FreeBSD: head/usr.bin/indent/io.c 334927 2018-06-10 16:44:18Z pstef $");
 #endif
@@ -55,18 +55,41 @@
 
 #include "indent.h"
 
+/*
+ * There are 3 modes for reading the input.
+ *
+ * default: In this mode, the input comes from the input file. The buffer
+ * 'inp' contains the current line, terminated with '\n'. The current read
+ * position is inp.s, and there is always inp.buf <= inp.s < inp.e. All other
+ * pointers are null.
+ *
+ * copy-in: After reading 'if (expr)' or similar tokens, the input still comes
+ * from 'inp', but instead of processing it, it is copied to 'save_com'. The
+ * goal of this mode is to move the comments after the '{', that is to
+ * transform 'if (expr) comment {' to 'if (expr) { comment'. When the next
+ * token cannot be part of this transformation, switch to copy-out.
+ *
+ * copy-out: In this mode, the input comes from 'save_com', which contains the
+ * tokens to be placed after the '{'. The input still comes from the range
+ * [inp.s, inp.e), but these two members have been overwritten with pointers
+ * into save_com_buf, so inp.buf and inp.s are unrelated, which is unusual.
+ * In this mode, inp.e[-1] is usually not terminated with '\n'. After reading
+ * all tokens from save_com, switch to default mode again.
+ */
 static struct {
     struct buffer inp;         /* one line of input, ready to be split into
-                                * tokens; occasionally this buffer switches
+                                * tokens; occasionally 's' and 'e' switch
                                 * to save_com_buf */
     char save_com_buf[5000];   /* input text is saved here when looking for
                                 * the brace after an if, while, etc */
-    char *save_com_s;          /* start of the comment in save_com_buf */
-    char *save_com_e;          /* end of the comment in save_com_buf */
+    char *save_com_s;          /* start of the comment in save_com_buf, or
+                                * null */
+    char *save_com_e;          /* end of the comment in save_com_buf, or
+                                * null */
 
     char *saved_inp_s;         /* saved value of inp.s when taking input from
-                                * save_com */
-    char *saved_inp_e;         /* saved value of inp.e */
+                                * save_com, or null */
+    char *saved_inp_e;         /* saved value of inp.e, or null */
 } inbuf;
 
 static int paren_indent;
@@ -92,10 +115,6 @@
 const char *
 inp_line_start(void)
 {
-    /*
-     * The comment we're about to read usually comes from inp.buf, unless it
-     * has been copied into save_com.
-     */
     return inbuf.saved_inp_s != NULL ? inbuf.save_com_buf : inbuf.inp.buf;
 }
 
@@ -149,6 +168,9 @@
 void
 debug_inp(const char *prefix)
 {
+    assert(inp_line_start() <= inbuf.inp.s);
+    assert(inbuf.inp.s <= inbuf.inp.e);
+
     debug_println("%s %s:", __func__, prefix);
     if (inbuf.saved_inp_s == NULL)
        debug_inp_buf("inp.buf", inbuf.inp.buf, inbuf.inp.s);
@@ -231,6 +253,10 @@
 {
     if (inbuf.save_com_e == NULL) {    /* if this is the first comment, we
                                         * must set up the buffer */
+       /*
+        * XXX: No space is reserved for a potential '{' here, unlike in
+        * inp_comment_init_comment.
+        */
        inbuf.save_com_s = inbuf.save_com_buf;
        inbuf.save_com_e = inbuf.save_com_s;
     } else {
@@ -269,19 +295,25 @@
 }
 
 void
-inp_comment_rtrim(void)
+inp_comment_rtrim_blank(void)
 {
-    while (inbuf.save_com_e > inbuf.save_com_s && ch_isblank(inbuf.save_com_e[-1]))
+    while (inbuf.save_com_e > inbuf.save_com_s &&
+           ch_isblank(inbuf.save_com_e[-1]))
        inbuf.save_com_e--;
 }
 
 void
 inp_comment_rtrim_newline(void)
 {
-    while (inbuf.save_com_e > inbuf.save_com_s && inbuf.save_com_e[-1] == '\n')
+    while (inbuf.save_com_e > inbuf.save_com_s &&
+           inbuf.save_com_e[-1] == '\n')
        inbuf.save_com_e--;
 }
 
+/*
+ * Switch the input to come from save_com, replaying the copied tokens while
+ * looking for the next '{'.
+ */
 void
 inp_from_comment(void)
 {
@@ -289,7 +321,7 @@
     inbuf.saved_inp_s = inbuf.inp.s;
     inbuf.saved_inp_e = inbuf.inp.e;
 
-    inbuf.inp.s = inbuf.save_com_s;    /* redirect lexi input to save_com_s */
+    inbuf.inp.s = inbuf.save_com_s;
     inbuf.inp.e = inbuf.save_com_e;
     inbuf.save_com_s = NULL;
     inbuf.save_com_e = NULL;
@@ -521,6 +553,7 @@
        output_char(line_terminator);
        ps.stats.lines++;
 
+       /* TODO: rename to blank_line_after_decl */
        if (ps.just_saw_decl == 1 && opt.blanklines_after_decl) {
            blank_line_before = true;
            ps.just_saw_decl = 0;
@@ -643,8 +676,8 @@
     skip_blank(&p);
     if (!skip_string(&p, "INDENT"))
        return;
+
     skip_blank(&p);
-
     if (*p == '*' || skip_string(&p, "ON"))
        on = true;
     else if (skip_string(&p, "OFF"))
@@ -661,6 +694,10 @@
 
     inhibit_formatting = !on;
     if (on) {
+       /*
+        * XXX: Does this make sense? Is the handling of blank lines above
+        * INDENT OFF comments essentially the same?
+        */
        blank_lines_to_output = 0;
        blank_line_after = false;
        blank_line_before = false;



Home | Main Index | Thread Index | Old Index