#! /usr/bin/gawk -f
# Last edited on 2021-02-18 12:24:25 by jstolfi

# Reads a Python program. Writes it to {stdout}
# after changing all triple-quoted docstrings to comments.
#
# Only docstrings delimited by ''' are recognized.  A docstring may span
# several lines or be written on a single line ('''text''').  Diagnostics
# are written to {stderr} as "FILE:LINE: ** message" and the exit status
# is 1 when the input cannot be handled.
#
# Requires gawk (uses {patsplit} and the "/dev/stderr" special file).

BEGIN {
  abort = -1        # Exit status requested by {data_error}; negative means "no error".
  last_was_def = 0  # True once a "def" or "class" line has been seen.
                    # NOTE: never reset, so after the first "def"/"class" any
                    # line that starts with ''' is taken as a docstring start.
  in_docstring = 0  # True if current line starts inside a docstring.
  indent = 2        # By how much the next docstring should be indented.
}

# Stop reading input once an error has been flagged.
(abort >= 0) { exit(abort) }

# A "def" or "class" header outside a docstring: copy it to the output and
# remember its indentation.  The docstring body is expected two columns in.
/^ *(def|class) / {
  if (! in_docstring) {
    print
    last_was_def = 1
    match($0, /^ */)  # RLENGTH is the number of leading blanks.
    indent = 2 + RLENGTH
    next
  }
}

# A line that starts inside a docstring.
(in_docstring) {
  n = patsplit($0, fld, /[']['][']/, sep)
  if (n == 0) {
    # Still part of docstring:
    print commentify($0, indent)
    next
  } else if (n == 1) {
    # End of docstring.  {sep[0]} is the text before the closing quotes,
    # {sep[1]} whatever follows them (copied as code, not as a comment).
    if (sep[0] !~ /^ *$/) { print commentify(sep[0], indent) }
    if (sep[1] !~ /^ *$/) { printf "%*s%s\n", indent, "", sep[1] }
    in_docstring = 0
    next
  } else {
    data_error("multiple ''' on same docstring line")
  }
}

# A line that begins (after optional blanks) with ''' outside a docstring.
/^ *[']['][']/ {
  if (! last_was_def) { data_error("line with ''' not docstring") }
  n = patsplit($0, fld, /[']['][']/, sep)
  # Cannot happen given the rule pattern above; kept as a sanity check.
  if (sep[0] !~ /^[ ]*$/) { data_error("duh?") }
  if (n == 1) {
    # Start of a multi-line docstring; {sep[1]} is the text after the quotes.
    if (sep[1] !~ /^[ ]*$/) { printf "%*s# %s\n", indent, "", sep[1] }
    in_docstring = 1
    next
  } else if (n == 2) {
    # One-line docstring: {sep[1]} is the text between the two delimiters,
    # {sep[2]} whatever follows the closing one (copied as code).
    if (sep[1] !~ /^[ ]*$/) { printf "%*s# %s\n", indent, "", sep[1] }
    if (sep[2] !~ /^[ ]*$/) { printf "%*s%s\n", indent, "", sep[2] }
    next
  } else {
    data_error("multiple ''' on same docstring line")
  }
}

# Any other line.  The docstring rule above always ends with {next} or an
# exit, so the {in_docstring} branch is only a safeguard.
// {
  if (in_docstring) {
    print(commentify($0, indent))
  } else {
    print $0
  }
}

END {
  if (abort >= 0) { exit(abort) }
  if (in_docstring) { data_error("file ends inside docstring") }
}

function commentify(s,indent,    x,y) {
  # Returns {s} with "# " inserted after the first {indent} characters,
  # which must all be blanks (otherwise the program aborts through
  # {data_error}).  A blank or empty {s} yields {indent} blanks followed
  # by a bare "#".  The parameters {x,y} are local work variables.
  if (s ~ /^ *$/) {
    return sprintf("%*s#", indent, "")
  } else {
    x = substr(s, 1, indent)
    y = substr(s, indent+1)
    if (x !~ /^ *$/) { data_error("docstring line is not properly indented: '" x "'") }
    return (x "# " y)
  }
}

function data_error(msg) {
  # Reports {msg} with the current file name and line number on {stderr},
  # so that it does not get mixed into the converted program on {stdout},
  # then terminates with exit status 1.
  printf "%s:%d: ** %s\n", FILENAME, FNR, msg > "/dev/stderr"
  abort = 1
  exit(1)
}