#! /usr/bin/python3
# Last edited on 2025-11-04 00:36:54 by stolfi

import os, sys, re
from error_funcs import file_line_error
from sys import stderr as err, stdout as out

def main() -> None:
  """
  Filters the text read from standard input and writes it to standard output.

  Expects two integer command line arguments, BEG and LIM, in that order.
  Input lines are numbered from 1, counting every line read.  Each line is
  stripped of leading and trailing whitespace and then handled as follows:

    * A line that is empty or starts with "#" is written as an empty line,
      whatever its line number.

    * Any other line whose number is in the range BEG <= number < LIM is
      passed through {filter_line} and the result is written.

    * Any other line is dropped (nothing is written).

  At the end, the counts of lines read and written are reported on
  standard error.  Exits with an error message if BEG or LIM is missing
  or is not an integer.
  """
  prog = sys.argv[0] if len(sys.argv) > 0 else "prog"
  if len(sys.argv) < 3:
    sys.exit(f"usage: {prog} BEG LIM < infile > outfile")
  try:
    beg = int(sys.argv[1])
    lim = int(sys.argv[2])
  except ValueError:
    sys.exit(f"{prog}: BEG and LIM must be integers, got {sys.argv[1]!r} {sys.argv[2]!r}")
  err.write(f"{beg = } {lim = }\n")

  nread = 0; nwrite = 0

  for raw in sys.stdin:
    line = raw.strip()
    nread += 1

    if line == "" or line[0] == "#":
      # Blank and comment lines become empty lines, even outside the range:
      out.write("\n"); nwrite += 1
    elif beg <= nread < lim:
      out.write(filter_line(line)); out.write("\n"); nwrite += 1

  err.write(f"total {nread} lines read {nwrite} lines written\n")
  return
    
  # ======================================================================
    
def filter_line(line: str) -> str:
  """
  Returns a copy of {line} reduced to lowercase tokens separated by
  single spaces.

  The steps, in order: each punctuation character is replaced by a space;
  each digit and each of the characters "+", "¤", "¦", "$" is replaced
  by " * "; letters are mapped to lowercase; leading and trailing
  whitespace is removed; and every run of two or more spaces is squeezed
  to one space.  Only the space character is squeezed; other whitespace
  inside the line (such as tabs) is left alone.
  """

  # Replace all punctuation by spaces:
  line = re.sub(r"[-¡!¿?.,:;`'~()«»_/\[\]]", r" ", line)

  # Replace each digit or symbol character by " * ".
  # NOTE(review): every character is replaced separately, so "12" yields
  # "* *" rather than a single "*" -- confirm that this is intended.
  line = re.sub(r"[+¤0-9¦$]", r" * ", line)

  # Map all letters to lowercase:
  line = line.lower()

  # Normalize spaces:
  line = line.strip()
  line = re.sub(r"[ ]{2,}", " ", line)

  return line
  # ----------------------------------------------------------------------

# Run the filter only when this file is executed as a script, so that
# importing it does not read the command line or standard input.
if __name__ == "__main__":
  main()
