#! /usr/bin/python3
# Last edited on 2026-04-21 18:46:04 by stolfi

import sys, os, re
from sys import stdin as inp, stdout as out, stderr as err
from random import uniform as rand
from math import inf, nan, sqrt, hypot, log, isfinite

def main():
  # Reads text lines from {inp}, ignoring those that are empty after
  # stripping leading and trailing whitespace.  For each remaining line,
  # records its length in characters and its number of words, and files
  # every word three ways: by position from the line start, by position
  # from the line end, and in the flat list of all words.  At the end,
  # writes the count of non-blank lines to {err} and calls {compute_stats}.
  debug = False
  data = {} # Collected data.
  clear_data(data)
  nread = 0   # Number of non-blank lines processed so far.
  max_nw = 0  # Largest number of words seen on any line so far.
  for raw in inp:
    line = raw.strip()
    if line == "": continue
    nread += 1
    if debug and nread <= 3: err.write(f"{line = !r}\n")
    data['line_lengths'].append(len(line))
    # Words are delimited by runs of one or more blanks:
    words = re.split(r"[ ]+", line)
    nw = len(words)
    data['words_per_line'].append(nw)
    if nw > max_nw: max_nw = nw
    for iw, word in enumerate(words):
      tally_word(word, data, 'words_beg', iw)
      tally_word(word, data, 'words_end', nw-1-iw)
      tally_word(word, data, 'words',     -1)
    if debug and nread == 3:
      err.write(f"{data['words_beg'] = !r}\n")
      err.write(f"{data['words_end'] = !r}\n")
  err.write(f"read {nread} lines\n")
  compute_stats(data, max_nw)
  return
  # ----------------------------------------------------------------------
  
def clear_data(data):
  # Sets (or resets) every collection field of the dict {data} to a
  # fresh empty list.  Other keys of {data} are left untouched.
  #
  #   {data['line_lengths']}    list of line lengths, in characters.
  #   {data['words_per_line']}  list of word counts, one per line.
  #   {data['words']}           list of all words.
  #   {data['words_beg'][iw]}   list of words at pos {iw} from line beg.
  #   {data['words_end'][iw]}   list of words at pos {iw} from line end.
  #
  for key in ('line_lengths', 'words_per_line', 'words', 'words_beg', 'words_end'):
    data[key] = []
  return
  # ----------------------------------------------------------------------

def compute_stats(data, max_nw):
  # Writes to {out} the statistics of the collected {data}: first the
  # line lengths, then the word counts per line, then the lengths of all
  # words, and finally, for each {iw} in {0..max_nw-1}, the lengths of
  # the words at position {iw} from the line start and from the line end.
  # The per-position reports use the overall word length statistics as
  # the reference.  Expects {data['words_beg']} and {data['words_end']}
  # to have at least {max_nw} entries each.

  def report(plot, kw, title, words):
    # Computes and writes the length statistics of {words}, with the
    # overall word length statistics as reference.
    stats = compute_word_length_stats(words)
    write_num_seq_stats(out, plot, kw, title, stats, wlen_stats)

  write_num_seq_stats(
    out, -1, 0, 'line lengths (characters)',
    compute_num_seq_stats(data['line_lengths']), None
  )
  write_num_seq_stats(
    out, -1, 0, 'words_per_line',
    compute_num_seq_stats(data['words_per_line']), None
  )

  wlen_stats = compute_word_length_stats(data['words'])
  write_num_seq_stats(out, 0, 0,'overall word lengths', wlen_stats, None)

  for iw in range(max_nw):
    pos = iw + 1 # Word position counted from 1.
    report(1, pos,  f'word lengths at position {pos} from line start', data['words_beg'][iw])
    report(2, -pos, f'word lengths at position {pos} from line end',   data['words_end'][iw])

  return
  # ----------------------------------------------------------------------
  
def compute_word_length_stats(words):
  # Returns the statistics, as computed by {compute_num_seq_stats},
  # of the lengths of the items in the list {words}.
  return compute_num_seq_stats(list(map(len, words)))
  # ----------------------------------------------------------------------
  
def compute_num_seq_stats(vals):
  # Given a list {vals} of numbers, returns a dict with fields
  #
  #   'num'  the number of values;
  #   'min'  the smallest value;
  #   'max'  the largest value;
  #   'avg'  the arithmetic mean;
  #   'dev'  the sample deviation (with divisor {num-1}).
  #
  # If {vals} is empty, 'min' is {+inf}, 'max' is 0, and both 'avg' and
  # 'dev' are {nan}.  If {vals} has a single value, 'dev' is {nan}.
  count = len(vals)
  if count == 0:
    return { 'num': count, 'min': +inf, 'max': 0, 'avg': nan, 'dev': nan }

  lo = min(vals)
  hi = max(vals)

  # Plain left-to-right accumulation of the sum:
  total = 0
  for v in vals: total += v
  mean = total/count

  # Sum of squared deviations from the mean:
  ssq = 0
  for v in vals:
    diff = v - mean
    ssq += diff*diff

  if count == 1:
    spread = nan # Sample deviation is undefined for a single value.
  else:
    spread = sqrt(ssq/(count - 1))

  return { 'num': count, 'min': lo, 'max': hi, 'avg': mean, 'dev': spread }
  # ----------------------------------------------------------------------
  
def write_num_seq_stats(wr, plot, kw, title, abs_stats, ref_stats):
  # Writes to {wr} a report of the statistics {abs_stats} (a dict with
  # fields 'num', 'min', 'max', 'avg', 'dev'), identified by {title}
  # and delimited by dashed lines.
  #
  # The 'num' line is always written.  The 'min', 'max', and 'avg'
  # lines are written only if 'num' is positive; 'min' and 'max' must
  # then be integers.  The 'dev' line is written, after any "PLOT"
  # lines, only if 'num' is at least 2 and 'avg' is finite.
  #
  # If {ref_stats} is not {None} and 'num' is at least 2, the 'avg'
  # line also shows the reference average {ref_stats['avg']} and the
  # "anomaly" {log(hypot(avg,dev)/hypot(ref_avg,dev))}, where {dev} is
  # {abs_stats['dev']}.  The anomaly is {nan} when that formula is
  # undefined (zero denominator, or zero or {nan} ratio).
  #
  # If 'num' is at least 2 and 'avg' is finite, also writes lines
  # starting with "PLOT" (presumably meant to be extracted by a plotting
  # script), with fields {plot}, an abscissa, the average, and the
  # anomaly.  If {plot} is positive, writes one such line with abscissa
  # {kw}.  If {plot} is zero, writes three lines, with abscissas -100,
  # 0, and +100.  If {plot} is negative, writes no "PLOT" lines.
  wr.write("------------------------------------------------------------------------\n")
  wr.write(f"statistics of {title}\n")
  abs_num = abs_stats['num']
  wr.write(f"num =     {abs_num:8d}\n")
  abs_avg = nan
  anom = nan
  if abs_num > 0:
    wr.write(f"min =     {abs_stats['min']:8d}\n")
    wr.write(f"max =     {abs_stats['max']:8d}\n")
    abs_avg = abs_stats['avg']
    wr.write(f"avg =       {abs_avg:11.2f}")
    if ref_stats is not None and abs_num >= 2:
      ref_avg = ref_stats['avg']
      wr.write(f"  ref = {ref_avg:11.2f}")
      abs_dev = abs_stats['dev']
      # Use {abs_dev} as fudge for both.  Guard the degenerate cases
      # (all terms zero) that would otherwise raise {ZeroDivisionError}
      # or a math domain error in {log}:
      den = hypot(ref_avg, abs_dev)
      ratio = hypot(abs_avg, abs_dev)/den if den > 0 else nan
      anom = log(ratio) if ratio > 0 else nan
      wr.write(f"  anomaly = {anom:+6.3f}")
    wr.write("\n")
  if abs_num >= 2 and isfinite(abs_avg):
    if plot > 0:
      wr.write(f"PLOT {plot} {kw} {abs_avg:11.2f} {anom:+6.3f}\n")
    elif plot == 0:
      # Three points with the same ordinate, at abscissas -100, 0, +100:
      wr.write(f"PLOT {plot} -100 {abs_avg:11.2f} {anom:+6.3f}\n")
      wr.write(f"PLOT {plot}    0 {abs_avg:11.2f} {anom:+6.3f}\n")
      wr.write(f"PLOT {plot} +100 {abs_avg:11.2f} {anom:+6.3f}\n")
    abs_dev = abs_stats['dev']
    wr.write(f"dev =       {abs_dev:11.2f}\n")
  wr.write("------------------------------------------------------------------------\n")
  return
  # ----------------------------------------------------------------------

def tally_word(word, data, key, kw):
  # Appends {word} to one of the lists stored under {data[key]}.
  #
  # If {kw} is negative, {data[key]} itself is the list that receives
  # {word}.  Otherwise {data[key]} is a list of lists; it is extended
  # with empty lists as needed so that index {kw} exists, and {word} is
  # appended to {data[key][kw]}.
  target = data[key]
  if kw >= 0:
    while len(target) <= kw:
      target.append([])
    target = target[kw]
  target.append(word)
  return
  # ----------------------------------------------------------------------

# Run the program only when this file is executed as a script, so that
# importing it does not start reading the standard input.
if __name__ == "__main__":
  main()
