#! /usr/bin/python3
# Last edited on 2026-01-29 15:15:48 by stolfi

# To be included in python3 programs.

import sys, os, re
from sys import stdout as out, stderr as err, stdin as inp
from process_funcs import bash, basic_line_loop

def read_chinese_char_set(file_name):
  # Reads from file {file_name} a set of Chinese characters in UTF-8.
  # Ignores blank lines and #-comments (whole-line or trailing).
  # Otherwise assumes that each data line has a single Chinese character
  # in UTF-8 encoding, possibly surrounded by spaces.  Returns a set of
  # strings that are those characters.
  #
  # Writes progress and error messages to {err}.  Terminates the program
  # with status 1 (through {sys.exit}) if some data line does not reduce
  # to exactly one character, if the file has no lines at all, or if it
  # has no data lines.

  nitem = 0  # Count of data lines accepted so far (repeated chars count each time).
  nread = 0  # Count of file lines read so far; also the current line number.

  def set_line_error(msg):
    # Writes {msg} to {err}, tagged with the file name and the
    # current line number, and aborts the program.
    err.write(f"{file_name}:{nread}: ** {msg}\n")
    sys.exit(1)
    # ----------------------------------------------------------------------

  err.write(f"reading {file_name} ...\n")
  S = set()
  # The {with} makes sure the file gets closed even if {set_line_error} exits:
  with open(file_name, "r", encoding='utf-8') as rd:
    for line in rd:
      nread += 1
      # Skip blank lines and whole-line comments:
      if re.match(r"^[ ]*([#]|$)", line): continue
      # Strip the trailing comment, then all blanks and the end-of-line:
      line = re.sub(r"[#].*$", "", line)
      line = re.sub(r"[ \n]", "", line)
      if len(line) != 1:
        set_line_error(f"charset line '{line}' has {len(line)} chars")
      S.add(line)
      # Count the item here, not at end-of-file, so that the
      # summary and the "no chars" check below are meaningful:
      nitem += 1
  err.write(f"loaded {nitem} chars from {nread} lines\n")
  if nread == 0: set_line_error(f"file {file_name} is empty")
  if nitem == 0: set_line_error(f"file {file_name} contains no chars")
  return S
  # ----------------------------------------------------------------------
