#!/usr/bin/env python

""" Runs CTM/BIC for the given parameters

CTM is the algorithm that efficiently implements the Bayesian Information Criterion (BIC) estimator

usage: ctm.py [-h] [-c PENALTY] -d MAX_DEPTH [-A ALPHABET] -s SAMPLE_PATH [-k KEEP] [--split SPLIT] [--check_consistency CHECK_CONSISTENCY] [--inspect INSPECT]
              [--perl_compatible PERL_COMPATIBLE] [--df {ct06,perl,g4l}] [--num_cores NUM_CORES] [-l LOG_FILE] [-i {quiet,debug,info,warning,error}]
              output

Estimates context tree

positional arguments:
  output                output tree file (ex. my_model.tree)

optional arguments:
  -h, --help            show this help message and exit
  -c PENALTY, --penalty PENALTY
                        Penalty constant
  -d MAX_DEPTH, --max_depth MAX_DEPTH
                        Max tree depth
  -A ALPHABET, --alphabet ALPHABET
                        Symbols of the alphabet. Ex. '0 1 2 3 4'
  -s SAMPLE_PATH, --sample_path SAMPLE_PATH
                        Sample path
  -k KEEP, --keep KEEP  Set 1 if you want to keep the full nodes details
  --split SPLIT         Split sample character
  --check_consistency CHECK_CONSISTENCY
                        Check consistency
  --inspect INSPECT     Inspect tree
  --perl_compatible PERL_COMPATIBLE
                        Keeps compatibility with original version in perl (def. False)
  --df {ct06,perl,g4l}  Penalization strategy
  --num_cores NUM_CORES
                        Number of processors for parallel processing
  -l LOG_FILE, --log_file LOG_FILE
                        Log file path
  -i {quiet,debug,info,warning,error}, --log_level {quiet,debug,info,warning,error}
                        Log level


Example:

python ctm.py -s fixtures/sample20000.txt -c 0.5 -d 6  ../output.tree


"""


import logging
from g4l.bic import BIC
from g4l.sample import Sample
from g4l.util.command_line_methods import ctm_argparser, set_log
from g4l.util.command_line_methods import keep, save_file, check_consistency


def run_ctm(X, args):
    """Fit a BIC estimator to ``X`` and report the resulting context tree.

    The estimator is configured from ``args``: ``args.penalty`` is passed
    positionally, and ``args.df``, ``keep(args)`` and
    ``args.perl_compatible`` are passed as the ``df_method``, ``keep_data``
    and ``perl_compatible`` keyword arguments. After fitting, the tree is
    logged at INFO level and handed to ``save_file`` and
    ``check_consistency`` together with ``args``.

    Args:
        X: the sample object given to ``BIC.fit``.
        args: parsed command-line options (as returned by ``ctm_argparser``).
    """
    logging.info("Estimating BIC tree:")

    # Collect the estimator settings first so the constructor call stays short
    penalty = args.penalty
    estimator_options = {
        'df_method': args.df,
        'keep_data': keep(args),
        'perl_compatible': args.perl_compatible,
    }
    estimator = BIC(penalty, **estimator_options)
    estimator.fit(X)

    # The fitted tree is exposed by the estimator as `context_tree`
    estimated_tree = estimator.context_tree
    logging.info("Tree found:")
    logging.info(estimated_tree.to_str(reverse=True))

    # Post-processing helpers receive both the tree and the CLI options
    for post_step in (save_file, check_consistency):
        post_step(estimated_tree, args)


if __name__ == '__main__':
    # Parse the command-line options and configure logging before any work
    args = ctm_argparser()
    set_log(args.log_file, args.log_level)

    # A non-empty alphabet string is broken into symbols on single spaces;
    # a missing/empty value is forwarded to Sample unchanged.
    raw_alphabet = args.alphabet
    alphabet = raw_alphabet.split(' ') if raw_alphabet else raw_alphabet

    # Sample is built from the path (the `name` attribute of
    # args.sample_path), not from the args.sample_path object itself.
    source_path = args.sample_path.name
    input_sample = Sample(source_path, alphabet, args.max_depth,
                          perl_compatible=args.perl_compatible,
                          subsamples_separator=args.split)

    run_ctm(input_sample, args)