# Possible template to start TP5

"""Compute the entropy of different models for text
            
Usage: compress [-m <model>] [-f <file>] [-o <order>]

Options:
-h --help      Show the description of the program
-f <file> --filename <file>  filename of the text to compress [default: Dostoevsky.txt]
-o <order> --order <order>  order of the model
-m <model> --model <model>  model for compression [default: IIDModel]
"""

import argparse, re
import numpy as np
import math
from collections import Counter 

from docopt import docopt

class IIDModel:
    """I.i.d. block model of a text.

    The text is cut into consecutive, non-overlapping blocks of ``order``
    characters (a trailing incomplete block is dropped) and every block is
    treated as an independent draw from one fixed distribution.  All
    entropies are expressed in bits per character, i.e. the per-block value
    divided by ``order``, so results for different orders are comparable.
    """

    def __init__(self, order=2):
        """Create an untrained model.

        Args:
            order: number of characters per block; must be at least 1.

        Raises:
            ValueError: if ``order`` is smaller than 1.
        """
        if order < 1:
            raise ValueError("order must be a positive integer")
        print("Creation of the model")
        self.order = order
        # Block -> number of occurrences in the last processed text.
        self.counts = Counter()
        # Total number of blocks in the last processed text.
        self.total = 0

    def _blocks(self, text):
        """Yield the consecutive non-overlapping blocks of ``text``."""
        size = self.order
        return (text[i:i + size] for i in range(0, len(text) - size + 1, size))

    def process(self, text):
        """Fit the block distribution on ``text``, replacing any earlier fit."""
        self.counts = Counter(self._blocks(text))
        self.total = sum(self.counts.values())

    def getEntropy(self, text):
        """Return the empirical block entropy of ``text`` in bits per character.

        The distribution is estimated from ``text`` itself; the state set by
        ``process`` is not used.  Returns 0.0 when ``text`` is shorter than
        one block.
        """
        counts = Counter(self._blocks(text))
        total = sum(counts.values())
        if total == 0:
            return 0.0
        # H = log2(N) - (1/N) * sum(c * log2(c)); this form never yields -0.0.
        weighted = sum(c * math.log2(c) for c in counts.values())
        return (math.log2(total) - weighted / total) / self.order

    def getCrossEntropy(self, text):
        """Return the cross-entropy of ``text`` under the fitted model.

        The result is in bits per character.  It is ``inf`` as soon as
        ``text`` contains a block that was never seen by ``process`` (no
        smoothing is applied), and 0.0 when ``text`` is shorter than one
        block.
        """
        counts = Counter(self._blocks(text))
        total = sum(counts.values())
        if total == 0:
            return 0.0
        bits = 0.0
        for block, occurrences in counts.items():
            seen = self.counts.get(block, 0)
            if seen == 0:
                # Zero model probability: the code length is unbounded.
                return float("inf")
            bits -= occurrences / total * math.log2(seen / self.total)
        return bits / self.order


class MarkovModel:
    """Markov model of a text with a context of ``order`` characters.

    Each character is predicted from the ``order`` characters that precede
    it, using conditional frequencies counted over every position of the
    text.  All entropies are expressed in bits per predicted character.
    With ``order == 0`` the context is empty and the model is a plain
    character-frequency model.
    """

    def __init__(self, order=2):
        """Create an untrained model.

        Args:
            order: length of the conditioning context; must be >= 0.

        Raises:
            ValueError: if ``order`` is negative.
        """
        if order < 0:
            raise ValueError("order must be a non-negative integer")
        print("Creation of the model")
        self.order = order
        # (context + next character) -> count in the last processed text.
        self.transitions = Counter()
        # context -> number of times it is followed by a character.
        self.contexts = Counter()

    def _count(self, text):
        """Return (transition counts, context counts) measured on ``text``."""
        size = self.order
        transitions = Counter(
            text[i:i + size + 1] for i in range(len(text) - size)
        )
        contexts = Counter()
        for gram, occurrences in transitions.items():
            contexts[gram[:size]] += occurrences
        return transitions, contexts

    def process(self, text):
        """Fit the transition statistics on ``text``, replacing any earlier fit."""
        self.transitions, self.contexts = self._count(text)

    def getEntropy(self, text):
        """Return the empirical conditional entropy of ``text``.

        This is H(next character | previous ``order`` characters) in bits,
        estimated from ``text`` itself; the state set by ``process`` is not
        used.  Returns 0.0 when ``text`` has no complete transition.
        """
        transitions, contexts = self._count(text)
        total = sum(transitions.values())
        if total == 0:
            return 0.0
        size = self.order
        bits = 0.0
        for gram, occurrences in transitions.items():
            bits -= occurrences * math.log2(occurrences / contexts[gram[:size]])
        return bits / total

    def getCrossEntropy(self, text):
        """Return the cross-entropy of ``text`` under the fitted model.

        The result is in bits per predicted character.  It is ``inf`` as
        soon as ``text`` contains a transition never seen by ``process`` (no
        smoothing is applied), and 0.0 when ``text`` has no complete
        transition.
        """
        transitions, _ = self._count(text)
        total = sum(transitions.values())
        if total == 0:
            return 0.0
        size = self.order
        bits = 0.0
        for gram, occurrences in transitions.items():
            seen = self.transitions.get(gram, 0)
            if seen == 0:
                # Zero model probability: the code length is unbounded.
                return float("inf")
            bits -= occurrences * math.log2(seen / self.contexts[gram[:size]])
        return bits / total


def preprocess(text):
    """Normalize the whitespace of ``text``.

    Every run of two or more whitespace characters is collapsed into a
    single space, then each remaining newline is turned into a space.
    Other isolated whitespace characters (e.g. a single tab) are kept.
    """
    # Raw string: "\s" in a plain literal is an invalid escape sequence.
    text = re.sub(r"\s\s+", " ", text)
    return text.replace("\n", " ")

# Encoding issues with UTF-8 input (possibly only with other texts)? Consider
# decoding what is read and using unicode literals (note the u prefix below):
#  f.read().decode('utf8')
#  blabla.join(u'dgfg')
#              ^


if __name__ == '__main__':

    # Retrieve the arguments from the command-line
    args = docopt(__doc__)
    print(args)

    def _read_text(path):
        """Read the file at ``path`` and return its preprocessed content."""
        # The context manager closes the file even if reading fails.
        with open(path) as handle:
            return preprocess(handle.read())

    # Read and preprocess the text
    src_text = _read_text(args["--filename"])
    target_text = _read_text("Goethe.txt")

    # Create the model
    models = {"IIDModel": IIDModel, "MarkovModel": MarkovModel}
    model_name = args["--model"]
    if model_name not in models:
        # Fail with a clear message instead of a NameError further down.
        raise SystemExit(
            "Unknown model {!r}; expected one of: {}".format(
                model_name, ", ".join(sorted(models))
            )
        )
    model_class = models[model_name]

    # --order has no default in the usage text, so docopt yields None when
    # it is omitted; fall back on the default order of the model class.
    if args["--order"] is None:
        model = model_class()
    else:
        model = model_class(int(args["--order"]))

    model.process(src_text)
    print(model.getEntropy(src_text))

    # Report the cross-entropy only for models that provide it.
    cross_entropy = getattr(model, "getCrossEntropy", None)
    if cross_entropy is not None:
        print(cross_entropy(target_text))