# coding: utf-8
import json
import os
import re
from bisect import bisect_left

from spacy.symbols import POS, ADJ, ADP, ADV, AUX, CCONJ, INTJ, NOUN, NUM, PART, PRON, SCONJ, VERB, X, DET, PROPN, PUNCT, SYM

# Directory containing this module, with a trailing path separator.
# Built with os.path so it works with any separator style; splitting
# __file__ on '\\' by hand only handled Windows-style paths.
direc = os.path.join(os.path.dirname(os.path.abspath(__file__)), '')

# Maps a POS tag name to its lookup table: a list of tuples built from the
# pairs stored in the tag's JSON file. Loaded once, at import time.
# NOTE(review): bin_lemma_search bisects these lists, so the JSON files are
# presumably sorted by word form already -- confirm.
lkp_tables = {}
for pos_tag in ['ADJ', 'ADP', 'ADV', 'AUX', 'NOUN', 'NUM', 'PART', 'PRON', 'VERB', 'X']:
    # Each table lives next to this module as '_<TAG>_lookup.JSON'.
    # The context manager closes the file even if json.load raises.
    with open('{0}_{1}_lookup.JSON'.format(direc, pos_tag), 'r', encoding='utf-8') as table_file:
        lkp_tables[pos_tag] = [tuple(pair) for pair in json.load(table_file)]



def bin_lemma_search(a, x):
    """Binary-search the sorted pair list ``a`` for the key ``x``.

    ``a`` holds (form, lemma) pairs ordered so that bisect can be used on
    it. Returns the second element of the first pair whose first element
    equals ``x`` (i.e. the lemma), or None when there is no such pair.
    """
    # ('x', '') sorts before every other pair starting with x, so bisect_left
    # lands on the first candidate entry for that key.
    position = bisect_left(a, (x, ''))
    if position == len(a):
        return None
    entry = a[position]
    if entry[0] != x:
        return None
    return entry[1]

class PolishLemmatizer(object):
    """Lookup-based lemmatizer for Polish.

    Lemmas come from lookup tables based on the Morfeusz dictionary
    (morfeusz.sgjp.pl/en) by the Institute of Computer Science PAS. Every
    table is a list of (form, lemma) pairs queried with binary search.
    On top of the plain lookup, adjectives and verbs get prefix-based
    handling ('nie', 'naj') and nouns get a case-sensitive lookup.
    """

    @classmethod
    def load(cls, path, index=None, exc=None, rules=None, lookup=None):
        """Create a lemmatizer; ``path`` is accepted but not used."""
        return cls(index, exc, rules, lookup)

    def __init__(self, index=None, exceptions=None, rules=None, lookup=None):
        """Build the tag-name mapping and the per-tag lookup tables.

        The lemmatizer is purely lookup based, so ``index``, ``exceptions``,
        ``rules`` and ``lookup`` are accepted but ignored; the tables come
        from the module-level ``lkp_tables`` loaded from JSON files.
        """
        # Integer symbol -> tag name. POS has no lookup table of its own, so
        # __call__ ends up treating it like 'X'.
        self.INT_TO_POS = {
            POS: 'POS',
            ADJ: 'ADJ',
            ADP: 'ADP',
            ADV: 'ADV',
            AUX: 'AUX',
            CCONJ: 'CCONJ',
            DET: 'DET',
            INTJ: 'INTJ',
            NOUN: 'NOUN',
            NUM: 'NUM',
            PART: 'PART',
            PRON: 'PRON',
            PROPN: 'PROPN',
            PUNCT: 'PUNCT',
            SCONJ: 'SCONJ',
            # SYM has a (empty) table below; map it so integer and string
            # SYM tags are handled the same way.
            SYM: 'SYM',
            VERB: 'VERB',
            X: 'X',
        }
        self.lemma_lookups = {
            tag: lkp_tables[tag]
            for tag in ('ADJ', 'ADP', 'ADV', 'AUX', 'NOUN', 'NUM',
                        'PART', 'PRON', 'VERB', 'X')
        }
        # Additional tags outside of the tagmap's range: they either have no
        # dictionary (empty table) or reuse another tag's table.
        self.lemma_lookups.update({
            'CCONJ': [],
            'INTJ': [],
            'SCONJ': [],
            'DET': lkp_tables['X'],
            'PROPN': lkp_tables['NOUN'],
            'PUNCT': [],
            'SYM': [],
        })

    @staticmethod
    def _first_lemma(table, candidates):
        """Return the lemma of the first candidate found in ``table``, else None."""
        if not table:
            # Tags without a dictionary use an empty table: nothing to search.
            return None
        for candidate in candidates:
            lemma = bin_lemma_search(table, candidate)
            if lemma:
                return lemma
        return None

    def lemmatize_adj(self, string, morphology):
        """Lemmatize an adjective, trying prefix-stripped forms first.

        For a 'nie' prefix the form without 'nie' and a following 'naj' is
        tried, then the form without 'nie'; for a 'naj' prefix the form
        without 'naj' is tried. The full string is always tried last.
        Returns a one-element list: the first lemma found, or ``string``
        itself when nothing matches. ``morphology`` is unused.
        """
        candidates = []
        if string[:3] == 'nie':
            without_nie = string[3:]
            if without_nie[:3] == 'naj':
                candidates.append(without_nie[3:])
            candidates.append(without_nie)
        if string[:3] == 'naj':
            candidates.append(string[3:])
        candidates.append(string)

        lemma = self._first_lemma(self.lemma_lookups['ADJ'], candidates)
        return [lemma or string]

    def lemmatize_verb(self, string, morphology):
        """Lemmatize a verb, using a different procedure for the 'nie' prefix.

        When the string starts with 'nie', the form without the prefix is
        looked up first; the full string is looked up afterwards. Returns a
        one-element list: the first lemma found, or ``string`` itself when
        nothing matches. ``morphology`` is unused.
        """
        candidates = []
        if string[:3] == 'nie':
            candidates.append(string[3:])
        candidates.append(string)

        lemma = self._first_lemma(self.lemma_lookups['VERB'], candidates)
        return [lemma or string]

    def lemmatize_noun(self, string, morphology):
        """Lemmatize a noun with a case-sensitive lookup.

        Case is taken into account so that incorrectly tagged proper names
        still work: the lowercased form is looked up first, and if the
        string contains uppercase characters the original form is looked up
        as well. Returns a one-element list: the first lemma found, or the
        lowercased string when nothing matches. ``morphology`` is unused.
        """
        lowered = string.lower()
        candidates = [lowered]
        if string != lowered:
            candidates.append(string)

        lemma = self._first_lemma(self.lemma_lookups['NOUN'], candidates)
        return [lemma or lowered]

    def __call__(self, string, univ_pos, morphology=None):
        """Return a one-element list with the lemma of ``string``.

        ``univ_pos`` is either an integer symbol or a tag name in any case.
        Tags that are unknown or have no lookup table are handled like 'X'.
        When no lemma is found the result is the lowercased string (for
        adjectives and verbs: the already lowercased input).
        """
        if isinstance(univ_pos, int):
            univ_pos = self.INT_TO_POS.get(univ_pos, 'X')
        univ_pos = univ_pos.upper()
        if univ_pos not in self.lemma_lookups:
            # Unknown string tags used to raise KeyError while unknown integer
            # tags fell back to 'X'; use the same fallback for both.
            univ_pos = 'X'

        if univ_pos == 'NOUN':
            # Nouns do their own case handling, so pass the string untouched.
            return self.lemmatize_noun(string, morphology)

        if univ_pos != 'PROPN':
            string = string.lower()

        if univ_pos == 'ADJ':
            return self.lemmatize_adj(string, morphology)
        if univ_pos == 'VERB':
            return self.lemmatize_verb(string, morphology)

        lemma = self._first_lemma(self.lemma_lookups[univ_pos], (string,))
        return [lemma or string.lower()]

    def lookup(self, string):
        """Return the lowercased string; no table is consulted."""
        return string.lower()


def lemmatize(string, index, exceptions, trie):
    """Placeholder that only prints a warning; all arguments are ignored.

    The lookup-based lemmatizer in this module is not meant to go through
    this function, so reaching it just reports that on stdout.
    """
    warning = 'This message should not appear, this lemmatizer should not use the function "lemmatize"'
    print(warning)
