    # Copyright (C) 2013 Agnieszka Patejuk

    # This program is free software: you can redistribute it and/or modify
    # it under the terms of the GNU General Public License as published by
    # the Free Software Foundation, either version 3 of the License, or
    # (at your option) any later version.

    # This program is distributed in the hope that it will be useful,
    # but WITHOUT ANY WARRANTY; without even the implied warranty of
    # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    # GNU General Public License for more details.

    # You should have received a copy of the GNU General Public License
    # along with this program.  If not, see <http://www.gnu.org/licenses/>.

# ATTENTION: there seems to be a problem with words such as TAKI, TEN (loads of entries)

import itertools
import re

from morfeusz_nkjp import *

# Handle on the "slowwyj" definitions file; it is opened for reading at
# import time, relative to the current working directory.
gfjpdef = open(
    "dicts2xle/gfjp_slowwyj.pl",
    "r",
)

def entryfind(entry):
    """Return the text between the first "(" of *entry* and the next ")".

    Only the piece lying between the first and the second "(" is looked at,
    so nested parentheses are not balanced.  Raises IndexError when *entry*
    contains no "(" at all.
    """
    pieces = entry.split("(")
    after_open = pieces[1]
    inside, _sep, _rest = after_open.partition(")")
    return inside

def makeChangeDict(file):
    """Build the lemma -> tag-replacement table from the lines of *file*.

    A line is parsed when it contains "slowwyj" and does not start with
    "%".  The text returned by entryfind() for that line is split on ","
    into lemma, old tag and new tag; surrounding whitespace is stripped from
    all three and surrounding single quotes from the lemma.

    Args:
        file: iterable of lines (e.g. an open file).  Byte strings are
            decoded as UTF-8; lines that are already text are used as-is.

    Returns:
        dict mapping each lemma to a list of unique ``(oldtag, newtag)``
        tuples, in order of first appearance.

    Raises:
        IndexError: if a parsed line has no "(" or fewer than three
            comma-separated fields.
    """
    chdct = {}
    for line in file:
        if "slowwyj" not in line:
            continue
        # some good entries have comments at the end, so only lines that
        # *start* with the comment marker are skipped
        if line.startswith("%"):
            continue
        # Decode only byte strings: text lines have no usable .decode()
        # (Python 3) or would go through an implicit ASCII encode first
        # (Python 2 unicode).
        if isinstance(line, bytes):
            line = line.decode("utf-8")
        fields = entryfind(line).split(",")
        # some lemmas contain quotes
        lemma = fields[0].strip().strip("'")
        change = (fields[1].strip(), fields[2].strip())
        changes = chdct.setdefault(lemma, [])
        if change not in changes:
            changes.append(change)
    return chdct

# Build the module-level change table once at import time, then close the
# definitions file so the handle is not left open for the life of the process.
try:
    chdct = makeChangeDict(gfjpdef)
finally:
    gfjpdef.close()

def tagtrans(analyses):
    """Rewrite token tags in *analyses* using the module-level ``chdct`` table.

    Each analysis is a sequence of tokens indexed as ``(orth, lemma, tags)``
    where ``tags`` is a "|"-separated string of alternative tags.  For a
    token whose lemma is a key of ``chdct``, every ``(badtag, newtag)`` rule
    stored for that lemma is applied to every alternative tag:

    * if ``badtag`` contains ":T" and the part of ``badtag`` before its
      first ":" matches (``re.match``, i.e. as a regex anchored at the
      start) the part of the tag before its first ":", the tag keeps its
      tail and gets the part of ``newtag`` before its first ":" as a prefix;
    * if the tag equals ``badtag`` exactly, it is replaced by ``newtag``.

    Args:
        analyses: sequence of analyses as described above.

    Returns:
        A list of rewritten analyses.  Empty analyses, duplicates, and
        analyses whose first token's tags contain "zsyp" are left out.  If
        any token has an empty lemma, the original *analyses* object is
        returned untouched.
    """
    newanalyses = []
    for analysis in analyses:
        newanalysis = []
        for token in analysis:
            orth = token[0]
            lemma = token[1]
            if not lemma:
                # A token without a lemma aborts the whole translation.
                return analyses
            if lemma not in chdct:
                newanalysis.append(token)
                continue

            tags = token[2].split("|")
            # NOTE(review): every rule appends one token (rewritten if it
            # matched, the untouched token otherwise), so a lemma with N
            # rules contributes N tokens to the analysis.  This is kept
            # as-is; it may be what the "loads of entries" remark at the
            # top of the file refers to -- confirm before changing.
            for badtag, newtag in chdct[lemma]:
                wildcard = ":T" in badtag
                if wildcard:
                    # Rule-level values, computed once per rule.  When a
                    # string has no ":", find() returns -1 and the slice
                    # drops the last character instead.
                    badtagpos = badtag[:badtag.find(":")]
                    newpos = newtag[:newtag.find(":")]
                rewritten = []
                for tag in tags:
                    if wildcard:
                        colon = tag.find(":")
                        if re.match(badtagpos, tag[:colon]):
                            rewritten.append(newpos + tag[colon:])
                    if badtag == tag:
                        rewritten.append(newtag)
                if rewritten:
                    newanalysis.append((orth, lemma, "|".join(rewritten)))
                else:
                    newanalysis.append(token)
            # Only reachable when the lemma has an empty rule list and no
            # earlier token was collected.
            if not newanalysis:
                newanalysis.append(token)

        if not newanalysis:
            continue
        if newanalysis in newanalyses:
            continue
        if "zsyp" in newanalysis[0][2]:
            continue
        newanalyses.append(newanalysis)
    return newanalyses

def cartesian_product(lists, previous_elements=None):
    """Yield every combination taking one element from each of *lists*.

    Combinations come out as new lists, in the order produced by
    ``itertools.product`` (the first list varies slowest).

    Args:
        lists: sequence of iterables to combine.
        previous_elements: optional elements prepended to every yielded
            combination; defaults to no prefix.

    Yields:
        list: ``previous_elements`` followed by one element per input list.
        If any input list is empty nothing is yielded; if *lists* itself is
        empty a single combination holding only the prefix is yielded.
    """
    # None instead of a shared mutable [] default; copy so the caller's
    # list is never aliased by the yielded combinations.
    prefix = [] if previous_elements is None else list(previous_elements)
    for combination in itertools.product(*lists):
        yield prefix + list(combination)

def expand_analyses(analyses):
    """Expand every analysis into all of its single-tag readings.

    For each segment (indexed as orth, base, tags) the tags are run through
    expand_tags(), which is not defined in this file (presumably supplied by
    the morfeusz_nkjp star import).  One ``(orth, base, tag)`` tuple is built
    per resulting tag, and cartesian_product() then combines the
    per-segment alternatives.

    Returns a flat list with the combinations of all analyses, in order.
    """
    expanded = []
    for analysis in analyses:
        per_segment = []
        for segment in analysis:
            form, lemma = segment[0], segment[1]
            per_segment.append(
                [(form, lemma, tag) for tag in expand_tags(segment[2])]
            )
        expanded.extend(cartesian_product(per_segment))
    return expanded

def tagtranalyse(word):
    """Analyse *word*, translate its tags and expand the result.

    Runs analyse() with three False flags (analyse is not defined in this
    file, so their meaning is not visible here), feeds the result through
    tagtrans() and returns what expand_analyses() makes of it.
    """
    raw = analyse(word, False, False, False)
    translated = tagtrans(raw)
    return expand_analyses(translated)
