#!/usr/bin/python
# -*- coding: utf-8 -*- 

    # Copyright (C) 2010–2015 Agnieszka Patejuk

    # This program is free software: you can redistribute it and/or modify
    # it under the terms of the GNU General Public License as published by
    # the Free Software Foundation, either version 3 of the License, or
    # (at your option) any later version.

    # This program is distributed in the hope that it will be useful,
    # but WITHOUT ANY WARRANTY; without even the implied warranty of
    # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    # GNU General Public License for more details.

    # You should have received a copy of the GNU General Public License
    # along with this program.  If not, see <http://www.gnu.org/licenses/>.

import sys
import os
# NOTE: for reducing frames
from itertools import *

# NOTE: debugging messages collected during conversion (frames to be inspected manually)
debug = []
# NOTE: multi-word expressions found in fixed(...) arguments (filled by makeSingleConstraint)
fixed = []

# NOTE: name of the file with realisation definitions (read by getArgRealDefs)
real_defs = 'realizations_20140608.txt'

# NOTE: matching control labels, each pair is (controller label, controllee label)
control_pairs = [('controller', 'controllee'), ('controller2', 'controllee2')]

##########################################################

# BASIC FUNCTIONS

def onlyNonempty(clist):
    """Return a new list holding only the truthy items of clist, in order."""
    kept = []
    for item in clist:
        if item:
            kept.append(item)
    return kept

def cpxlemma(lemma):
    """Join the whitespace-separated parts of a lemma with underscores."""
    words = lemma.split()
    return "_".join(words)

def frameAsp(aspect):
    """Return the constraining equation for the given aspect, or None for '_'."""
    if aspect == '_':
        # NOTE: underspecified aspect, no constraint
        return None
    return '(^ TNS-ASP ASPECT)=c '+aspect

def frameNeg(negation):
    """Return the NEG constraint for 'aff' or 'neg'; None for any other value."""
    constraints = {
        'aff': '~(^ NEG)',
        'neg': '(^ NEG)=c +',
    }
    return constraints.get(negation)

def getFrameParts(line):
    """Split a colon-separated entry into [lemma, frame, aspect, negation].

    Two layouts are accepted:
      lemma: aspect: frame            (negation is returned as False)
      lemma: negation: aspect: frame

    Raises ValueError for any other number of fields (previously this
    surfaced as an UnboundLocalError on the return statement).
    """
    fields = [field.strip() for field in line.split(':')]
    lemma = fields[0]
    frame = fields[-1]
    if len(fields) == 3:
        aspect = fields[1]
        negation = False
    elif len(fields) == 4:
        negation = fields[1]
        aspect = fields[2]
    else:
        raise ValueError('malformed entry (expected 3 or 4 colon-separated fields, got %d): %r'
                         % (len(fields), line))
    return [lemma, frame, aspect, negation]

def getArgtypeParams(arg):
    """Return (argtype, params) for an argument such as 'prepnp(na,acc)'.

    argtype is the text before the first '('; params is the comma-split text
    between the last '(' and the first ')' that follows it. For an argument
    without parentheses, params is a one-element list holding the argument.
    """
    argtype = arg.partition('(')[0]
    inner = arg.rpartition('(')[2].partition(')')[0]
    return (argtype, inner.split(','))

def checkREFL(gfargs, lemma):
    """Return True when the lemma contains '_się' (gfargs is not consulted)."""
    # NOTE: inherent SIĘ
    return '_się' in lemma

# CONVERSION: getting GF

def getGF(arg, ctrl):
    """Map a single realisation (e.g. 'np(dat)') to a lowercase GF label.

    ctrl is a list of control labels or False. Returns 'GFTODO' for 'or'
    arguments and 'GFWEIRD' when no rule applies.
    """
    argtype, params = getArgtypeParams(arg)
    # NOTE: only the plain 'controllee' label is checked here
    is_controllee = bool(ctrl) and 'controllee' in ctrl
    if argtype == 'or':
        return 'GFTODO'
    # ATTENTION: does not take coordination into account (np or ncp --> OBJ rather than COMP)
    if argtype in ('np', 'ncp', 'lexnp'):
        # NOTE: possibly XCOMP-PRED, if arg = controllee
        if is_controllee:
            return 'xcomp-pred'
        case = params[0]
        if case == 'dat':
            return 'obj-th'
        if case in ('acc', 'str', 'part'):
            return 'obl-str'
        if case in ('gen', 'inst'):
            return 'obl-'+case
        # NOTE: any other case matches no rule
        return 'GFWEIRD'
    if argtype == 'prepadjp':
        return 'xcomp-pred'
    if argtype in ('prepnp', 'prepncp', 'preplexnp', 'comprepnp'):
        return 'obl'
    if argtype == 'adjp':
        # NOTE: not always, checking for control
        if is_controllee:
            return 'xcomp-pred'
        return 'GFWEIRD'
    if argtype == 'xp':
        # NOTE: the semantic type becomes part of the GF name
        return 'obl-'+params[0]
    simple = {
        'cp': 'comp',
        'infp': 'xcomp',
        'refl': 'marker',
        # NOTE: elsewhere (unless marked as SUBJ or OBJ)
        'nonch': 'obl-str',
        'advp': 'obl-adv',
        'fixed': 'obl-adv',
    }
    if argtype in simple:
        return simple[argtype]
    return 'GFWEIRD'

# NOTE: frequency dict of unlike-GF sets (key: str() of the sorted GF list), filled by getGFmore
unlikegfdict = {}

def getGFmore(argslist, ctrl):
    """Choose one GF label for an argument with several realisations.

    If all realisations map to the same GF (via getGF), that GF is returned.
    Otherwise the set of unlike GFs is counted in unlikegfdict, 'GFTODO' is
    dropped, and the GF is chosen by makeGFrank unless the set contains
    xcomp, xcomp-pred or obj-th; in that case a debug message is recorded
    and a 'coord(N):TODO! ...' placeholder string is returned.
    """
    argtypes = [getGF(arg, ctrl) for arg in argslist]
    gfset = set(argtypes)
    if len(gfset) == 1:
        return argtypes[0]
    # NOTE: unlike GFs
    gfset = sorted(gfset)
    # NOTE: building frequency dict of unlike GFs ('in'/get instead of has_key, which Python 3 lacks)
    key = str(gfset)
    unlikegfdict[key] = unlikegfdict.get(key, 0) + 1
    # NOTE: choosing GF; GFTODO is removed, remaining GFs are taken into consideration
    gfset = [gf for gf in gfset if gf != 'GFTODO']
    if not set(['xcomp', 'xcomp-pred', 'obj-th']).intersection(gfset):
        return makeGFrank(gfset)
    # NOTE: debugging info, take a look at these frames
    debug.append(current_frame+' '+'check frame: no common GF! '+str(gfset))
    return 'coord('+str(len(gfset))+'):TODO! '+str(argtypes)

# ATTENTION: added 'GFWEIRD' with rank 100 (highest priority)
def makeGFrank(gflist):
    """Return the highest-ranked GF from gflist.

    When several GFs share the top rank, a message is appended to the global
    debug list and a 'check frame: ...' string is returned instead of a GF.
    Raises KeyError for a GF that has no rank.
    """
    gfrank = {'obl-abl': 5, 'obl-adl': 5, 'obl-dur': 5, 'obl-locat': 5, 'obl-mod': 5, 'obl-perl': 5, 'obl-temp': 5, 'obl-adv': 5, 'obl': 4, 'obl-inst': 3, 'obl-str': 3, 'obl-gen': 3, 'comp': 2, 'GFWEIRD': 100}
    # NOTE: grouping GFs by rank (setdefault instead of has_key, which Python 3 lacks)
    rankdict = {}
    for gf in gflist:
        rankdict.setdefault(gfrank[gf], []).append(gf)
    topgf = max(rankdict)
    candidates = rankdict[topgf]
    if len(candidates) > 1:
        # NOTE: debugging info, take a look at these frames
        debug.append(current_frame+' '+'check frame: equally ranked GFs! '+str(gflist))
        return 'check frame: equally ranked GFs! '+str(gflist)
    return candidates[0]

def getArgs(args):
    """Turn frame positions into a list of triples (GF, realisations, ctrl).

    Each position looks like 'labels{real1; real2}' or a bare realisation.
    The labels 'subj'/'obj' fix the GF directly; labels containing 'control'
    are collected into ctrl (False when there are none). Otherwise the GF is
    computed by getGF/getGFmore. A GF that repeats gets a numeric suffix
    (2, 3, ...). The returned GF is upper-cased.
    """
    outargs = []
    used_gfs = []
    for arg in args:
        gf = False
        ctrl = False
        brace = arg.find('{')
        if brace == -1:
            body = arg
        else:
            labels = arg[:brace]
            # NOTE: the last character is assumed to be the closing brace
            body = arg[brace+1:-1]
            if labels:
                label_list = labels.split(',')
                for label in label_list:
                    if label in ('subj', 'obj'):
                        gf = label
                ctrl = [label for label in label_list if 'control' in label] or False
        argslist = [real.strip() for real in body.split(';')]
        if not gf:
            if len(argslist) > 1:
                gf = getGFmore(argslist, ctrl)
            else:
                gf = getGF(argslist[0], ctrl)
        if gf in used_gfs:
            # NOTE: numbering repeated GFs
            suffix = 2
            while gf+str(suffix) in used_gfs:
                suffix += 1
            gf = gf+str(suffix)
        used_gfs.append(gf)
        outargs.append((gf.upper(), argslist, ctrl))
    return outargs

# CONVERSION: getting semantic form (PRED)

def semNosemArg(lemma, gfargs):
    """Sort the GFs of a frame into semantic and non-semantic ones and build PRED.

    gfargs is a list of triples (gf, argslist, ctrl). MARKER is skipped,
    arguments without control labels are semantic. A controller is
    non-semantic when its controllee is not an infp, or when the controller
    itself is realised as 'E'; a controllee is semantic. The result is
    whatever makePRED returns for the two lists.
    """
    # NOTE: control label --> list of (gf, argslist); setdefault instead of has_key (absent in Python 3)
    control = {}
    for (gf, argslist, ctrl) in gfargs:
        if ctrl:
            for cr in ctrl:
                control.setdefault(cr, []).append((gf, argslist))
    sem = []
    nosem = []
    for (gf, argslist, ctrl) in gfargs:
        if gf == 'MARKER':
            continue
        if not ctrl:
            sem.append(gf)
            continue
        for (controller, controllees) in control_pairs:
            if controller in ctrl:
                if gf not in sem and gf not in nosem and controllees in control:
                    # NOTE: some heuristics, checking only first elements of relevant lists (there may be 1< controllee)
                    cats_ee = control[controllees][0][1]
                    if 'infp' in [c.split('(')[0] for c in cats_ee]:
                        if 'E' in argslist:
                            nosem.append(gf)
                        else:
                            sem.append(gf)
                    # NOTE: control with predicatives
                    else:
                        nosem.append(gf)
            elif controllees in ctrl:
                sem.append(gf)
    return makePRED(lemma, sem, nosem)

def makePRED(lemma, semargs, nonsemargs):
    """Build the semantic form, e.g. (^ PRED)='lemma<(^ SUBJ)(^ OBJ)>(^ XCOMP)'.

    Semantic arguments go inside the angle brackets ('NULL' is written
    without a designator), non-semantic ones follow the closing bracket.
    The brackets are omitted only when both lists are empty.
    """
    sem = "".join([sa+' ' if sa == 'NULL' else "(^ "+sa+")" for sa in semargs])
    nonsem = "".join(["(^ "+nsa+")" for nsa in nonsemargs])
    pred = "(^ PRED)='"+lemma
    # NOTE: the original condition tested sem twice, so non-semantic arguments
    # were dropped whenever there were no semantic ones
    if sem or nonsem:
        pred += "<"+sem+">"+nonsem
    pred += "'"
    return pred

# CONVERSION: handling control

def makeCTRLdict(gfargs):
    """Return a dict mapping each control label to the list of GFs bearing it.

    gfargs is a list of triples (gf, argslist, ctrl). Arguments whose
    realisations include 'xp(mod)' are left out.
    """
    control = {}
    for (gf, argslist, ctrl) in gfargs:
        # NOTE: removing control with xp(mod)
        if ctrl and 'xp(mod)' not in argslist:
            for cr in ctrl:
                # NOTE: setdefault instead of has_key (absent in Python 3)
                control.setdefault(cr, []).append(gf)
    return control

def makeControl(gfargs):
    """Return the control equations of a frame joined with newlines, or None.

    For every (controller, controllee) label pair present in the frame, the
    first controller GF is equated with the SUBJ of each controllee GF.
    """
    control = makeCTRLdict(gfargs)
    if len(control) > 1:
        relations = []
        for (controller, controllees) in control_pairs:
            # NOTE: to allow controllers without a matching controllee (as a result of reduction)
            # NOTE: 'in' instead of has_key (absent in Python 3)
            if controller in control and controllees in control:
                for controllee in control[controllees]:
                    relations.append("(^ "+control[controller][0]+")= "+"(^ "+controllee+" SUBJ)")
        if relations:
            return '\n'.join(relations)
    return None

# CONVERSION: handling passives

def checkPassive(gfargs):
    """Return True if the frame has an OBJ argument, None otherwise."""
    gfs = [gf for (gf, argslist, ctrl) in gfargs]
    if 'OBJ' in gfs:
        return True
    return None

def makePass(gfargs, type):
    """Apply changePass to every argument of the frame.

    Returns the converted frame, or None as soon as one argument is left
    with no realisations.
    """
    newframe = []
    for (gf, argslist, ctrl) in gfargs:
        new_gf, new_args = changePass(gf, argslist, type)
        if len(new_args) == 0:
            return None
        newframe.append((new_gf, new_args, ctrl))
    return newframe

def changePass(gf, argslist, type):
    """Return (gf, argslist) of one argument after passivisation.

    SUBJ takes the GF named by type (upper-cased); for OBL-AG only
    structural np/ncp/lexnp realisations are kept, rewritten as 'przez'
    phrases. OBJ becomes SUBJ. Other arguments are returned unchanged.
    """
    if gf == 'SUBJ':
        gf = type.upper()
        # NOTE: changing categories, accepting only selected ones
        if gf == 'OBL-AG':
            przez_args = []
            for arg in argslist:
                argtype, params = getArgtypeParams(arg)
                if arg == 'np(str)':
                    przez_args.append('prepnp(przez,acc)')
                if argtype == 'ncp' and params[0] == 'str':
                    przez_args.append('prepncp(przez,acc,'+params[1]+')')
                if argtype == 'lexnp' and params[0] == 'str':
                    przez_args.append('preplexnp(przez,acc,'+','.join(params[1:])+')')
            argslist = przez_args
    if gf == 'OBJ':
        gf = 'SUBJ'
    return (gf, argslist)

# CONVERSION: getting realisation definitions of xp, etc.

def getArgRealDefs(file):
    """Read realisation definitions from a file into a dict.

    A line containing '-->' starts a new type; the text before '-->' is the
    key. Every following non-blank line adds one realisation to that type:
    the text before the first '[', stripped. Blank lines are ignored.

    Raises ValueError if a realisation line appears before any type line
    (previously an IndexError).
    """
    dict_real = {}
    last_type = None
    # NOTE: 'with' closes the file even if parsing fails
    with open(file, 'r') as definitions:
        for line in definitions:
            if len(line.strip()) == 0:
                continue
            if '-->' in line:
                last_type = line.split('-->')[0]
            elif last_type is None:
                raise ValueError('realisation listed before any type definition: %r' % (line,))
            else:
                rdef = line.split('[')[0].strip()
                # NOTE: setdefault instead of has_key (absent in Python 3)
                dict_real.setdefault(last_type, []).append(rdef)
    return dict_real

# NOTE: realisation lists keyed by type (looked up as 'xp(...)' and 'advp(...)'), read when the module is loaded
realisations = getArgRealDefs(real_defs)

# CONVERSION: getting constraints

# NOTE: % is used for paths with variables
def makeConstr(type, path, attr, val=False, eq=False):
    """Build a constraint string '(PATH ATTR)' or '(PATH ATTR)EQ VAL'.

    For type 'std' the path is prefixed with '^ '; for type 'off' its first
    element is replaced with '<-'. Paths starting with '%' are left as they
    are. The value part is added only when both val and eq are given.
    """
    is_variable = path[0] == '%'
    if not is_variable:
        if type == "std":
            path = '^ '+path
        elif type == "off":
            path = '<- '+" ".join(path.split()[1:])
    if not path.endswith(' '):
        path += ' '
    constraint = "("+path+attr+")"
    if val and eq:
        constraint += eq+" "+val
    return constraint

# NOTE: handles complex CASE attribute (to use it, change default cpx to True)
def makeCase(type, path, val, eq, cpx=False):
    """Build a CASE constraint; with cpx the case value becomes an attribute set to '+'."""
    if not cpx:
        return makeConstr(type, path, 'CASE', val, eq)
    return makeConstr(type, path+' CASE', val, '+', eq)

def makeCorr(gf, type, corr):
    """Require (corr == True) or forbid (otherwise) CORRELATIVE + on gf."""
    operator = '=c' if corr == True else '~='
    return makeConstr(type, gf, 'CORRELATIVE', '+', operator)

def makeADVP(gf, semtype, type):
    """Build the constraint for advp(semtype) bearing the function gf."""
    if semtype == 'pron':
        # NOTE: no instances of coordination --> std
        return '(^ %s PRED FN) $c {tak jak}' % gf
    if semtype == 'misc':
        # NOTE: misc: adverbs of degree and evaluation, derived from adjectives, so specified for DEGREE
        # NOTE: existential constraint (^ GF DEGREE), so only 3 parameters passed to makeConstr
        return makeConstr(type, gf, 'DEGREE')
    if semtype == 'mod':
        return makeConstr(type, gf, 'CHECK _CAT', 'adv', '=c')
    # NOTE: constraints depending on semtype (advp(<SEM>) from the realisation list)
    alternatives = []
    for real in realisations['advp('+semtype+')']:
        alternatives.append(makeConstr('std', gf, 'PRED FN', real, '=c'))
    return " | ".join(alternatives)

def makeXP(gf, semtype, ctrl, type):
    """Build a braced disjunction of the constraints of all realisations of xp(semtype).

    Every realisation is converted with type 'std', whatever type was passed.
    """
    alternatives = []
    for real in realisations['xp('+semtype+')']:
        alternatives.append(makeSingleConstraint(gf, real, ctrl, 'std'))
    return '{'+" | ".join(alternatives)+'}'

# NOTE: no instances of coordination --> std
def makeNONCH(gf, ctrl):
    """Build the constraints for nonch: structural case, pronoun type, selected lemmas."""
    if gf == 'SUBJ':
        # NOTE: these cannot be numerals, like correlative pronouns
        case = makeSUBJstr('std', True)
    else:
        case = makeNP(gf, 'str', 'std', ctrl)
    pronoun = makeConstr('std', gf, 'NTYPE NSYN', 'pronoun', '=c')
    # NOTE: only selected pronouns (CO, COŚ, NIC, TO)
    semconstr = '(^ '+gf+' PRED FN) $c {co coś nic to}'
    return ' '.join([case, pronoun, semconstr])

def makeCOMPP(gf, pform, type):
    """Constrain PFORM of gf to the complex preposition, written with underscores."""
    form = cpxlemma(pform)
    return makeConstr(type, gf, 'PFORM', form, '=c')

def makeREFL():
    """Return the template call used for the reflexive marker."""
    # NOTE: uses RFX template (calls RQR and REFLH-LEX)
    template_call = "@(RFX R)"
    return template_call

def makeADJP(gf, case, type):
    """Build the constraints for adjp(case) bearing the function gf.

    Supported cases: 'pred' (agreement with the SUBJ of gf), 'str'
    (structural case) and 'inst'. Any other case raises ValueError;
    previously it failed with an UnboundLocalError on the return statement.
    """
    if case == 'pred':
        constr = "(^ "+gf+" CASE)=c (^ "+gf+" SUBJ CASE)"
    elif case == 'str':
        constr = makeOBJstr(gf, type)
    elif case == 'inst':
        constr = makeCase(type, gf, case, '=c')
    else:
        raise ValueError('unsupported adjp case: %r' % (case,))
    # NOTE: allowing other adjectival forms: ppas, pact
    return constr+' '+makeConstr(type, gf, 'CHECK _CAT', '{adj ppas pact}', ' $c')

def makeNP(gf, case, type, ctrl, corr=False):
    """Build the case and CORRELATIVE constraints for a nominal argument.

    For non-SUBJ controllers an optional 'pro' alternative with assigned
    case is added.
    """
    if gf == 'SUBJ':
        # NOTE: allows any case (not just str) because of passivisation
        return makeSUBJstr(type, corr)+' '+makeCorr(gf, type, corr)
    if case == 'str':
        constr = makeOBJstr(gf, type)
    elif case == 'part':
        constr = makeOBJpart(gf, type)
    else:
        constr = makeCase(type, gf, case, '=c')
    # NOTE: optional implicit controllers (not SUBJ: handled by syntax)
    if ctrl and ('controller' in ctrl or 'controller2' in ctrl):
        if case in ('str', 'part'):
            assign_case = '{'+makeCase(type, gf, 'acc', '=')+' | '+makeCase(type, gf, 'gen', '=')+'}'
        else:
            assign_case = makeCase(type, gf, case, '=')
        pro_gf = makeConstr('std', gf, 'PRED', "'pro'", '=')
        constr += ' {'+pro_gf+' '+assign_case+'}'
    return constr+' '+makeCorr(gf, type, corr)

# NOTE: CP types from Walenty: aż, czy, gdy, int, jak, jakby, jakoby, jeśli, kiedy, że, żeby, żeby2
# NOTE: CP types from the list of realisations: aż2 (?), dopóki, gdzie (int), którędy (int), skąd (int), zanim
def makeCP(gf, cform, type):
    """Build the constraints for a clausal argument cp(cform) found at path gf.

    Interrogative types get CLAUSE-TYPE int plus extra conditions for czy
    and for the listed question words; żeby2 and gdy give a disjunction
    that depends on NEG / MOOD; every other cform is a plain COMP-FORM
    constraint. type is 'std' or 'off', as passed on to makeConstr.
    """
    # NOTE: added from the list of realisations: gdzie, którędy, skąd
    if cform in ['int', 'czy', 'jak', 'kiedy', 'gdzie', 'którędy', 'skąd']:
        intconstr = makeConstr(type, gf, 'CLAUSE-TYPE', 'int', '=c')
        if cform == 'czy':
            intconstr += ' '
            if type == 'std':
                intconstr += makeConstr(type, 'GF+', 'TYPE', 'int', '~=')
            if type == 'off':
                # NOTE: only the interrogative marker (alternatively, add Y/N marking in CZY)
                intconstr += '(('+gf+' <-) GF+ TYPE)~= int'
        if cform in ['jak', 'kiedy', 'gdzie', 'którędy', 'skąd']:
            # NOTE: to avoid using %K variable (variables don't seem to work in off-path constraints)
            intconstr += ' '+makeConstr(type, gf+' ADJUNCT $', 'TYPE', 'int', '=c')+' '+makeConstr(type, gf+' ADJUNCT $', 'PRED FN', cform, '=c')
        return intconstr
    # NOTE: ŻEBY2 can be realised as ŻE (always) or as ŻEBY with negation in the main clause (TODO? rhetorical questions)
    if cform == 'żeby2':
        if type == 'std':
            return '{(^ NEG)=c + '+makeConstr(type, gf, 'COMP-FORM', 'żeby', '=c')+' | '+makeConstr(type, gf, 'COMP-FORM', 'że', '=c')+'}'
        if type == 'off':
            # NOTE(review): gf is cut down to its first element here, so a longer path such as 'OBJ COMP' (passed by makeNCP) loses 'COMP' in the COMP-FORM constraints below -- confirm this is intended
            gf = gf.split()[0]
            return '{(('+gf+' <-) NEG)=c + '+makeConstr(type, gf, 'COMP-FORM', 'żeby', '=c')+' | '+makeConstr(type, gf, 'COMP-FORM', 'że', '=c')+'}'
    # NOTE: GDY is realised as GDYBY under conditional mood in the main clause and as GDY elsewhere
    if cform == 'gdy':
        if type == 'std':
            return '{(^ TNS-ASP MOOD)=c conditional '+makeConstr(type, gf, 'COMP-FORM', 'gdyby', '=c')+' | (^ TNS-ASP MOOD)~= conditional '+makeConstr(type, gf, 'COMP-FORM', 'gdy', '=c')+'}'
        if type == 'off':
            # NOTE(review): same truncation of gf as in the żeby2 branch -- confirm
            gf = gf.split()[0]
            return '{(('+gf+' <-) TNS-ASP MOOD)=c conditional '+makeConstr(type, gf, 'COMP-FORM', 'gdyby', '=c')+' | (('+gf+' <-) TNS-ASP MOOD)~= conditional '+makeConstr(type, gf, 'COMP-FORM', 'gdy', '=c')+'}'
    # NOTE: complementisers from Walenty: aż, jakby, jakoby, jeśli, że, żeby
    # NOTE: complementisers from the list of realisations: aż2 (?), dopóki, zanim
    # NOTE: also reached by żeby2/gdy when type is neither 'std' nor 'off'
    return makeConstr(type, gf, 'COMP-FORM', cform, '=c')

def makePP(gf, pform, case, type, corr=False):
    """Build PFORM, CASE and CORRELATIVE constraints for a prepositional argument."""
    # NOTE: to handle semantic prepositions (realisations of XP)
    semantic_gfs = ('OBL-ABL', 'OBL-ADL', 'OBL-DUR', 'OBL-LOCAT', 'OBL-MOD', 'OBL-PERL', 'OBL-TEMP')
    if gf in semantic_gfs:
        return makePPsem(gf, pform, case, type, corr)
    if case == 'str' and type == 'std':
        c = '(^ '+gf+' CASE)=(^ '+gf+' SUBJ CASE)'
    elif case == 'str' and type == 'off':
        c = '(<- CASE)=(<- SUBJ CASE)'
    else:
        c = makeCase(type, gf, case, '=c')
    return ' '.join([setPFORM(type, gf, 'PFORM', pform), c, makeCorr(gf, type, corr)])

def makePPsem(gf, pform, case, type, corr=False):
    """Like makePP, but the preposition is checked via PRED FN and the noun is found under gf OBJ."""
    noun_path = gf+' OBJ'
    if case == 'str' and type == 'std':
        c = '(^ '+noun_path+' CASE)=(^ '+noun_path+' SUBJ CASE)'
    elif case == 'str' and type == 'off':
        c = '(<- OBJ CASE)=(<- OBJ SUBJ CASE)'
    else:
        c = makeCase(type, noun_path, case, '=c')
    return ' '.join([setPFORM(type, gf, 'PRED FN', pform), c, makeCorr(noun_path, type, corr)])

def setPFORM(type, gf, attr, pform):
    """Constrain attr of gf to the preposition; 'przeciw' also admits 'przeciwko'."""
    if pform != 'przeciw':
        return makeConstr(type, gf, attr, cpxlemma(pform), '=c')
    variants = [makeConstr(type, gf, attr, form, '=c') for form in ('przeciw', 'przeciwko')]
    return '{'+' | '.join(variants)+'}'

def makeINFP(gf, asp, type, ctrl):
    """Build the aspect (or category) constraint and the optional pro SUBJ for infp."""
    if asp != '_':
        con_asp = makeConstr(type, gf, 'TNS-ASP ASPECT', asp, '=c')
    elif type == 'off':
        # NOTE: to distinguish infp conjuncts under coordination
        con_asp = makeConstr(type, gf, 'CHECK _CAT', 'inf', '=c')
    else:
        con_asp = None
    # NOTE: optional implicit SUBJ for predicates taking infp which is not a controllee(2)
    con_ctrl_pro = uncontrolledINFP(gf, ctrl, type)
    return " ".join(onlyNonempty([con_asp, con_ctrl_pro]))

def uncontrolledINFP(gf, ctrl, type):
    """Return an optional pro SUBJ for gf unless ctrl marks it as controllee(2); None otherwise."""
    if ctrl and ('controllee' in ctrl or 'controllee2' in ctrl):
        return None
    pro_subj = makeConstr('std', gf, 'SUBJ PRED', "'pro'", '=')
    return '{'+pro_subj+'}'

def makeNCP(gf, case, cform, type, ctrl):
    """Combine the nominal constraints (correlative) with those of the clause under gf COMP."""
    # NOTE: corr=True --> different constraints for SUBJ! (don't allow numerals)
    nominal = makeNP(gf, case, type, ctrl, True)
    clause = makeCP(gf+' COMP', cform, type)
    return nominal+' '+clause

def makePNCP(gf, pform, case, cform, type):
    """Combine the prepositional constraints (correlative) with those of the clause under gf COMP."""
    prepositional = makePP(gf, pform, case, type, True)
    clause = makeCP(gf+' COMP', cform, type)
    return prepositional+' '+clause

def makeLEX(gf, lexinfo, type):
    """Build the lexical constraints from lexinfo = [number, quoted lemma, modification, ...]."""
    num, quoted_lemma, mod = lexinfo[0], lexinfo[1], lexinfo[2]
    # NOTE: first and last character of the lemma (the quotes) are cut off
    parts = [makeConstr(type, gf, 'PRED FN', quoted_lemma[1:-1], '=c')]
    if num != '_':
        parts.append(makeConstr(type, gf, 'NUM', num, '=c'))
    if mod in ('natr', 'ratr', 'batr'):
        parts.append(modLEX(gf, mod, type))
    return ' '.join(parts)

def modLEX(gf, mod, type):
    """Build the modification constraint for natr, ratr or batr; None for other values."""
    if mod == 'batr':
        return makeConstr(type, gf, 'ADJUNCT $ PRED FN')+' $c {swój własny}'
    if mod in ('natr', 'ratr'):
        adjunct = makeConstr(type, gf, 'ADJUNCT')
        poss = makeConstr(type, gf, 'POSS')
        if mod == 'natr':
            # NOTE: neither ADJUNCT nor POSS
            return '~'+adjunct+' ~'+poss
        # NOTE: ADJUNCT or POSS
        return '{'+adjunct+' | '+poss+'}'
    return None

def makeLEXNP(gf, case, lexinfo, type, ctrl):
    """Join the nominal constraints with the lexical ones."""
    nominal = makeNP(gf, case, type, ctrl)
    lexical = makeLEX(gf, lexinfo, type)
    return nominal+' '+lexical

def makeLEXPP(gf, pform, case, lexinfo, type):
    """Join the prepositional constraints with the lexical ones."""
    prepositional = makePP(gf, pform, case, type)
    lexical = makeLEX(gf, lexinfo, type)
    return prepositional+' '+lexical

def makeEpro():
    """Return the defining equations for SUBJ: singular, neuter, third person."""
    features = ["(^ SUBJ NUM)=sg", "(^ SUBJ GEND)=n", "(^ SUBJ PERS)=3"]
    return " ".join(features)

# NOTE: E-type verbs take the SUBJ of their XCOMP complement: if XCOMP has no SUBJ, E-type verb has no SUBJ (zaczęło być widać?)
def makeEnoSUBJ():
    """Return the negative existential constraint on XCOMP SUBJ."""
    no_xcomp_subj = "~(^ XCOMP SUBJ)"
    return no_xcomp_subj

def makeFIXED(gf, mwe):
    """Constrain PRED FN of gf to the fixed expression and forbid its modification."""
    # NOTE: no coordination with fixed --> std
    pred_fn = "(^ "+gf+" PRED FN)=c "+mwe
    return pred_fn+' '+modLEX(gf, 'natr', 'std')

def makeSingleConstraint(gf, arg, ctrl, type):
    """Dispatch one realisation (e.g. 'prepnp(na,acc)') to its constraint builder.

    Returns the constraint string, None for 'E' under control, or a
    'constr:TODO! ...' placeholder for an unknown argument type. New fixed
    expressions are recorded in the global list fixed.
    """
    argtype = arg.partition('(')[0]
    params = arg.rpartition('(')[2].partition(')')[0].split(',')
    # NOTE: nominal arguments, params = case[, ...]
    if argtype == 'np':
        return makeNP(gf, params[0], type, ctrl)
    if argtype == 'ncp':
        return makeNCP(gf, params[0], params[1], type, ctrl)
    if argtype == 'lexnp':
        return makeLEXNP(gf, params[0], params[1:], type, ctrl)
    # NOTE: prepositional arguments, params = preposition, case[, ...]
    if argtype == 'prepnp' or argtype == 'prepadjp':
        return makePP(gf, params[0], params[1], type)
    if argtype == 'prepncp':
        return makePNCP(gf, params[0], params[1], params[2], type)
    if argtype == 'preplexnp':
        return makeLEXPP(gf, params[0], params[1], params[2:], type)
    if argtype == 'cp':
        return makeCP(gf, params[0], type)
    if argtype == 'infp':
        return makeINFP(gf, params[0], type, ctrl)
    if argtype == 'adjp':
        return makeADJP(gf, params[0], type)
    if argtype == 'refl':
        return makeREFL()
    if argtype == 'comprepnp':
        return makeCOMPP(gf, params[0], type)
    if argtype == 'nonch':
        return makeNONCH(gf, ctrl)
    if argtype == 'advp':
        return makeADVP(gf, params[0], type)
    if argtype == 'xp':
        return makeXP(gf, params[0], ctrl, type)
    if argtype == 'E':
        # NOTE: reduced frame without SUBJ (~(^ XCOMP SUBJ)) is handled in makeFrame
        # NOTE: no constraints are imposed on SUBJ because they are taken from XCOMP SUBJ
        return None if ctrl else makeEpro()
    if argtype == 'fixed':
        mwe = cpxlemma(params[0])
        if mwe not in fixed:
            fixed.append(mwe)
        return makeFIXED(gf, mwe)
    return 'constr:TODO! '+str(arg)

def makeConstraints(gfargs):
    """Build the constraints of all arguments of a frame, one per line.

    NULL arguments are skipped. An argument with several realisations gets
    an off-path disjunction under '(^ GF PRED: {...})'. Repeated and empty
    constraints are left out.
    """
    allcons = []
    for (gf, argslist, ctrl) in gfargs:
        if gf == 'NULL':
            continue
        if len(argslist) > 1:
            alternatives = [makeSingleConstraint(gf, arg, ctrl, 'off') for arg in argslist]
            con = "(^ %s PRED: {%s})" % (gf, " | ".join(alternatives))
        else:
            con = makeSingleConstraint(gf, argslist[0], ctrl, 'std')
        if con not in allcons:
            allcons.append(con)
    return '\n'.join(onlyNonempty(allcons))

# STRUCTURAL CASE ASSIGNMENT

# NOTE: CASE constraints according to POS: genitive for gerunds, no constraint for pact, ppas and inf, nom/acc elsewhere
def makeSUBJstr(type, corr=False):
    """Build the structural case disjunction for SUBJ, keyed on CHECK _CAT.

    type is passed on to makeConstr/makeCase. With corr, the last
    alternative requires nominative; without it, nominative or accusative
    with (<- ACM)=c rec, stated under '(^ SUBJ PRED: {...})'.
    """
    gf = 'SUBJ'
    if corr:
        # NOTE: XLE does not tolerate ~$ in off-path constraints, but this uses plain constraints
        # NOTE: does *NOT* use $
        return '{'+makeConstr(type, 'CHECK', '_CAT', 'ger', '=c')+' '+makeCase(type, gf, 'gen', '=c')+' | '+makeConstr(type, 'CHECK', '_CAT', 'pact', '=c')+' | '+makeConstr(type, 'CHECK', '_CAT', 'ppas', '=c')+' | '+makeConstr(type, 'CHECK', '_CAT', 'inf', '=c')+' | '+makeConstr(type, 'CHECK', '_CAT', 'ger', '~=')+' '+makeConstr(type, 'CHECK', '_CAT', 'pact', '~=')+' '+makeConstr(type, 'CHECK', '_CAT', 'ppas', '~=')+' '+makeConstr(type, 'CHECK', '_CAT', 'inf', '~=')+' '+makeCase(type, gf, 'nom', '=c')+'}'
    else:
        # NOTE: *NOT* compatible with coordination (does *NOT* use $)
        return '{'+makeConstr(type, 'CHECK', '_CAT', 'ger', '=c')+' '+makeCase(type, gf, 'gen', '=c')+' | '+makeConstr(type, 'CHECK', '_CAT', 'pact', '=c')+' | '+makeConstr(type, 'CHECK', '_CAT', 'ppas', '=c')+' | '+makeConstr(type, 'CHECK', '_CAT', 'inf', '=c')+' | '+makeConstr(type, 'CHECK', '_CAT', 'ger', '~=')+' '+makeConstr(type, 'CHECK', '_CAT', 'pact', '~=')+' '+makeConstr(type, 'CHECK', '_CAT', 'ppas', '~=')+' '+makeConstr(type, 'CHECK', '_CAT', 'inf', '~=')+' '+'(^ SUBJ PRED: '+'{'+makeCase('off', gf, 'nom', '=c')+' | '+makeCase('off', gf, 'acc', '=c')+' (<- ACM)=c rec'+'}'+')'+'}'


def makeOBJstr(gf, type):
    """Build the structural case constraint for a non-subject argument.

    For type 'std' this is a call to the STRCO-LEX template. Otherwise an
    off-path disjunction is spelled out: genitive when CHECK _CAT is ger,
    else accusative or genitive depending on the NEG constraints.
    """
    if type == 'std':
        return '@(STRCO-LEX '+gf+')'
    # NOTE: off-path variant; NEG is looked up through any number of XCOMP/XCOMP-PRED above gf
    return '{'+'(('+gf+' <-) CHECK _CAT)=c ger'+' '+makeCase('off', gf, 'gen', '=c')+' | '+'{~(({XCOMP|XCOMP-PRED}* '+gf+' <-) NEG) '+makeCase('off', gf, 'acc', '=c')+' | (({XCOMP|XCOMP-PRED}* '+gf+' <-) NEG)=c + {(('+gf+' <-) NEG)=c + '+makeCase('off', gf, 'gen', '=c')+' | ~(('+gf+' <-) NEG) (<- CASE) $c {acc gen}}}'+'}'

def makeOBJpart(gf, type):
    """Build the constraint for partitive case: partitive genitive or structural case."""
    # NOTE: genitive partitive (added special feature) objects can be coordinated with accusative ones (dajcie wina i całą świnię)
    partitive = makeCase('off', gf, 'gen', '=c')+' (<- PARTITIVE)= +'
    constr = '{'+partitive+' | '+makeOBJstr(gf, 'off')+'}'
    if type != 'std':
        return constr
    return '(^ '+gf+' PRED: '+constr+')'

def makeEnoSUBJframe(gfargs):
    """Return the frame without SUBJ and without control.

    Arguments with no control labels are kept as they are; arguments
    labelled 'controllee' are kept with ctrl reset to False; other
    controlled arguments are dropped.
    """
    no_subj = []
    for (gf, argslist, ctrl) in gfargs:
        if gf == 'SUBJ':
            continue
        if not ctrl:
            no_subj.append((gf, argslist, ctrl))
        elif 'controllee' in ctrl:
            no_subj.append((gf, argslist, False))
    return no_subj

# REDUCING FRAMES

def addreduced(gfargs):
    """Return the reduced variants of a frame (optional arguments left out).

    Obligatory arguments: SUBJ, arguments labelled controller(2), and
    arguments with a refl, lexnp, preplexnp or fixed realisation. Every
    subset of the remaining arguments is combined with the obligatory ones.
    Frames with more than 5 arguments are dropped; the rest goes through
    reduceOBL and reduceCTRL.
    """
    if len(gfargs) == 0:
        return [gfargs]
    controller_labels = ('controller', 'controller2')
    lexicalised = ('refl', 'lexnp', 'preplexnp', 'fixed')
    obl = []
    opt = []
    for (gf, argslist, ctrl) in gfargs:
        # NOTE: arguments marked as controllee(2) can be reduced (unlike controller(2))
        # NOTE: ctrl is a list of labels (or False); the original test compared the
        # whole list with the label strings, so it never recognised a controller
        is_controller = bool(ctrl) and (ctrl in controller_labels or any(label in controller_labels for label in ctrl))
        is_lexicalised = any(a.split('(')[0] in lexicalised for a in argslist)
        if is_controller or is_lexicalised or gf == 'SUBJ':
            obl.append((gf, argslist, ctrl))
        else:
            opt.append((gf, argslist, ctrl))
    reduced = []
    for optf in mix2(opt):
        red = obl + optf
        if red not in reduced:
            reduced.append(red)
    # NOTE: reduceOBL ensures proper GF assignment for OBL
    # NOTE: reduceCTRL removes controllers without a matching controllee
    # WORKAROUND: there is a bug in XLE which causes it to crash if a lexical entry has more than 5 arguments
    return [reduceCTRL(reduceOBL(r)) for r in reduced if len(r) < 6]

def reduceOBL(gfargs):
    """Renumber the OBL, OBL2, ... arguments of a frame so that they are consecutive."""
    OBL_list = ['OBL', 'OBL2', 'OBL3', 'OBL4']
    seen = 0
    renumbered = []
    for gfarg in gfargs:
        gf, argslist, ctrl = gfarg[0], gfarg[1], gfarg[2]
        if gf in OBL_list:
            # NOTE: the n-th OBL-type argument gets the n-th name from the list
            expected = OBL_list[seen]
            seen += 1
            if gf != expected:
                gfarg = (expected, argslist, ctrl)
        renumbered.append(gfarg)
    return renumbered

def reduceCTRL(gfargs):
    """Drop the control labels of controllers that have no matching controllee.

    An argument labelled with a controller label from control_pairs keeps
    its labels only if the frame (as seen by makeCTRLdict) contains the
    paired controllee; otherwise its ctrl is set to False.
    """
    control = makeCTRLdict(gfargs)
    gfargs_new = []
    for gfarg in gfargs:
        gf, argslist, ctrl = gfarg[0], gfarg[1], gfarg[2]
        if ctrl:
            for (controller, controllees) in control_pairs:
                # NOTE: 'not in' instead of has_key (absent in Python 3)
                if controller in ctrl and controllees not in control:
                    gfarg = (gf, argslist, False)
        gfargs_new.append(gfarg)
    return gfargs_new

def powerset(iterable):
    """Return an iterator over all subsets (as tuples), from the empty one to the full one."""
    items = list(iterable)
    by_size = (combinations(items, size) for size in range(len(items)+1))
    return chain.from_iterable(by_size)

def mix2(elements):
    """Return all subsets of elements as a list of lists."""
    subsets = []
    for subset in powerset(elements):
        subsets.append(list(subset))
    return subsets

# SPLITTING FRAMES

def cartesianProduct(list):
    """Return the Cartesian product of the given sequences as a list of tuples."""
    combos = []
    combos.extend(product(*list))
    return combos

def copyLabelsToReals(args):
    """For each frame position return the list of pairs (labels, realisation).

    The labels are the text before '{' (empty when there is no brace); the
    realisations are the ';'-separated, stripped parts of the rest.
    """
    outargs = []
    for arg in args:
        brace = arg.find('{')
        if brace == -1:
            head, body = '', arg
        else:
            # NOTE: labels / rest (without the braces)
            head, body = arg[:brace], arg[brace+1:-1]
        # NOTE: every realisation gets a copy of the labels
        outargs.append([(head, real.strip()) for real in body.split(';')])
    return outargs

def checkControllee(c):
    """Check the category of the controllee(2) realisations in a split frame.

    c is a sequence of pairs (labels, realisation). Returns a dict mapping
    'controllee'/'controllee2' to 'OK' or 'bad', decided by the first
    realisation carrying the label.
    """
    ctrlee_good = ['infp', 'prepadjp', 'prepnp', 'adjp', 'np']
    local_control = {}
    for (labels, real) in c:
        for l in labels.split(','):
            # NOTE: 'not in' instead of has_key (absent in Python 3)
            if l in ('controllee', 'controllee2') and l not in local_control:
                if real.split('(')[0] in ctrlee_good:
                    local_control[l] = 'OK'
                else:
                    local_control[l] = 'bad'
    return local_control

def makeSplitEntry(line, removeOR):
    """Split an entry into entries with exactly one realisation per position.

    The result is a list of lines 'lemma: [negation: ]aspect: frame', one
    per element of the Cartesian product of the realisations. Control
    labels are dropped from a split frame when the controllee realisation
    is of a category checkControllee reports as 'bad'. With removeOR,
    positions containing '{or}' and no controllee label are removed first.
    """
    fparts = getFrameParts(line)
    lemma, frame, aspect, negation = fparts[0], fparts[1], fparts[2], fparts[3]
    args = [f.strip() for f in frame.split('+')]
    if removeOR:
        # NOTE: removing OR arguments (unless labelled as controllee)
        # NOTE: a new list is built; removing while iterating skipped the element after each removal
        args = [a for a in args if not ('{or}' in a and 'controllee' not in a)]
    # NOTE: list of lists containing pairs (label, realisation)
    reallabels = copyLabelsToReals(args)
    split_frames = []
    # NOTE: Cartesian product of reallabels
    for c in cartesianProduct(reallabels):
        local_control = checkControllee(c)
        local_frame = []
        for (labels, real) in c:
            labs = labels.split(',')
            for (controller, controllees) in control_pairs:
                # NOTE: get() because a controller may occur without any controllee in the frame
                if local_control.get(controllees) == 'bad':
                    if controller in labs:
                        labs.remove(controller)
                    if controllees in labs:
                        labs.remove(controllees)
            local_frame.append(','.join(labs)+"{"+real+"}")
        split_frames.append(" + ".join(local_frame))
    # NOTE: getFrameParts returns False for entries without the negation field; it is left out of the output
    head = [lemma] if negation is False else [lemma, negation]
    return [": ".join(head+[aspect, sf]) for sf in split_frames]

# PUTTING IT ALL TOGETHER

def makeSubFrame(lemma, gfargs, aspect, negation):
    """Build the text of one converted sub-frame.

    The result consists of the non-empty parts, one per line, in this
    order: predicate (``semNosemArg``), constraints imposed by all
    dependents (``makeConstraints``), inherent reflexivity (``makeREFL``,
    only when ``checkREFL`` holds), control relation(s) (``makeControl``)
    and negation (``frameNeg``).

    ``aspect`` is accepted but currently ignored, see the WORKAROUND below.
    """
    predicate = semNosemArg(lemma, gfargs)
    constraints = makeConstraints(gfargs)
    control_rel = makeControl(gfargs)
    # NOTE: inherent reflexivity
    reflexive = makeREFL() if checkREFL(gfargs, lemma) else False
    # WORKAROUND: the aspect constraint, frameAsp(aspect), is switched off
    # for a while due to problems with aspect in Walenty
    aspect_constr = None
    negation_constr = frameNeg(negation)
    ordered = (predicate, constraints, reflexive, control_rel,
               aspect_constr, negation_constr)
    return '\n'.join([part for part in ordered if part])

def makeReducePassConv(lemma, gfargs, aspect, negation, reduce, ppas):
    """Convert an argument list into a list of sub-frame strings.

    reduce -- if true, every argument list returned by
              ``addreduced(gfargs)`` is converted; otherwise only
              ``gfargs`` itself
    ppas   -- if false, each argument list yields its active sub-frame;
              if true, it yields only passive sub-frames, and only when
              ``checkPassive`` accepts it: one built with
              ``makePass(..., 'obl-ag')`` (when that returns something
              non-empty) followed by one built with ``makePass(..., 'null')``

    Returns the list of sub-frames built by ``makeSubFrame``.
    """
    # NOTE: plain truth tests instead of "== True" / "== False", so that
    # `reduced` is bound for every value of `reduce`
    if reduce:
        reduced = addreduced(gfargs)
    else:
        reduced = [gfargs]
    converted = []
    for gfargsred in reduced:
        # NOTE(review): the active frame is built even when ppas is set and
        # is then discarded; kept as is in case makeSubFrame has side
        # effects - confirm before removing
        base_frame = [makeSubFrame(lemma, gfargsred, aspect, negation)]
        if ppas:
            base_frame = []
            if checkPassive(gfargsred):
                obl_gfargs = makePass(gfargsred, 'obl-ag')
                # NOTE: sometimes there's no SUBJ that can be OBL-AG (zadowolić: perf: subj{cp(że)} + obj{np(str)})
                if obl_gfargs:
                    base_frame.append(
                        makeSubFrame(lemma, obl_gfargs, aspect, negation))
                null_gfargs = makePass(gfargsred, 'null')
                base_frame.append(
                    makeSubFrame(lemma, null_gfargs, aspect, negation))
        converted.extend(base_frame)
    return converted

def makeFrame(line, reduce, ppas):
    """Convert one dictionary entry into a list of frame strings.

    The entry is parsed with ``getFrameParts``, its arguments are turned
    into triples by ``getArgs`` and passed to ``makeReducePassConv``
    together with ``reduce`` and ``ppas``.  Entries whose frame contains
    'subj,controller{E}' additionally get the frames produced without that
    argument.
    """
    raw_lemma, frame, aspect, negation = getFrameParts(line)
    lemma = cpxlemma(raw_lemma)
    arg_strings = []
    for chunk in frame.split('+'):
        arg_strings.append(chunk.strip())
    # NOTE: list of triples (gf, argslist, ctrl)
    gfargs = getArgs(arg_strings)
    conv = makeReducePassConv(lemma, gfargs, aspect, negation, reduce, ppas)
    if 'subj,controller{E}' not in frame:
        return conv
    # NOTE: handling frames with E as a controller, removing this argument
    cutgfargs = makeEnoSUBJframe(gfargs)
    cutconv = makeReducePassConv(lemma, cutgfargs, aspect, negation,
                                 reduce, ppas)
    return conv + modifyEnoSUBJconv(cutconv)

def modifyEnoSUBJconv(conv):
    """Return a copy of ``conv`` with XCOMP frames extended.

    Every frame containing 'XCOMP' gets the output of ``makeEnoSUBJ()``
    appended on a new line; all other frames are copied unchanged.
    """
    return [
        frame + '\n' + makeEnoSUBJ() if 'XCOMP' in frame else frame
        for frame in conv
    ]

# RUNNING THE SCRIPT

def makeValDictByPos(input_dict, reduce, ppas):
    """Read a valence dictionary file and convert all of its frames.

    The first line of the file is skipped, as are empty lines and lines
    starting with '%'.

    input_dict -- path of the dictionary file
    reduce     -- passed on to ``makeFrame``
    ppas       -- if true, passive frames are produced as well

    Returns a dict: lemma (first word of the entry's lemma, i.e. without
    SIĘ, decoded from UTF-8) -> entry line -> {'default': [...],
    'ppas': [...]}, where both lists hold the converted frames decoded
    from UTF-8, without duplicates and in the order of creation.
    """
    # NOTE: for debugging
    global current_frame
    frames = {}
    # NOTE: the file is closed as soon as it has been read
    with open(input_dict, 'r') as source:
        lines = source.readlines()
    for line in lines[1:]:
        line = line.strip()
        if not line or line[0] == '%':
            continue
        current_frame = line
        # NOTE: for dict lemma without SIĘ
        lemma = getFrameParts(line)[0].split()[0]
        lemma = lemma.decode("utf-8")
        # NOTE: for each lemma, for each Walenty frame, create a dict of converted frames (default and ppas)
        converted = {'default': [], 'ppas': []}
        frames.setdefault(lemma, {})[line] = converted
        # NOTE: basic frames
        for bfr in makeFrame(line, reduce, False):
            bfr = bfr.decode("utf-8")
            if bfr not in converted['default']:
                converted['default'].append(bfr)
        if ppas:
            # NOTE: passive frames
            for pfr in makeFrame(line, reduce, True):
                pfr = pfr.decode("utf-8")
                if pfr not in converted['ppas']:
                    converted['ppas'].append(pfr)
    return frames

# EXTRA FUNCTIONS

def makeFreqDict(input_dict):
    """Count how often each frame occurs in a valence dictionary file.

    The first line of the file is skipped, as are empty lines and lines
    starting with '%'.  Before counting, the arguments of a frame
    (separated by ' + ') are sorted, so frames that differ only in the
    order of arguments are counted together.

    Returns a dict: normalised frame string -> number of occurrences.
    """
    frames = {}
    # NOTE: the file is closed as soon as it has been read
    with open(input_dict, 'r') as source:
        lines = source.readlines()
    for line in lines[1:]:
        line = line.strip()
        if not line or line[0] == '%':
            continue
        frame = getFrameParts(line)[1]
        frame = ' + '.join(sorted(frame.split(' + ')))
        frames[frame] = frames.get(frame, 0) + 1
    return frames

def printArgDict(dict, output):
    """Write ``dict`` to the file ``output``, one line per key.

    Each line holds the value, two tab characters and the key, in the
    iteration order of ``dict``.  Keys must be strings.  The file is
    overwritten and closed before the function returns.
    """
    with open(output, 'w') as out:
        for key in dict:
            out.write(str(dict[key]) + '\t\t' + key + '\n')

def makeFreqArgDict(input_dict):
    """Count how often each argument occurs in a valence dictionary file.

    The first line of the file is skipped, as are empty lines and lines
    starting with '%'.  Every remaining entry is parsed with
    ``getFrameParts`` and ``getArgs``; each element of each triple's
    argument list is counted once per occurrence.

    Returns a dict: argument -> number of occurrences.
    """
    # NOTE: for debugging
    global current_frame
    freq_args = {}
    # NOTE: the file is closed as soon as it has been read
    with open(input_dict, 'r') as source:
        lines = source.readlines()
    for line in lines[1:]:
        line = line.strip()
        if not line or line[0] == '%':
            continue
        current_frame = line
        frame = getFrameParts(line)[1]
        args = [f.strip() for f in frame.split('+')]
        for (gf, argslist, ctrl) in getArgs(args):
            for arg in argslist:
                freq_args[arg] = freq_args.get(arg, 0) + 1
    return freq_args

def makeFreqBaseArgDict(dict):
    """Count the keys of ``dict`` by their base category.

    The base category of a key is the text before its first '(' (the whole
    key if there is none).  Each key counts once; the values of ``dict``
    are not used.

    Returns a dict: base category -> number of keys with that base.
    """
    baseargs = {}
    for key in sorted(dict):
        basekey = key.split('(')[0]
        # NOTE: dict.get instead of the deprecated has_key
        baseargs[basekey] = baseargs.get(basekey, 0) + 1
    return baseargs

def printFreqArgDict(dict, output):
    """Write ``dict`` to the file ``output``, one line per key, keys sorted.

    Each line holds the value, two tab characters and the key.  Keys must
    be strings.  The file is overwritten and closed before the function
    returns.
    """
    with open(output, 'w') as out:
        for key in sorted(dict):
            out.write(str(dict[key]) + '\t\t' + key + '\n')

def printOBLDict(dict, output):
    """Write ``dict`` to the file ``output``, one line per key, keys sorted.

    Each line holds the key, two tab characters and the value, both
    converted with ``str``.  The file is overwritten and closed before the
    function returns.
    """
    with open(output, 'w') as out:
        for key in sorted(dict):
            out.write(str(key) + '\t\t' + str(dict[key]) + '\n')

# CREATING SPLIT FRAMES

def makeSplitEntries(input_dict, output, removeOR=False):
    """Write the split version of a valence dictionary file to ``output``.

    The first line of ``input_dict`` is skipped, as are empty lines and
    lines starting with '%'.  Every remaining entry is expanded with
    ``makeSplitEntry(line, removeOR)`` and each resulting entry is written
    on its own line.  Both files are closed before the function returns.
    """
    # NOTE: the output file is opened (and truncated) before the input is read
    with open(output, 'w') as out:
        with open(input_dict, 'r') as source:
            lines = source.readlines()
        for line in lines[1:]:
            line = line.strip()
            if not line or line[0] == '%':
                continue
            for se in makeSplitEntry(line, removeOR):
                out.write(se + '\n')
