#-*- coding: utf-8 -*-
'''
Created on Jan 23, 2013

@author: lennyn
'''

import pyparsing
import re
from pyparsing import Forward, ParserElement, Word, Literal, ZeroOrMore, OneOrMore, Or, oneOf, Dict, Group, alphas, printables, lineEnd, delimitedList, restOfLine
import schema

# --- pyparsing grammar for schema expressions -------------------------------
# Operator tokens. `.suppress()` means they are matched but dropped from the
# parse results.
PLUS = Literal('+').suppress()
CONCAT = Literal('^').suppress()

# Forward declaration: UnorderedElem is used inside the bracketed elements
# below before its own body is supplied with `<<`.
UnorderedElem = Forward()
# A lone '-' token.
VerbElem = Literal('-')
# A run of characters taken from pyparsing's `alphas` set.
PhraseElem = Word(alphas)
# NOTE(review): PLUS is assigned a second time here with the same definition
# as at the top of this section.
PLUS = Literal('+').suppress()
COMMA = Literal(',').suppress()
# Naming note: BRACE_* match round parentheses, CURLY_* match curly braces.
BRACE_LEFT = Literal('(').suppress()
BRACE_RIGHT = Literal(')').suppress()
CURLY_LEFT = Literal('{').suppress()
CURLY_RIGHT = Literal('}').suppress()
# Either quote character, suppressed. Nothing here requires the opening and
# closing quote to be the same character.
QUOT = (Literal('"') | Literal("'")).suppress()
# Quoted text: `alphas`, spaces and the Polish letters listed in the literal.
# The second alternative accepts text whose closing quote is missing.
TextElem = (QUOT + Word(alphas + u' ęóąśłżźćń') + QUOT) | (QUOT + Word(alphas + u' ęóąśłżźćń'))

# A parenthesised unordered expression.
OptionalElem = BRACE_LEFT + UnorderedElem + BRACE_RIGHT
# One or more phrase / text / parenthesised items separated by '^'.
ConcatElem = delimitedList((PhraseElem | TextElem | OptionalElem), delim='^')
# One or more unordered expressions inside curly braces, with no separator
# between them.
AtLeastOneOfElem = CURLY_LEFT + OneOrMore(UnorderedElem) + CURLY_RIGHT
#SchemaListElem = delimitedList(SchemaElem, delim=',')
# Comma-separated unordered expressions inside curly braces.
OneOfElem = Forward()
OneOfElem << (CURLY_LEFT + delimitedList(UnorderedElem, delim=',') + CURLY_RIGHT)
# '+'-separated list of items. The alternatives are combined with `^`, which
# in pyparsing builds an Or (the longest-matching alternative wins) rather
# than a first-match choice.
UnorderedElem << delimitedList(ConcatElem ^ OptionalElem ^ VerbElem ^ AtLeastOneOfElem ^ OneOfElem, delim='+')
#SchemaElem << (ConcatElem | OptionalElem | AtLeastOneOfElem | UnorderedElem | VerbElem)
#SchemaElem << Group(PhraseElem | TextElem | OptionalElem | AtLeastOneOfElem | OneOfElem | UnorderedElem)
# Top-level rule: one or more of an unordered expression, a bare '^'
# (suppressed), or a parenthesised expression.
Schema = OneOrMore(UnorderedElem ^ CONCAT ^ OptionalElem)

# NOTE(review): `stack` is not read or written anywhere in the visible code.
stack = []

def pushPhrase(s, loc, toks):
    '''
    Debug helper: print the received tokens and return a fixed placeholder.

    The (s, loc, toks) signature is the one pyparsing uses for parse actions.
    NOTE(review): this function is not attached to any grammar element in the
    visible code.
    '''
    print 'phrase', toks
    return 'dupa'

def pushOptional(s, loc, toks):
    '''
    Debug helper: print the received tokens; returns None implicitly.

    The (s, loc, toks) signature is the one pyparsing uses for parse actions.
    NOTE(review): this function is not attached to any grammar element in the
    visible code.
    '''
    print 'optional', toks

# NOTE(review): this prints the attribute names of the `schema` module every
# time this module is imported; it looks like leftover debug output.
print dir(schema)
# Parse actions: turn matched tokens into objects from the `schema` module.
# Leaf elements are built from the first (only) token, or from nothing.
PhraseElem.setParseAction(lambda s,l,toks: schema.Phrase(toks[0]))
TextElem.setParseAction(lambda s,l,toks: schema.TextElement(toks[0]))
VerbElem.setParseAction(lambda s,l,toks: schema.Verb())
# Bracketed elements always wrap their tokens in a ComplexElement of the
# corresponding ElemType.
OptionalElem.setParseAction(lambda s,l,toks: schema.ComplexElement(schema.ElemType.optional, toks))
AtLeastOneOfElem.setParseAction(lambda s,l,toks: schema.ComplexElement(schema.ElemType.atLeastOneOf, toks))
OneOfElem.setParseAction(lambda s,l,toks: schema.ComplexElement(schema.ElemType.oneOf, toks))
# Delimited lists: a single item is passed through unchanged; only two or
# more items are wrapped in a ComplexElement.
UnorderedElem.setParseAction(lambda s,l,toks: schema.ComplexElement(schema.ElemType.unordered, toks) if len(toks) > 1 else toks[0])
ConcatElem.setParseAction(lambda s,l,toks: schema.ComplexElement(schema.ElemType.concatenation, toks) if len(toks) > 1 else toks[0])
# The top-level rule yields one schema.Schema that receives the element
# tokens, the string being parsed (`s`) and the match location (`l`).
Schema.setParseAction(lambda s,l,toks: schema.Schema(elements=toks, schemaText=s, offsets=l))

#ENDLS = ZeroOrMore(lineEnd).suppress()
#COMMENT = Literal('#') + restOfLine
#Ignorables = ZeroOrMore(Or(ENDLS, COMMENT)).suppress()
#Token = Word(alphanums+'-_')
#OptToken = Literal('[').suppress() + Token + Literal(']').suppress()
#Attrs = delimitedList(Token, delim=':')

def correctSchema(regex, replacement, schemaText):
    '''
    Apply one regex substitution repeatedly until the text stops changing.

    regex, replacement -- passed straight to re.sub.
    schemaText         -- the text to rewrite; None is returned unchanged
                          without calling re.sub.

    Returns the result of the last re.sub call, i.e. the first text on which
    the substitution is a no-op. A substitution that never reaches such a
    text makes this loop forever.
    '''
    if schemaText is None:
        return None
    current = schemaText
    while True:
        rewritten = re.sub(regex, replacement, current)
        if rewritten == current:
            return rewritten
        current = rewritten

def parseSchema(schemaText0):
    schemaText = correctSchema(ur'(\b[a-zęóąśłżźćń][a-zęóąśłżźćń]*)"([A-Za-zęóąśłżźćń])', r'\1"^\2', schemaText0)
    schemaText = correctSchema(ur'([A-Za-z]+)\)"([A-Za-zęóąśłżźćń])', r'\1)^"\2', schemaText)
    schemaText = correctSchema(ur'([A-Za-z]+)\)([A-Z])', r'\1)^\2', schemaText)
    schemaText = correctSchema(ur'(\b[A-Z][A-Za-z]*)[ ]?"([A-Za-zęóąśłżźćń])', r'\1^"\2', schemaText)
    schemaText = correctSchema(ur'([A-Za-z])("że")', r'\1^\2', schemaText)
    schemaText = correctSchema(ur'(\b[A-Z][A-Za-z]*) (\b[A-Z][A-Za-z]*\b)', r'\1^\2', schemaText)
    schemaText = correctSchema(ur'(\b[A-Z][A-Za-z]*) (\b[A-Z][A-Za-z]*\b)', r'\1^\2', schemaText)
    schemaText = schemaText.replace('.', ',').replace('++', '+').replace('[', '').replace(' ^', '^')
    #~ print schemaText.encode('utf-8')
    #~ schemaText = re.sub(ur'(\b[a-zęóąśłżźćń][a-zęóąśłżźćń]*)"([A-Za-zęóąśłżźćń])', r'\1"^\2', schemaText0)
    #~ schemaText = re.sub(ur'([A-Za-z]+)\)"([A-Za-zęóąśłżźćń])', r'\1)^"\2', schemaText)
    #~ schemaText = re.sub(ur'([A-Za-z]+)\)([A-Z])', r'\1)^\2', schemaText)
    #~ schemaText = re.sub(ur'(\b[A-Z][A-Za-z]*)"([A-Za-zęóąśłżźćń])', r'\1^"\2', schemaText)
    #~ schemaText = re.sub(ur'(\b[A-Z][A-Za-z]*) (\b[A-Z][A-Za-z]*\b)', r'\1^\2', schemaText)
    #~ if schemaText0 != schemaText:
        #~ print 'sub', schemaText0, '==>', schemaText
    res = None
#    print 'PARSE:', schemaText.encode('utf-8')
    try:
        #~ print '<<<'+schemaText.encode('utf-8')+'>>>'
        res = Schema.parseString(schemaText, parseAll=True)
#    except Exception as ex:
#        raise ex
#        if '{(' in schemaText and '})' in schemaText:
#            res = Schema.parseString(schemaText.replace('{(', '({'), parseAll=True)
#        elif '({' in schemaText and ')}' in schemaText:
#            res = Schema.parseString(schemaText.replace(')}', '})'), parseAll=True)
#        else:
#            raise ex
    finally:
        pass
    assert len(res) == 1
    return res[0]
