package termopl;

import java.util.*;

import pl.sgjp.morfeusz.*;

/**
 * Computes a base form string for a {@link Term}.
 *
 * <p>Two strategies are implemented. When a {@link Tagset} was supplied to the
 * constructor, the base form is assembled token by token from the term's
 * {@link Form}s, using the Morfeusz generator to look up nominative forms.
 * When no tagset is available, the base form is chosen among the term's
 * tab-separated "form TAB tag" strings according to their tags and counts.
 */
public class BaseFormGuesser
{

	// Values accepted by the "singular" parameter of the findNominalForm/nominalForm methods.
	private static final int PREFER_SINGULAR = 0;	// take singular if present, otherwise any number
	private static final int ONLY_SINGULAR = 1;		// accept only singular
	private static final int ONLY_PLURAL = 2;		// accept only plural

	private final Morfeusz morfeusz;
	private final Tagset tagset;

	/**
	 * Creates a guesser without a tagset and without a Morfeusz generator.
	 * In this state {@link #calcBaseForm(Term)} delegates to
	 * {@link #calcBaseFormFromFormsWithTAG(HashMap, LinkedList)}; the methods that
	 * consult the tagset must not be used.
	 */
	public BaseFormGuesser()
	{
		this.tagset = null;
		this.morfeusz = null;
	}

	/**
	 * Creates a guesser that uses the given tagset and the generator returned by
	 * {@code Morph.getGenerator()}.
	 *
	 * @param tagset tagset used to read part of speech and grammatical categories from tags
	 */
	public BaseFormGuesser(Tagset tagset)
	{
		this.tagset = tagset;
		this.morfeusz = Morph.getGenerator();
	}

	/**
	 * Computes the base form of a term, choosing the strategy by tagset availability.
	 *
	 * @param term term whose base form is requested
	 * @return the computed base form
	 */
	public String calcBaseForm(Term term)
	{
		if (tagset == null) return calcBaseFormFromFormsWithTAG(term.getFormsWithTAG(), term.getForms());
		return calcBaseFormFromForms(term.getForms());
	}

	/**
	 * Builds the base form from the tokens of the first form in the list.
	 *
	 * <p>If there is a single form with at most one occurrence, only the token's own
	 * lemma/form and the generator are consulted; otherwise the number and letter case
	 * are decided by looking at the same token position in all forms.
	 *
	 * @param forms forms of the term; the first one supplies the token sequence
	 * @return the tokens' base forms joined according to each token's {@code spaceAfter}
	 *         flag, or an empty string when {@code forms} is null or empty
	 */
	public String calcBaseFormFromForms(LinkedList<Form> forms)
	{
		if (forms == null || forms.isEmpty()) return "";

		Form form = forms.getFirst();
		boolean useMorfeuszOnly = (forms.size() <= 1 && form.allOccurences() <= 1);
		StringBuilder buffer = new StringBuilder();
		boolean appendSpace = false;
		int i = 0;

		for (MatchedToken mt : form.getTokens()) {
			Token t = mt.token;
			String piece;

			if (appendSpace) buffer.append(" ");
			appendSpace = t.spaceAfter;
			if (useMorfeuszOnly) {
				piece = (mt.computeBaseForm ? resolveCase(nominalForm(t), t.lemma) : t.form);
			}
			else {
				String candidate = (mt.computeBaseForm ? nominalForm(t, forms, i) : t.form);

				piece = resolveCase(candidate, forms, i);
			}
			buffer.append(piece);
			i++;
		}
		return buffer.toString();
	}

	/**
	 * Chooses a base form among "form TAB tag" strings using their tags and counts.
	 *
	 * <p>Keys whose tag (lower-cased) contains {@code number=sing} and/or {@code case=nom}
	 * are preferred in this order: singular nominative, nominative, singular, other tagged
	 * forms. If no key carries a tag at all, every key is considered. Within the chosen
	 * group the key with the highest count wins. The first letter of the result is then
	 * lower-cased when at least half of all keys start with a non-upper-case character,
	 * and upper-cased otherwise.
	 *
	 * @param forms map from "form TAB tag" (or plain form) to its count
	 * @param list  not used by this method; kept for interface compatibility
	 * @return the selected form without its tag, or an empty string when nothing can be selected
	 */
	public String calcBaseFormFromFormsWithTAG(HashMap<String,Integer> forms, LinkedList<Form> list)
	{
		if (forms == null || forms.isEmpty()) return "";

		List<String> singNom = new ArrayList<String>();
		List<String> nom = new ArrayList<String>();
		List<String> sing = new ArrayList<String>();
		List<String> other = new ArrayList<String>();
		int lcase = 0, ucase = 0;

		for (String form : forms.keySet()) {
			// An empty key has no first character to inspect and no tag to classify.
			if (form == null || form.length() == 0) continue;

			String[] frm_tag = form.split("\t");
			String tag = (frm_tag.length > 1 ? frm_tag[1] : null);

			if (Character.isUpperCase(form.codePointAt(0))) ucase++;
			else lcase++;
			if (tag != null) {
				String tagl = tag.toLowerCase(Locale.ROOT);
				boolean singular = tagl.contains("number=sing");
				boolean nominative = tagl.contains("case=nom");

				if (nominative) nom.add(form);
				if (singular) sing.add(form);
				if (nominative && singular) singNom.add(form);
				if (!nominative && !singular) other.add(form);
			}
		}

		Collection<String> candidates;

		if (!singNom.isEmpty()) candidates = singNom;
		else if (!nom.isEmpty()) candidates = nom;
		else if (!sing.isEmpty()) candidates = sing;
		else if (!other.isEmpty()) candidates = other;
		else candidates = forms.keySet();	// no tagged form at all: consider every form

		String mostFreq = null;
		int max = 0;

		for (String form : candidates) {
			Integer n = forms.get(form);

			if (form == null || n == null) continue;
			// The first usable candidate is always taken, so non-positive counts still yield a result.
			if (mostFreq == null || n.intValue() > max) {
				mostFreq = form;
				max = n.intValue();
			}
		}
		if (mostFreq == null) return "";

		int tab = mostFreq.indexOf('\t');

		if (tab >= 0) mostFreq = mostFreq.substring(0, tab);
		if (mostFreq.length() == 0) return mostFreq;

		// Split on a code point boundary so that a supplementary first character stays intact.
		int first = mostFreq.codePointAt(0);
		int headLength = Character.charCount(first);
		String head = mostFreq.substring(0, headLength);
		String tail = mostFreq.substring(headLength);

		if (lcase >= ucase) {
			if (Character.isUpperCase(first)) mostFreq = head.toLowerCase(Locale.ROOT) + tail;
		}
		else {
			if (Character.isLowerCase(first)) mostFreq = head.toUpperCase(Locale.ROOT) + tail;
		}

		return mostFreq;
	}

	/**
	 * Returns the nominative form generated for a single-word lemma.
	 *
	 * @param lemma lemma to inflect; returned unchanged if it contains a space
	 * @param tag   colon-separated tag describing the word
	 * @return the generated nominative form, or {@code lemma} when none is found
	 */
	public String nominalForm(String lemma, String tag)
	{
		if (!lemma.contains(" ")) {
			MorphInterpretation mi = findNominalForm(lemma, tag.split(":"), PREFER_SINGULAR);

			if (mi != null) lemma = mi.getOrth();
		}
		return lemma;
	}

	/**
	 * Returns the nominative form for a token, preferring singular.
	 *
	 * @param token token to convert
	 * @return the generated nominative form, or the token's own form when none is found
	 */
	public String nominalForm(Token token)
	{
		MorphInterpretation mi = findNominalForm(token.lemma, token.getTag(tagset), PREFER_SINGULAR);

		if (mi != null) return mi.getOrth();
		return token.form;
	}

	/**
	 * Returns the nominative form for a token, deciding the number from all forms.
	 *
	 * <p>The tokens at {@code tokenIndex} are scanned in order: the first one without a
	 * "number" category switches to "prefer singular", the first one with number
	 * {@code sg} switches to "only singular"; if neither occurs, only plural is accepted.
	 * Forms that have no matched token at that position are skipped.
	 *
	 * @param token      token to convert
	 * @param forms      all forms of the term
	 * @param tokenIndex position of the token within each form
	 * @return the generated nominative form, or the token's own form when none is found
	 */
	public String nominalForm(Token token, LinkedList<Form> forms, int tokenIndex)
	{
		int singular = ONLY_PLURAL;

		for (Form form : forms) {
			MatchedToken mt = form.getMatchedToken(tokenIndex);

			// resolveCase() guards against a missing match as well.
			if (mt == null) continue;

			String num = tagset.getCategory(mt.token, "number");

			if (num == null) {
				singular = PREFER_SINGULAR;
				break;
			}
			else if (num.equals("sg")) {
				singular = ONLY_SINGULAR;
				break;
			}
		}

		MorphInterpretation mi = findNominalForm(token.lemma, token.getTag(tagset), singular);

		if (mi != null) return mi.getOrth();
		return token.form;
	}

	/**
	 * Generates the forms of a lemma and picks the nominative one matching the tag.
	 *
	 * <p>Nothing is looked up unless the tag carries case, gender and number. If no
	 * interpretation matches, the lookup is repeated with the case of the lemma's first
	 * letter flipped.
	 *
	 * @param lemma    lemma to inflect
	 * @param tag      tag split into its parts
	 * @param singular 0 = prefer singular, 1 = accept only singular, 2 = accept only plural
	 * @return the matching interpretation, or {@code null} when none is found or generation fails
	 */
	public MorphInterpretation findNominalForm(String lemma, String[] tag, int singular)
	{
		String gcase = tagset.getCategory(tag, "case");
		String gender = tagset.getCategory(tag, "gender");
		String number = tagset.getCategory(tag, "number");

		if (gcase == null || gender == null || number == null) return null;

		List<MorphInterpretation> interps = generateForms(lemma);

		if (interps == null) return null;

		MorphInterpretation mi = findNominalForm(interps, tag, singular);

		if (mi == null) {
			// Retry with the opposite case of the first letter.
			interps = generateForms(changeCase(lemma, startsWithLowerCase(lemma)));
			if (interps != null) mi = findNominalForm(interps, tag, singular);
		}
		return mi;
	}

	/**
	 * Selects, among generated interpretations, a nominative one compatible with the tag.
	 *
	 * <p>Generated tags are split on ':' and read positionally: index 1 is the number,
	 * 2 the case, 3 the gender, 4 the degree (for "adj") or the aspect (for "ger", "ppas",
	 * "pact"), 5 the negation. Interpretations whose tag is too short for the required
	 * positions are skipped.
	 *
	 * @param interps  interpretations to choose from; may be {@code null}
	 * @param tag      tag of the word being converted, split into its parts
	 * @param singular 0 = prefer singular, 1 = accept only singular, 2 = accept only plural
	 * @return the chosen interpretation, or {@code null} when none qualifies or the tag
	 *         lacks a category required for its part of speech
	 */
	public MorphInterpretation findNominalForm(List<MorphInterpretation> interps, String[] tag, int singular)
	{
		if (interps == null) return null;

		String pos = tagset.getPos(tag);

		if (pos == null) return null;

		String gender = tagset.getCategory(tag, "gender");
		boolean adjective = pos.equals("adj");
		boolean participle = pos.equals("ger") || pos.equals("ppas") || pos.equals("pact");
		String degree = null, negation = null, aspect = null;
		int requiredParts = 4;

		if (adjective) {
			degree = tagset.getCategory(tag, "degree");
			if (degree == null) return null;
			requiredParts = 5;
		}
		else if (participle) {
			negation = tagset.getCategory(tag, "negation");
			aspect = tagset.getCategory(tag, "aspect");
			// Error in tag
			if (negation == null || aspect == null) return null;
			requiredParts = 6;
		}

		MorphInterpretation fallback = null;

		for (MorphInterpretation candidate : interps) {
			String rawTag = candidate.getTag(morfeusz);

			if (rawTag == null) continue;

			String[] mtag = rawTag.split(":");

			if (mtag.length < requiredParts) continue;
			if (!pos.equals(tagset.getPos(mtag))) continue;
			if (!mtag[2].contains("nom")) continue;
			if (!compatibleGender(mtag[3], gender)) continue;
			if (adjective && !mtag[4].contains(degree)) continue;
			if (participle && !(mtag[4].contains(aspect) && mtag[5].contains(negation))) continue;

			if ((singular <= ONLY_SINGULAR && mtag[1].contains("sg")) ||
				(singular == ONLY_PLURAL && mtag[1].contains("pl")))
			{
				return candidate;
			}
			// In "prefer singular" mode remember the latest non-singular match as a fallback.
			if (singular == PREFER_SINGULAR) fallback = candidate;
		}
		return fallback;
	}

	/**
	 * Adjusts the letter case of a form so that it agrees with its lemma.
	 *
	 * <p>If the lemma contains no lower-case character, the whole form is upper-cased.
	 * Otherwise the first letter of the form is lower- or upper-cased to match the first
	 * letter of the lemma.
	 *
	 * @param form  form to adjust
	 * @param lemma lemma whose letter case is authoritative
	 * @return the adjusted form
	 */
	public String resolveCase(String form, String lemma)
	{
		boolean lowerCase = startsWithLowerCase(form);
		boolean convertToLowerCase = startsWithLowerCase(lemma);

		if (allInUpperCase(lemma) && !allInUpperCase(form)) form = form.toUpperCase(Locale.ROOT);
		// changeCase() takes an "upper" flag, hence the negation.
		if (lowerCase != convertToLowerCase) form = changeCase(form, !convertToLowerCase);
		return form;
	}

	/**
	 * Adjusts the letter case of a form according to how the token at the same position
	 * is written in all forms of the term.
	 *
	 * <p>If any observed form starts with a lower-case letter, the result is lower-cased;
	 * otherwise, if no observed form contains a lower-case character, the result is
	 * upper-cased; otherwise only the first letter is upper-cased. Nothing is changed when
	 * the first letter of {@code form} already agrees with the decision.
	 *
	 * @param form       form to adjust
	 * @param forms      all forms of the term
	 * @param tokenIndex position of the token within each form
	 * @return the adjusted form
	 */
	public String resolveCase(String form, LinkedList<Form> forms, int tokenIndex)
	{
		boolean lowerCase = startsWithLowerCase(form);
		boolean convertToLowerCase = false;
		boolean allInUpperCase = true;

		for (Form f : forms) {
			MatchedToken mt = f.getMatchedToken(tokenIndex);

			if (mt == null) continue;

			String fx = mt.token.form;

			if (startsWithLowerCase(fx)) {
				convertToLowerCase = true;
				break;
			}
			if (allInUpperCase) allInUpperCase = allInUpperCase(fx);
		}
		if (convertToLowerCase) allInUpperCase = false;
		if (lowerCase != convertToLowerCase) {
			if (allInUpperCase) form = form.toUpperCase(Locale.ROOT);
			else if (convertToLowerCase) form = form.toLowerCase(Locale.ROOT);
			else form = changeCase(form, true);
		}
		return form;
	}

	/**
	 * Tells whether a generated gender value is acceptable for a required gender.
	 *
	 * @param g1 gender part of the generated tag
	 * @param g2 gender required by the source tag
	 * @return {@code true} if either value is {@code null}, if {@code g1} contains
	 *         {@code g2}, or if {@code g1} starts with "p" and {@code g2} is "n"
	 */
	public boolean compatibleGender(String g1, String g2)
	{
		if (g1 == null || g2 == null) return true;
		if (g1.contains(g2)) return true;
		return g1.startsWith("p") && g2.equals("n");
	}

	/**
	 * Tells whether a string contains no lower-case character.
	 * Strings without any letters (including the empty string) therefore qualify.
	 *
	 * @param str string to test
	 * @return {@code true} if no character of {@code str} is lower-case
	 */
	public boolean allInUpperCase(String str)
	{
		for (int i = 0; i < str.length(); i++) {
			if (Character.isLowerCase(str.charAt(i))) return false;
		}
		return true;
	}

	/**
	 * Tells whether the first character of a string is lower-case.
	 *
	 * @param str string to test
	 * @return {@code true} if {@code str} is non-empty and starts with a lower-case
	 *         character; {@code false} for {@code null} or empty input
	 */
	public boolean startsWithLowerCase(String str)
	{
		return str != null && str.length() > 0 && Character.isLowerCase(str.charAt(0));
	}

	/**
	 * Changes the case of the first character of a string.
	 *
	 * @param str   string to change; {@code null} and empty strings are returned as they are
	 * @param upper {@code true} to upper-case a lower-case first character,
	 *              {@code false} to lower-case an upper-case first character
	 * @return the string with its first character converted, or {@code str} itself when
	 *         the first character already has the requested case
	 */
	public String changeCase(String str, boolean upper)
	{
		if (str == null || str.length() == 0) return str;

		char ch = str.charAt(0);

		if (Character.isUpperCase(ch) && !upper) return Character.toLowerCase(ch) + str.substring(1);
		if (Character.isLowerCase(ch) && upper) return Character.toUpperCase(ch) + str.substring(1);
		return str;
	}

	// Asks the generator for all forms of a lemma; a failed lookup is reported as null
	// so that callers fall back to the original token form (deliberate best effort).
	private List<MorphInterpretation> generateForms(String lemma)
	{
		try {
			return morfeusz.generate(lemma);
		}
		catch (Exception ignored) {
			return null;
		}
	}

}
