package tag;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.util.Date;
import java.util.LinkedList;
import java.util.List;

import tag.extract.MorphGenerator;
import trees.TAGTreeNode;
import utility.Utility;

import com.dawidweiss.morfeusz.InterpMorf;
import com.dawidweiss.morfeusz.Morfeusz;

import de.tuebingen.tokenizer.Word;
import de.tuebingen.tree.Grammar;
import de.tuebingen.ui.CommandLineOptions;
import de.tuebingen.ui.Interface;

/**
 * Command-line driver that analyses one sentence with Morfeusz, optionally
 * generates a morphological lexicon for it, and hands the result to the
 * TuLiPa parser.
 *
 * <p>Recognised options (see {@link #processCommandLine(String[])}):
 * <ul>
 * <li>{@code -s <sentence>} &ndash; the sentence to parse (required);</li>
 * <li>{@code -m <path>} &ndash; an existing morph file to pass to the parser;
 * when absent, a file named {@code polish-morph} is generated and converted
 * with the external {@code lexConverter} tool;</li>
 * <li>{@code -t <minutes>} &ndash; parse timeout in minutes (default 5);</li>
 * <li>{@code -o <path>} &ndash; output path ({@code .xml} is appended when
 * missing); when absent the parser is invoked without {@code -x}/{@code -o};</li>
 * <li>{@code -w} &ndash; forwarded unchanged to the parser.</li>
 * </ul>
 */
public class TAGParser {

	/** Sentences with more tokens than this are not parsed. */
	private static final int MAX_TOKENS = 200;

	/** Parse timeout, in minutes, used when {@code -t} is not given. */
	private static final int DEFAULT_TIMEOUT_MINUTES = 5;

	/** File name of the morph lexicon generated when {@code -m} is not given. */
	private static final String GENERATED_MORPH_PATH = "polish-morph";

	/**
	 * Symbolic punctuation token names. Each one gets a fixed
	 * {@code [pos = interp;]} entry in a generated morph file, written in this
	 * order, and none of them is reported as an unknown lexeme.
	 */
	private static final String[] PUNCTUATION_TOKENS = {
			"fullstop", "comma", "qmark", "excl", "lpar", "rpar", "hyph" };

	/**
	 * Additional token that is never reported as an unknown lexeme but, unlike
	 * the punctuation names, gets no morph entry.
	 * NOTE(review): looks like a placeholder token - confirm its origin.
	 */
	private static final String PLACEHOLDER_TOKEN = "foo";

	/**
	 * Program entry point; see the class description for the options.
	 *
	 * <p>Progress messages go to {@code System.err}; tokens, generated morph
	 * entries, the parser log and timing information go to {@code System.out}.
	 * Failures inside the pipeline are printed as stack traces rather than
	 * propagated.
	 *
	 * @param args raw command-line arguments
	 */
	public static void main(String[] args) {

		String s = null, morphPath = null, outPath = null;
		long timeoutMillis = DEFAULT_TIMEOUT_MINUTES * 60L * 1000L;

		CommandLineOptions ops = processCommandLine(args);
		if (ops.check("s")) {
			s = ops.getVal("s");
		}
		if (s == null) {
			System.out.println("No sentence given.");
			return;
		}
		if (ops.check("m")) {
			morphPath = ops.getVal("m");
		}
		if (ops.check("t")) {
			String rawTimeout = ops.getVal("t");
			long minutes;
			try {
				minutes = Integer.parseInt(rawTimeout);
			} catch (NumberFormatException e) {
				minutes = -1;
			}
			// A negative timeout would make Thread.join throw later on.
			if (minutes < 0) {
				System.out.println("Invalid timeout (expected a non-negative number of minutes): "
						+ rawTimeout);
				return;
			}
			// Computed in long so that large values cannot overflow.
			timeoutMillis = minutes * 60L * 1000L;
		}
		if (ops.check("o")) {
			outPath = ops.getVal("o");
			if (!outPath.endsWith(".xml")) {
				outPath += ".xml";
			}
		}
		// Without an output file the parser is run without "-x"/"-o".
		boolean gui = !ops.check("o");
		try {
			Morfeusz m = Morfeusz.getInstance();
			Date begin = new Date();
			System.out.println("START: " + begin.toString() + "\n");

			// Replace each punctuation mark by its blank-separated symbolic name.
			for (int i = 0; i < Utility.punctuation.length; ++i) {
				s = s.replace((String) Utility.punctuation[i].first(),
						" " + (String) Utility.punctuation[i].second() + " ");
			}
			System.err.println(s);
			System.err.println("Tokenizing...");
			List<Word> tokens = new LinkedList<Word>();
			boolean morphOK = true;
			System.err.println("Running SGJP Morfeusz...");
			InterpMorf[] interps = m.getAnalyzer().analyze(s);

			// One Word per run of identical token images.
			// NOTE(review): this also merges a word that is genuinely repeated
			// back to back in the sentence - confirm that this is acceptable.
			String prev = "";
			int index = 0;
			for (InterpMorf i : interps) {
				String image = i.getTokenImage();
				if (image.length() > 0) {
					if (!image.contentEquals(prev)) {
						tokens.add(new Word(image, index, index + 1));
						System.out.println("token: " + image);
						index += 1;
					}
					prev = image;
				}
			}

			boolean createMorph = (morphPath == null);
			if (createMorph) {
				System.err.println("Creating morph file...");
				morphPath = GENERATED_MORPH_PATH;
				BufferedWriter out = new BufferedWriter(new FileWriter(morphPath));
				try {
					for (InterpMorf i : interps) {
						if (i.getLemmaImage().length() > 0) {
							for (String e : MorphGenerator.getMorphEntries(i, TAGTreeNode.grammarFeatures)) {
								out.write(e + "\n");
								System.out.println(e);
							}
						} else {
							// No lemma: only an error if the token is not one
							// of the reserved symbolic names.
							String image = i.getTokenImage();
							if (image.length() > 0 && !isReservedToken(image)) {
								morphOK = false;
								System.out.println("Unknown lexeme: " + image + "\n");
							}
						}
					}
					for (String name : PUNCTUATION_TOKENS) {
						out.write(name + " " + name + " [pos = interp;]\n");
					}
				} finally {
					// Close (and flush) the file even if an entry could not be written.
					out.close();
				}
			}
			if (morphOK) {
				if (tokens.size() <= MAX_TOKENS) {
					String morph = morphPath;
					if (createMorph) {
						// The converted file is read right below, so the
						// conversion has to be finished before continuing.
						convertMorphFile(morphPath);
						morph += ".xml";
					}
					System.err.println("Setting up TuLiPa... ");
					String gram = "grammar/polish.xml";
					String lex = "grammar/polish-lex.xml";
					String[] tulipaArgs = {
							"-g",
							gram,
							"-l",
							lex,
							"-m",
							morph,
							"-a",
							"WYPOWIEDZENIE",
							(ops.check("w") ? "-w" : ""),
							(gui ? "" : "-x"),
							(gui ? "" : "-o"),
							(gui ? "" : outPath) };
					CommandLineOptions tulipaOps = Interface
							.processCommandLine(tulipaArgs);
					Grammar g = AlternativeInterface.loadGrammar(tulipaOps, gram,
							lex, morph);
					TulipaRunner tr = new TulipaRunner(tulipaOps, g, s, tokens);
					System.err.println("Parsing... ");
					Thread t = new Thread(tr);
					t.start();
					t.join(timeoutMillis);
					if (t.isAlive()) {
						System.out.println("Timed out\n(no parse)\n");
						// NOTE(review): this only requests interruption; whether
						// the runner actually stops depends on TulipaRunner.
						t.interrupt();
					} else {
						System.out.println(tr.logMsg);
					}
				} else {
					System.out.println("Sentence too long\n");
				}
			}
			Date end = new Date();
			System.out.println("END: " + end.toString() + "\n");
			System.out.println(formatElapsed(end.getTime() - begin.getTime()));
		} catch (UnsupportedEncodingException e) {
			e.printStackTrace();
		} catch (SecurityException e) {
			e.printStackTrace();
		} catch (UnsatisfiedLinkError e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} catch (Throwable e) {
			System.err.println("Ooops... ");
			e.printStackTrace();
		}
	}

	/**
	 * Tells whether a lemma-less token is one of the reserved symbolic names
	 * that must not be reported as an unknown lexeme.
	 *
	 * @param token the token image to test
	 * @return {@code true} for the punctuation names and the placeholder token
	 */
	private static boolean isReservedToken(String token) {
		if (PLACEHOLDER_TOKEN.equals(token)) {
			return true;
		}
		for (String name : PUNCTUATION_TOKENS) {
			if (name.equals(token)) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Runs {@code lexConverter -i <morphPath> -tM -o <morphPath>.xml} and
	 * blocks until it has terminated.
	 *
	 * <p>The tool's output is read and discarded so that it cannot stall on a
	 * full pipe. A non-zero exit status is reported on {@code System.err};
	 * it is not treated as fatal here.
	 *
	 * @param morphPath path of the plain-text morph file to convert
	 * @throws IOException if the tool cannot be started or its output cannot be read
	 * @throws InterruptedException if the wait for the tool is interrupted
	 */
	private static void convertMorphFile(String morphPath)
			throws IOException, InterruptedException {
		ProcessBuilder builder = new ProcessBuilder("lexConverter", "-i",
				morphPath, "-tM", "-o", morphPath + ".xml");
		// Merge stderr into stdout so that a single loop drains both.
		builder.redirectErrorStream(true);
		Process converter = builder.start();
		// The tool gets no input from us.
		converter.getOutputStream().close();
		InputStream output = converter.getInputStream();
		try {
			byte[] buffer = new byte[4096];
			while (output.read(buffer) != -1) {
				// discard
			}
		} finally {
			output.close();
		}
		int status = converter.waitFor();
		if (status != 0) {
			System.err.println("lexConverter exited with status " + status);
		}
	}

	/**
	 * Renders an elapsed time as whole minutes plus the remaining seconds.
	 *
	 * @param totalMillis elapsed time in milliseconds
	 * @return the formatted "TOTAL TIME" line, ending in two newlines
	 */
	private static String formatElapsed(long totalMillis) {
		long millisPerMinute = 1000L * 60L;
		long mins = totalMillis / millisPerMinute;
		// Remainder after removing the whole minutes, expressed in seconds.
		double secs = (totalMillis % millisPerMinute) / 1000.0;
		return String.format("TOTAL TIME:  %d min %.3f sec\n\n", mins, secs);
	}

	/**
	 * Declares the options of this program and parses the given arguments.
	 *
	 * <p>The options {@code -s}, {@code -m}, {@code -t} and {@code -o} take a
	 * value; {@code -w} is a flag. Before parsing, every blank inside an
	 * argument is replaced by {@code ---} and the argument is wrapped in double
	 * quotes; the arguments are then joined into one line.
	 * NOTE(review): presumably {@code CommandLineOptions} undoes the
	 * {@code ---} substitution when values are read back - confirm.
	 *
	 * @param cmdline raw command-line arguments
	 * @return the parsed options
	 */
	public static CommandLineOptions processCommandLine(String[] cmdline) {
		CommandLineOptions op = new CommandLineOptions();
		op.add(CommandLineOptions.Prefix.DASH, "s", CommandLineOptions.Separator.BLANK, true);
		op.add(CommandLineOptions.Prefix.DASH, "m", CommandLineOptions.Separator.BLANK, true);
		op.add(CommandLineOptions.Prefix.DASH, "t", CommandLineOptions.Separator.BLANK, true);
		op.add(CommandLineOptions.Prefix.DASH, "o", CommandLineOptions.Separator.BLANK, true);
		op.add(CommandLineOptions.Prefix.DASH, "w", CommandLineOptions.Separator.BLANK, false);
		op.prepare();
		StringBuilder line = new StringBuilder();
		for (String arg : cmdline) {
			line.append('"').append(arg.replace(" ", "---")).append("\" ");
		}
		op.parse(line.toString());
		return op;
	}

}
