package pl.waw.ipipan.zil.core.tei2mmax;

import ipipan.clarin.tei.api.entities.TEICoreference;
import ipipan.clarin.tei.api.entities.TEICorpusText;
import ipipan.clarin.tei.api.entities.TEIInterpretation;
import ipipan.clarin.tei.api.entities.TEIMention;
import ipipan.clarin.tei.api.entities.TEIMorph;
import ipipan.clarin.tei.api.entities.TEIParagraph;
import ipipan.clarin.tei.api.entities.TEISentence;
import ipipan.clarin.tei.api.io.TEI_IO;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;
import javax.xml.transform.TransformerException;

import org.apache.log4j.Logger;
import org.w3c.dom.Document;

import pl.waw.ipipan.zil.core.mmaxAPI.Constants;
import pl.waw.ipipan.zil.core.mmaxAPI.Reader;
import pl.waw.ipipan.zil.core.mmaxAPI.Writer;

/**
 * Command-line converter that walks a directory tree of TEI texts and writes,
 * for every text found, an MMAX project file, a words level file and a
 * mentions (markables) level file into a mirrored target directory tree.
 *
 * <p>
 * Usage: {@code tei2mmax sourceDir targetDir}. Both directories must already
 * exist.
 */
public class Tei2Mmax {

	private static final Logger logger = Logger.getLogger(Tei2Mmax.class);

	private static final XMLOutputFactory xof = XMLOutputFactory.newInstance();
	private static final TEI_IO teiIO = TEI_IO.getInstance();

	/**
	 * A directory containing at least one file whose name matches this
	 * pattern is treated as a single text to convert (not descended into).
	 */
	private static final Pattern TEXT_FILE_NAME = Pattern
			.compile("text.*\\.xml");

	/**
	 * Program entry point.
	 *
	 * @param args
	 *            exactly two elements: the source directory and the target
	 *            directory; anything else is reported as an error and the
	 *            program returns without converting
	 */
	public static void main(String[] args) {

		if (args.length != 2) {
			logger.error("Wrong number of arguments! Should be: tei2mmax sourceDir targetDir");
			return;
		}

		File originalDir = new File(args[0]);
		File targetDir = new File(args[1]);

		if (!originalDir.isDirectory()) {
			logger.error(originalDir + " is not a directory!");
			return;
		}
		if (!targetDir.isDirectory()) {
			logger.error(targetDir + " is not a directory!");
			return;
		}

		recConvertDirs(originalDir, targetDir);
	}

	/**
	 * Recursively processes the subdirectories of {@code originalDir}. A
	 * subdirectory that contains a text file (see {@link #TEXT_FILE_NAME}) is
	 * converted into {@code targetDir}; any other subdirectory is mirrored
	 * under {@code targetDir} and processed recursively. Plain files directly
	 * inside {@code originalDir} are ignored.
	 *
	 * @param originalDir
	 *            directory whose subdirectories are examined
	 * @param targetDir
	 *            directory receiving the converted files
	 */
	private static void recConvertDirs(File originalDir, File targetDir) {
		// listFiles() returns null on an I/O error or a non-directory.
		File[] children = originalDir.listFiles();
		if (children == null) {
			logger.error("Cannot list contents of directory: " + originalDir);
			return;
		}

		for (File f : children) {
			if (!f.isDirectory()) {
				continue;
			}

			if (isTextDir(f)) {
				convertTextDir(f, targetDir);
			} else {
				File correspDir = new File(targetDir, f.getName());
				// mkdir() also returns false when the directory already
				// exists, so only a missing directory counts as a failure.
				if (!correspDir.mkdir() && !correspDir.isDirectory()) {
					logger.error("Cannot create directory: " + correspDir);
					continue;
				}
				recConvertDirs(f, correspDir);
			}
		}
	}

	/**
	 * Tells whether {@code dir} directly contains an entry whose name matches
	 * {@link #TEXT_FILE_NAME}.
	 *
	 * @param dir
	 *            directory to inspect
	 * @return {@code true} if a matching entry exists; {@code false}
	 *         otherwise, including when the directory cannot be listed
	 */
	private static boolean isTextDir(File dir) {
		String[] names = dir.list();
		if (names == null) {
			return false;
		}
		for (String name : names) {
			if (TEXT_FILE_NAME.matcher(name).matches()) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Reads one text directory together with the title and {@code catRef}
	 * target from its {@code header.xml} and converts it. Any failure is
	 * logged (with the stack trace) and does not stop processing of other
	 * directories.
	 *
	 * @param textDir
	 *            directory holding the text
	 * @param targetDir
	 *            directory receiving the converted files
	 */
	private static void convertTextDir(File textDir, File targetDir) {
		logger.info("Converting from dir :" + textDir);
		try {
			TEICorpusText ct = teiIO.readFromNKJPDirectory(textDir);

			Document header = Reader.loadDocument(new File(textDir,
					"header.xml"));
			String title = header.getElementsByTagName("title").item(0)
					.getTextContent();
			String catRef = header.getElementsByTagName("catRef").item(0)
					.getAttributes().getNamedItem("target").getTextContent();

			convert(ct, textDir.getName(), targetDir, title, catRef);
		} catch (Exception e) {
			// Boundary catch: one broken text must not abort the whole run.
			logger.error("Error converting text: " + textDir + " to dir: "
					+ targetDir, e);
		}
	}

	/**
	 * Writes the project file, the words file and the mentions file of one
	 * text. All three share the prefix {@code targetDir/projectName}.
	 *
	 * @param ct
	 *            text to convert
	 * @param projectName
	 *            base name of the generated files
	 * @param targetDir
	 *            directory receiving the files
	 * @param title
	 *            title passed on to the project header
	 * @param catRef
	 *            catRef target passed on to the project header
	 */
	private static void convert(TEICorpusText ct, String projectName,
			File targetDir, String title, String catRef) throws IOException,
			ParserConfigurationException, TransformerException {
		String filePrefix = targetDir.getPath() + File.separator + projectName;

		writeMmaxFile(ct, filePrefix, title, catRef);
		writeWords(ct, filePrefix);
		writeMentions(ct, filePrefix);
	}

	/**
	 * Creates the project header with {@code Writer.createHeader}, passing
	 * the title, the catRef and the bare name of the words level file, and
	 * saves it as {@code filePrefix.<project extension>}.
	 *
	 * @param ct
	 *            text being converted (currently not used by this method)
	 * @param filePrefix
	 *            path prefix shared by all files of the project
	 * @param title
	 *            title stored in the header
	 * @param catRef
	 *            catRef stored in the header
	 */
	private static void writeMmaxFile(TEICorpusText ct, String filePrefix,
			String title, String catRef) throws ParserConfigurationException,
			TransformerException {
		String wordsFileName = new File(filePrefix
				+ Constants.MMAX_WORDS_LEVEL_SUFFIX).getName();
		Document header = Writer.createHeader(title, catRef, wordsFileName);
		Writer.saveDocument(header, new File(filePrefix + "."
				+ Constants.MMAX_PROJECT_EXTENSION));
	}

	/**
	 * Writes the words level file: one word element per morph of the text, in
	 * sentence order, carrying base form, ctag, word id, msd and optional
	 * "has nps" / "last in paragraph" / "last in sentence" flags.
	 *
	 * <p>
	 * Every failure is logged and swallowed, so that the caller can still
	 * attempt to write the remaining files of the project.
	 *
	 * @param ct
	 *            text to write
	 * @param filePrefix
	 *            path prefix; the words level suffix is appended to it
	 */
	private static void writeWords(TEICorpusText ct, String filePrefix)
			throws IOException {
		String fileName = filePrefix + Constants.MMAX_WORDS_LEVEL_SUFFIX;

		try (BufferedWriter bw = newUtf8Writer(fileName)) {
			XMLStreamWriter out = xof.createXMLStreamWriter(bw);
			try {
				out.writeStartDocument("UTF-8", "1.0");
				out.writeCharacters("\n");

				out.writeDTD("<!DOCTYPE words SYSTEM \"words.dtd\">");
				out.writeCharacters("\n");

				out.writeStartElement("words");
				out.writeCharacters("\n");

				Map<TEIMorph, String> morph2Id = getMorph2Id(ct);

				// Collect the final morph of every sentence and paragraph.
				Set<TEIMorph> lastInPars = new HashSet<TEIMorph>();
				Set<TEIMorph> lastInSents = new HashSet<TEIMorph>();
				for (TEIParagraph p : ct.getParagraphs()) {
					TEIMorph lastInPar = null;
					for (TEISentence sent : p.getSentences()) {
						if (sent.getMorphs().isEmpty()) {
							// A sentence without morphs has no last morph.
							continue;
						}
						TEIMorph lastInSent = sent.getMorphs().get(
								sent.getMorphs().size() - 1);
						lastInSents.add(lastInSent);
						lastInPar = lastInSent;
					}
					if (lastInPar != null) {
						lastInPars.add(lastInPar);
					}
				}

				for (TEISentence s : ct.getAllSentences()) {
					for (TEIMorph m : s.getMorphs()) {
						TEIInterpretation interp = m.getChosenInterpretation();

						out.writeCharacters("\t");
						out.writeStartElement(Constants.WORD);
						out.writeAttribute(Constants.BASE, interp.getBase());
						out.writeAttribute(Constants.CTAG, interp.getCtag());
						if (m.hasNps()) {
							out.writeAttribute(Constants.HAS_NPS,
									Constants.TRUE);
						}
						out.writeAttribute(Constants.WORD_ID, morph2Id.get(m));
						if (lastInPars.contains(m)) {
							out.writeAttribute(Constants.LAST_IN_PAR,
									Constants.TRUE);
						}
						if (lastInSents.contains(m)) {
							out.writeAttribute(Constants.LAST_IN_SENT,
									Constants.TRUE);
						}
						out.writeAttribute(Constants.MSD, interp.getMorph());

						out.writeCharacters(m.getOrth());
						out.writeEndElement(); // word
						out.writeCharacters("\n");
					}
				}

				out.writeEndElement(); // words
				out.writeCharacters("\n");
			} finally {
				closeQuietly(out, fileName);
			}
		} catch (Exception e) {
			// Deliberate best effort: log and let the caller continue.
			logger.error("Error writing to file: " + fileName, e);
		}
	}

	/**
	 * Writes the mentions level file: one markable element per mention of
	 * the text, in sentence order, carrying its id, word span, head,
	 * near-identity link, coreference group and (if known) dominant
	 * expression. A mention for which no word span can be determined is
	 * skipped with a warning.
	 *
	 * <p>
	 * XML stream errors are logged and swallowed; I/O errors while opening
	 * or closing the file are propagated to the caller.
	 *
	 * @param ct
	 *            text to write
	 * @param filePrefix
	 *            path prefix; the mentions level suffix is appended to it
	 * @throws IOException
	 *             if the output file cannot be opened or closed
	 */
	private static void writeMentions(TEICorpusText ct, String filePrefix)
			throws IOException {
		String fileName = filePrefix + Constants.MMAX_MENTIONS_LEVEL_SUFFIX;

		try (BufferedWriter bw = newUtf8Writer(fileName)) {
			XMLStreamWriter out = xof.createXMLStreamWriter(bw);
			try {
				out.writeStartDocument("UTF-8", "1.0");
				out.writeCharacters("\n");

				out.writeDTD("<!DOCTYPE markables SYSTEM \"markables.dtd\">");
				out.writeCharacters("\n");

				out.writeStartElement("markables");
				out.writeDefaultNamespace("www.eml.org/NameSpaces/"
						+ Constants.MMAX_MENTIONS_LEVEL_NAME);
				out.writeCharacters("\n");

				Map<TEIMorph, String> morph2Id = getMorph2Id(ct);
				Map<TEIMention, String> mention2newId = getMention2NewId(ct);
				Map<TEIMention, String> mention2groupId = getMention2GroupId(ct);
				Map<TEIMention, String> mention2dominant = getMention2Dominant(ct);
				Map<TEIMention, String> mention2nearIdent = getMention2NearIdent(
						ct, mention2newId);

				for (TEISentence s : ct.getAllSentences()) {
					for (TEIMention m : s.getAllMentions()) {
						String markableId = "markable_" + mention2newId.get(m);

						String span = getSpanFromList(m.getMorphs(), morph2Id);
						if (span.isEmpty()) {
							logger.warn("Skipping mention without a resolvable word span: "
									+ markableId + " in file: " + fileName);
							continue;
						}

						String groupId = "empty";
						if (mention2groupId.containsKey(m)) {
							groupId = mention2groupId.get(m);
						}

						String dom = mention2dominant.get(m);
						String near = mention2nearIdent.get(m);

						out.writeCharacters("\t");
						out.writeStartElement(Constants.MENTION);
						out.writeAttribute(Constants.MENTION_ID, markableId);
						out.writeAttribute(Constants.SPAN, span);
						out.writeAttribute(Constants.MENTION_HEAD,
								firstHeadOrth(m));
						out.writeAttribute("mmax_level",
								Constants.MMAX_MENTIONS_LEVEL_NAME);
						out.writeAttribute(Constants.NEAR_IDENTITY, near);
						out.writeAttribute(Constants.MENTION_GROUP, groupId);
						if (dom != null) {
							out.writeAttribute(Constants.DOMINANT, dom);
						}

						out.writeEndElement(); // markable
						out.writeCharacters("\n");
					}
				}
				out.writeEndElement(); // markables
			} finally {
				closeQuietly(out, fileName);
			}
		} catch (XMLStreamException e) {
			logger.error("Error writing to file: " + fileName, e);
		}
	}

	/**
	 * Returns the orthographic form of the first head morph of a mention, or
	 * an empty string when the mention has no head morphs.
	 *
	 * @param m
	 *            mention whose head is wanted
	 * @return head text, never {@code null}
	 */
	private static String firstHeadOrth(TEIMention m) {
		for (TEIMorph mor : m.getHeadMorphs()) {
			// TODO: only first single segment, because mmax cries!
			return String.valueOf(mor.getOrth());
		}
		return "";
	}

	/**
	 * Opens a buffered writer that encodes in UTF-8, matching the encoding
	 * declared in the XML prolog of the generated files. ({@code FileWriter}
	 * would use the platform default charset instead.)
	 *
	 * @param fileName
	 *            path of the file to create or overwrite
	 * @return the open writer; the caller must close it
	 * @throws IOException
	 *             if the file cannot be opened for writing
	 */
	private static BufferedWriter newUtf8Writer(String fileName)
			throws IOException {
		return new BufferedWriter(new OutputStreamWriter(
				new FileOutputStream(fileName), StandardCharsets.UTF_8));
	}

	/**
	 * Closes an XML stream writer, logging instead of throwing on failure so
	 * that the underlying file writer can still be closed afterwards.
	 *
	 * @param out
	 *            writer to close; {@code null} is ignored
	 * @param fileName
	 *            file name used in the log message
	 */
	private static void closeQuietly(XMLStreamWriter out, String fileName) {
		if (out == null) {
			return;
		}
		try {
			out.close();
		} catch (XMLStreamException e) {
			logger.error("Error closing XML writer for file: " + fileName, e);
		}
	}

	/**
	 * Maps every mention to the markable id of its near-identity partner, or
	 * to {@code "empty"} when it is not the source of a near-identity link.
	 * For each coreference of type {@code "near-ident"} the partner is the
	 * second listed mention when the source mention is listed first, and the
	 * first listed mention otherwise.
	 *
	 * @param ct
	 *            text providing coreferences and mentions
	 * @param mention2newId
	 *            numeric ids of the mentions (without the "markable_" prefix)
	 * @return map with an entry for every mention found in the sentences
	 */
	private static Map<TEIMention, String> getMention2NearIdent(
			TEICorpusText ct, Map<TEIMention, String> mention2newId) {

		Map<TEIMention, String> result = new HashMap<>();
		for (TEICoreference cor : ct.getAllCoreferences()) {
			if (!"near-ident".equals(cor.getType())) {
				continue;
			}

			List<? extends TEIMention> mentions = cor.getMentions();
			if (mentions.size() < 2) {
				logger.warn("Ignoring near-ident coreference with fewer than two mentions");
				continue;
			}

			TEIMention sourceMention = cor.getSourceMention();
			TEIMention targetMention = mentions.indexOf(sourceMention) == 0
					? mentions.get(1)
					: mentions.get(0);
			result.put(sourceMention,
					"markable_" + mention2newId.get(targetMention));
		}

		for (TEISentence s : ct.getAllSentences()) {
			for (TEIMention m : s.getAllMentions()) {
				if (!result.containsKey(m)) {
					result.put(m, "empty");
				}
			}
		}

		return result;
	}

	/**
	 * Numbers all mentions of the text consecutively, starting from 1, in
	 * sentence order.
	 *
	 * @param ct
	 *            text providing the mentions
	 * @return map from mention to its number as a decimal string
	 */
	private static Map<TEIMention, String> getMention2NewId(TEICorpusText ct) {
		Map<TEIMention, String> mention2newId = new HashMap<>();

		int nextId = 1;
		for (TEISentence s : ct.getAllSentences()) {
			for (TEIMention m : s.getAllMentions()) {
				mention2newId.put(m, Integer.toString(nextId++));
			}
		}

		return mention2newId;
	}

	/**
	 * Maps every mention belonging to a coreference of type {@code "ident"}
	 * to the dominant value of that coreference.
	 *
	 * @param ct
	 *            text providing the coreferences
	 * @return map containing only mentions of "ident" coreferences
	 */
	private static Map<TEIMention, String> getMention2Dominant(TEICorpusText ct) {
		Map<TEIMention, String> result = new HashMap<TEIMention, String>();

		for (TEICoreference c : ct.getAllCoreferences()) {
			if ("ident".equals(c.getType())) {
				for (TEIMention m : c.getMentions()) {
					result.put(m, c.getDominant());
				}
			}
		}

		return result;
	}

	/**
	 * Assigns a group name {@code set_0}, {@code set_1}, ... to every
	 * coreference of type {@code "ident"} and maps each of its mentions to
	 * that name.
	 *
	 * @param ct
	 *            text providing the coreferences
	 * @return map containing only mentions of "ident" coreferences
	 */
	private static Map<TEIMention, String> getMention2GroupId(TEICorpusText ct) {
		Map<TEIMention, String> result = new HashMap<TEIMention, String>();

		int id = 0;
		for (TEICoreference c : ct.getAllCoreferences()) {
			if ("ident".equals(c.getType())) {
				String setName = "set_" + id++;
				for (TEIMention m : c.getMentions()) {
					result.put(m, setName);
				}
			}
		}

		return result;
	}

	/**
	 * Assigns word ids {@code word_1}, {@code word_2}, ... to all morphs of
	 * the text in sentence order.
	 *
	 * @param ct
	 *            text providing the morphs
	 * @return map from morph to its word id
	 */
	private static Map<TEIMorph, String> getMorph2Id(TEICorpusText ct) {
		Map<TEIMorph, String> result = new HashMap<TEIMorph, String>();

		int id = 1;
		for (TEISentence s : ct.getAllSentences()) {
			for (TEIMorph m : s.getMorphs()) {
				result.put(m, "word_" + id++);
			}
		}
		return result;
	}

	/**
	 * Builds the span string for a list of morphs by looking up the word id
	 * of each morph and compressing the numbers with
	 * {@link #getSpanFromIdList(List)}. Morphs without a usable word id are
	 * reported with a warning and left out of the span.
	 *
	 * @param l
	 *            morphs forming the span
	 * @param morph2Id
	 *            word ids of the form {@code word_N}
	 * @return the span string; empty if no morph could be resolved
	 */
	private static String getSpanFromList(List<TEIMorph> l,
			Map<TEIMorph, String> morph2Id) {

		List<Integer> ids = new ArrayList<Integer>(l.size());
		for (TEIMorph morph : l) {
			String wordId = morph2Id.get(morph);
			if (wordId == null) {
				logger.warn("No word id for morph " + morph.getId()
						+ " in span " + l);
				continue;
			}
			try {
				ids.add(Integer.valueOf(wordId.split("_")[1]));
			} catch (NumberFormatException | ArrayIndexOutOfBoundsException ex) {
				logger.warn("Malformed word id '" + wordId + "' for morph "
						+ morph.getId(), ex);
			}
		}

		return getSpanFromIdList(ids);
	}

	/**
	 * Renders word numbers as a span string. Each maximal run of numbers
	 * that increase by exactly one becomes {@code word_A..word_B} (or just
	 * {@code word_A} for a run of length one); runs are joined with commas.
	 * For example {@code [1, 2, 3, 5]} yields {@code word_1..word_3,word_5}.
	 * The numbers are processed in the given order, without sorting or
	 * de-duplication.
	 *
	 * @param ids
	 *            word numbers; may be empty
	 * @return the span string, or an empty string for a {@code null} or
	 *         empty list
	 */
	protected static String getSpanFromIdList(List<Integer> ids) {
		if (ids == null || ids.isEmpty()) {
			return "";
		}

		StringBuilder span = new StringBuilder();
		Iterator<Integer> it = ids.iterator();

		int runStart = it.next();
		int runEnd = runStart;
		while (it.hasNext()) {
			int id = it.next();
			if (id == runEnd + 1) {
				// Still inside the current run of consecutive numbers.
				runEnd = id;
			} else {
				appendRun(span, runStart, runEnd);
				runStart = id;
				runEnd = id;
			}
		}
		appendRun(span, runStart, runEnd);

		return span.toString();
	}

	/**
	 * Appends one run of consecutive word numbers to a span under
	 * construction, preceded by a comma if the span is not empty.
	 *
	 * @param span
	 *            span built so far
	 * @param first
	 *            first number of the run
	 * @param last
	 *            last number of the run (equal to {@code first} for a single
	 *            word)
	 */
	private static void appendRun(StringBuilder span, int first, int last) {
		if (span.length() > 0) {
			span.append(',');
		}
		span.append("word_").append(first);
		if (last != first) {
			span.append("..").append("word_").append(last);
		}
	}

}
