package pl.waw.ipipan.zil.core.tei2brat;

import ipipan.clarin.tei.api.entities.TEICoreference;
import ipipan.clarin.tei.api.entities.TEICorpusText;
import ipipan.clarin.tei.api.entities.TEIMention;
import ipipan.clarin.tei.api.entities.TEIMorph;
import ipipan.clarin.tei.api.entities.TEIParagraph;
import ipipan.clarin.tei.api.entities.TEISentence;
import ipipan.clarin.tei.api.io.TEI_IO;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;

/**
 * Command-line converter that walks a directory tree of TEI corpus texts and
 * writes, for every text found, a pair of files into a mirrored target tree:
 * {@code <name>.txt} with the plain text and {@code <name>.ann} with mention
 * and coreference annotations in a brat-style standoff layout.
 * <p>
 * Both output files are written as UTF-8 so that character offsets stored in
 * the {@code .ann} file refer to the same characters regardless of the
 * platform default encoding.
 */
public class Tei2Brat {

	private static final Logger logger = Logger.getLogger(Tei2Brat.class);

	private static final TEI_IO teiIO = TEI_IO.getInstance();

	/**
	 * A directory containing a file whose whole name matches this pattern is
	 * treated as a single TEI text rather than as a container of texts.
	 */
	private static final Pattern TEXT_FILE_PATTERN = Pattern
			.compile("text.*\\.xml");

	/**
	 * Entry point. Expects exactly two arguments: an existing source
	 * directory and an existing target directory.
	 *
	 * @param args
	 *            {@code sourceDir targetDir}
	 */
	public static void main(String[] args) {

		if (args.length != 2) {
			logger.error("Wrong number of arguments! Should be: "
					+ Tei2Brat.class.getSimpleName() + " sourceDir targetDir");
			return;
		}

		File originalDir = new File(args[0]);
		File targetDir = new File(args[1]);

		if (!originalDir.isDirectory()) {
			logger.error(originalDir + " is not a directory!");
			return;
		}
		if (!targetDir.isDirectory()) {
			logger.error(targetDir + " is not a directory!");
			return;
		}

		recConvertDirs(originalDir, targetDir);
	}

	/**
	 * Recursively processes the subdirectories of {@code originalDir}. A
	 * subdirectory recognised as a text directory is converted into
	 * {@code targetDir}; any other subdirectory is mirrored under
	 * {@code targetDir} and descended into. Plain files directly inside
	 * {@code originalDir} are ignored.
	 * <p>
	 * Failures are logged and do not stop processing of sibling directories.
	 *
	 * @param originalDir
	 *            directory to scan
	 * @param targetDir
	 *            directory receiving the output for this level
	 */
	private static void recConvertDirs(File originalDir, File targetDir) {
		// listFiles() returns null when the directory cannot be read.
		File[] children = originalDir.listFiles();
		if (children == null) {
			logger.error("Cannot list contents of directory: " + originalDir);
			return;
		}

		for (File f : children) {
			if (!f.isDirectory())
				continue;

			if (isTextDir(f)) {
				logger.info("Converting from dir :" + f);
				try {
					TEICorpusText ct = teiIO.readFromNKJPDirectory(f);
					convert(ct, f.getName(), targetDir);
				} catch (Exception e) {
					// Boundary catch: one broken text must not abort the whole
					// run. The throwable is passed so the stack trace is kept.
					logger.error("Error converting text: " + f + " to dir: "
							+ targetDir, e);
				}
			} else {
				File correspDir = new File(targetDir, f.getName());
				if (!correspDir.isDirectory() && !correspDir.mkdirs()) {
					logger.error("Cannot create directory: " + correspDir);
					continue;
				}
				recConvertDirs(f, correspDir);
			}
		}
	}

	/**
	 * Tells whether {@code dir} directly contains an entry whose name matches
	 * {@link #TEXT_FILE_PATTERN}.
	 *
	 * @param dir
	 *            directory to inspect
	 * @return {@code true} if a matching entry exists; {@code false} if none
	 *         exists or the directory cannot be listed
	 */
	private static boolean isTextDir(File dir) {
		String[] names = dir.list();
		if (names == null)
			return false;
		for (String name : names)
			if (TEXT_FILE_PATTERN.matcher(name).matches())
				return true;
		return false;
	}

	/**
	 * Writes the {@code .txt} and {@code .ann} files for one corpus text. The
	 * text file is written first because it fills the morph-id to offset map
	 * that the annotation file needs.
	 *
	 * @param ct
	 *            corpus text to convert
	 * @param projectName
	 *            base name (without extension) of the two output files
	 * @param targetDir
	 *            directory in which the files are created
	 * @throws IOException
	 *             declared for callers; the writers invoked here log their
	 *             own I/O errors
	 */
	private static void convert(TEICorpusText ct, String projectName,
			File targetDir) throws IOException {
		String filePrefix = targetDir.getPath() + File.separator + projectName;

		Map<String, Integer> morphId2StartPos = new HashMap<>();

		writeTxtFile(ct, filePrefix, morphId2StartPos);
		writeAnnFile(ct, filePrefix, morphId2StartPos);
	}

	/**
	 * Writes {@code filePrefix + ".ann"}. For every mention a {@code T<n>}
	 * line is written, followed by a {@code #<n>} note with its head and, if
	 * the mention belongs to an "ident" coreference, a second note with that
	 * coreference's dominant. Afterwards each "ident" coreference is written
	 * as a {@code *\tCoref} line and every other coreference as an
	 * {@code R<n>\tQuasi} relation between its source mention and the single
	 * other member.
	 * <p>
	 * Any error is logged; the file may then be incomplete.
	 *
	 * @param ct
	 *            corpus text providing paragraphs, mentions and coreferences
	 * @param filePrefix
	 *            output path without extension
	 * @param morphId2StartPos
	 *            start offset of every morph in the text file, keyed by morph
	 *            id, as filled in by {@code writeTxtFile}
	 */
	private static void writeAnnFile(TEICorpusText ct, String filePrefix,
			Map<String, Integer> morphId2StartPos) {
		Map<TEIMention, String> mention2dominant = new HashMap<>();
		for (TEICoreference cor : ct.getAllCoreferences())
			if (cor.getType().equals("ident"))
				for (TEIMention m : cor.getMentions())
					mention2dominant.put(m, cor.getDominant());

		Map<TEIMention, String> mention2Id = new HashMap<>();
		int mid = 0;
		int nid = 0;
		File annFile = new File(filePrefix + "." + "ann");
		try (BufferedWriter bw = Files.newBufferedWriter(annFile.toPath(),
				StandardCharsets.UTF_8)) {

			// mentions
			for (TEIParagraph p : ct.getParagraphs()) {
				for (TEISentence s : p.getSentences()) {

					for (TEIMention m : s.getAllMentions()) {
						String id = "T" + mid++;
						mention2Id.put(m, id);

						bw.append(id
								+ "\t"
								+ "Mention"
								+ morphsToPositions(m.getMorphs(),
										morphId2StartPos) + "\t"
								+ morphsToString(m.getMorphs(), s.getMorphs())
								+ "\n");

						// The separator choice is parenthesised on purpose:
						// without the parentheses a morph with no preceding
						// space would contribute nothing at all to the head.
						StringBuilder head = new StringBuilder();
						for (TEIMorph mo : m.getHeadMorphs())
							head.append(mo.hasNps() ? "" : " ").append(
									mo.getOrth());
						bw.append("#" + nid++ + "\t" + "empty " + id + "\t"
								+ "Head: \"" + head.toString().trim() + "\""
								+ "\n");

						String dom = mention2dominant.get(m);
						if (dom != null) {
							bw.append("#" + nid++ + "\t" + "empty " + id + "\t"
									+ "Dominant: \"" + dom + "\"" + "\n");
						}

					}
				}
			}

			int quasiId = 0;

			// coreference
			for (TEICoreference c : ct.getAllCoreferences()) {

				List<String> ids = new ArrayList<>();
				for (TEIMention m : c.getMentions())
					ids.add(mention2Id.get(m));

				// Plain string order, so e.g. "T10" sorts before "T2".
				Collections.sort(ids);

				if (c.getType().equals("ident")) {
					if (ids.isEmpty()) {
						// Nothing to link; skip instead of failing the file.
						logger.warn("Skipping coreference without mentions in: "
								+ annFile);
						continue;
					}
					StringBuilder sb = new StringBuilder();
					for (String id : ids) {
						if (sb.length() > 0)
							sb.append(' ');
						sb.append(id);
					}
					bw.append("*\tCoref " + sb + "\n");
				} else {
					String idSource = mention2Id.get(c.getSourceMention());

					Set<TEIMention> ms = new HashSet<>(c.getMentions());
					if (ms.size() != 2)
						throw new IllegalStateException(
								"Quasi relation without 2 members!");
					ms.remove(c.getSourceMention());
					String idTarget = mention2Id.get(ms.iterator().next());

					bw.append("R" + quasiId++ + "\tQuasi Arg1:" + idSource
							+ " Arg2:" + idTarget + "\n");
				}

			}

		} catch (Exception e) {
			// Best-effort boundary: log with stack trace and give up on this
			// file only.
			logger.error("Error writing ann file: " + e.getLocalizedMessage(),
					e);
		}
	}

	/**
	 * Builds the offset part of a mention line: a leading space, then
	 * {@code start end} pairs separated by {@code ;}. A new pair is started
	 * whenever a morph does not begin where the previous morph (plus one
	 * separating space, unless the morph has no preceding space) ended.
	 *
	 * @param morphs
	 *            morphs of the mention, in text order; must not be empty
	 * @param morphId2StartPos
	 *            start offset of every morph, keyed by morph id
	 * @return offsets string such as {@code " 0 5"} or {@code " 0 5;9 12"}
	 */
	private static String morphsToPositions(List<TEIMorph> morphs,
			Map<String, Integer> morphId2StartPos) {

		StringBuilder sb = new StringBuilder();
		int pos = morphId2StartPos.get(morphs.get(0).getId());
		sb.append(" " + pos + " ");
		boolean first = true;
		for (TEIMorph m : morphs) {
			// Width of the separator expected before this morph.
			int corr = (first || m.hasNps()) ? 0 : 1;
			pos += corr;
			int start = morphId2StartPos.get(m.getId());
			if (pos != start) {
				// Gap: close the current fragment before the separator and
				// open a new one at the morph's real start.
				sb.append((pos - corr) + ";");
				pos = start;
				sb.append(pos + " ");
			}
			pos += m.getOrth().length();
			first = false;
		}
		sb.append(pos);

		return sb.toString();
	}

	/**
	 * Writes {@code filePrefix + ".txt"} and records the start offset of each
	 * morph. Morphs are separated by a single space unless the morph has no
	 * preceding space or is the first one in its paragraph; each paragraph is
	 * followed by two newlines.
	 * <p>
	 * I/O errors are logged; the file and the map may then be incomplete.
	 *
	 * @param ct
	 *            corpus text to write
	 * @param filePrefix
	 *            output path without extension
	 * @param morphId2StartPos
	 *            output map filled with the start offset of every morph,
	 *            keyed by morph id
	 */
	private static void writeTxtFile(TEICorpusText ct, String filePrefix,
			Map<String, Integer> morphId2StartPos) {
		File txtFile = new File(filePrefix + "." + "txt");
		try (BufferedWriter bw = Files.newBufferedWriter(txtFile.toPath(),
				StandardCharsets.UTF_8)) {

			int pos = 0;
			for (TEIParagraph p : ct.getParagraphs()) {
				boolean first = true;
				for (TEISentence s : p.getSentences()) {
					for (TEIMorph m : s.getMorphs()) {
						String space = (m.hasNps() || first) ? "" : " ";
						pos += space.length();
						morphId2StartPos.put(m.getId(), pos);

						pos += m.getOrth().length();
						first = false;

						bw.append(space + m.getOrth());
					}
				}
				bw.append("\n\n");
				pos += 2;
			}

		} catch (IOException e) {
			logger.error(
					"Error writing text file: " + e.getLocalizedMessage(), e);
		}
	}

	/**
	 * Renders the surface text of a mention. Morphs are joined with a space
	 * unless a morph has no preceding space; when morphs of the sentence were
	 * skipped between two mention morphs, a space is inserted even if the
	 * next morph has no preceding space. The result is trimmed.
	 *
	 * @param morphs
	 *            morphs of the mention, in sentence order
	 * @param sentMorphs
	 *            all morphs of the sentence containing the mention
	 * @return mention text
	 */
	private static String morphsToString(List<TEIMorph> morphs,
			List<TEIMorph> sentMorphs) {

		Iterator<TEIMorph> iterator = sentMorphs.iterator();
		// With an empty sentence the loop below never dereferences sentMorph,
		// because hasNext() is checked first.
		TEIMorph sentMorph = iterator.hasNext() ? iterator.next() : null;
		StringBuilder sb = new StringBuilder();
		for (TEIMorph m : morphs) {
			int i = 0;
			// we check for discont spans
			while (iterator.hasNext() && !sentMorph.equals(m)) {
				i++;
				sentMorph = iterator.next();
			}
			if (i > 1 && m.hasNps())
				sb.append(" ");
			sb.append((m.hasNps() ? "" : " ") + m.getOrth());
		}
		return sb.toString().trim();
	}

}
