package pl.waw.ipipan.zil.core.mmaxAPI;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import pl.waw.ipipan.zil.core.mmaxAPI.types.Mention;
import pl.waw.ipipan.zil.core.mmaxAPI.types.Word;

public class Reader {

	final private static Logger logger = Logger.getLogger(Reader.class);

	/**
	 * Recursively collects all MMAX project files (files whose name ends with
	 * {@code .mmax}) located under the given directory.
	 *
	 * @param mmaxDir
	 *            root directory of the search; if it is not a directory or
	 *            cannot be listed, the result is empty
	 * @return map from the file name without its last extension to the
	 *         absolute path of the file; when the same base name occurs more
	 *         than once, the entry visited last overwrites the earlier ones
	 */
	public static Map<String, String> findMmaxTexts(File mmaxDir) {
		Map<String, String> result = new HashMap<>();
		recfindMmaxTexts(mmaxDir, result);
		return result;
	}

	/**
	 * Depth-first worker for {@link #findMmaxTexts(File)}: adds every
	 * {@code .mmax} file found directly in {@code mmaxDir} to the accumulator
	 * and descends into its subdirectories.
	 *
	 * @param mmaxDir
	 *            directory to scan
	 * @param accumulator
	 *            map receiving the base name to absolute path entries
	 */
	private static void recfindMmaxTexts(File mmaxDir,
			Map<String, String> accumulator) {
		File[] files = mmaxDir.listFiles();
		// listFiles() returns null (not an empty array) when the path is not a
		// directory or an I/O error occurs; there is nothing to scan then.
		if (files == null)
			return;

		// sorted so that the traversal order does not depend on the platform
		Arrays.sort(files);
		for (File f : files) {
			if (f.isFile()) {
				String name = f.getName();
				if (name.matches(".+\\.mmax"))
					accumulator.put(
							name.substring(0, name.lastIndexOf(".")),
							f.getAbsolutePath());
			} else if (f.isDirectory()) {
				recfindMmaxTexts(f, accumulator);
			}
		}
	}

	/**
	 * Parses the given XML file into a DOM tree and normalizes it.
	 * <p>
	 * The parser feature for loading external DTDs is switched off before
	 * parsing.
	 *
	 * @param xmlFile
	 *            XML file to read
	 * @return the parsed, normalized document
	 * @throws ParserConfigurationException
	 *             if the parser cannot be configured or created
	 * @throws SAXException
	 *             if the file cannot be parsed
	 * @throws IOException
	 *             if the file cannot be read
	 */
	public static Document loadDocument(File xmlFile)
			throws ParserConfigurationException, SAXException, IOException {
		final String loadExternalDtd = "http://apache.org/xml/features/nonvalidating/load-external-dtd";

		DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
		factory.setFeature(loadExternalDtd, false);

		Document parsed = factory.newDocumentBuilder().parse(xmlFile);
		parsed.normalize();
		return parsed;
	}

	/**
	 * Reads all mention elements from an MMAX mentions file.
	 * <p>
	 * Every element with the tag name {@code Constants.MENTION} is turned into
	 * a {@link Mention} built from the raw values of its attributes, in
	 * document order.
	 *
	 * @param mentions
	 *            XML file with mention annotations
	 * @return one {@link Mention} per mention element found in the file
	 * @throws ParserConfigurationException
	 *             if the XML parser cannot be configured or created
	 * @throws SAXException
	 *             if the file cannot be parsed
	 * @throws IOException
	 *             if the file cannot be read
	 */
	public static List<Mention> loadMentions(File mentions)
			throws ParserConfigurationException, SAXException, IOException {
		logger.debug("Loading mentions from file " + mentions);

		NodeList mentionNodes = loadDocument(mentions).getElementsByTagName(
				Constants.MENTION);
		List<Mention> loaded = new ArrayList<>();

		for (int idx = 0; idx < mentionNodes.getLength(); idx++) {
			Node node = mentionNodes.item(idx);
			if (node.getNodeType() != Node.ELEMENT_NODE)
				continue;

			Element el = (Element) node;
			loaded.add(new Mention(
					el.getAttribute(Constants.MENTION_ID),
					el.getAttribute(Constants.SPAN),
					el.getAttribute(Constants.MENTION_HEAD),
					el.getAttribute(Constants.NEAR_IDENTITY),
					el.getAttribute(Constants.MENTION_GROUP),
					el.getAttribute(Constants.BART_MENTION_GROUP),
					el.getAttribute(Constants.DOMINANT),
					el.getAttribute(Constants.COMMENT)));
		}

		return loaded;
	}

	/**
	 * Reads all word elements from an MMAX words file.
	 * <p>
	 * Every element with the tag name {@code Constants.WORD} is turned into a
	 * {@link Word}, in document order. The orthographic form is the text
	 * content of the element; the three boolean flags are {@code true} only
	 * when the corresponding attribute equals {@code Constants.TRUE}.
	 *
	 * @param words
	 *            XML file with word (token) annotations
	 * @return one {@link Word} per word element found in the file
	 * @throws ParserConfigurationException
	 *             if the XML parser cannot be configured or created
	 * @throws SAXException
	 *             if the file cannot be parsed
	 * @throws IOException
	 *             if the file cannot be read
	 */
	public static List<Word> loadWords(File words)
			throws ParserConfigurationException, SAXException, IOException {

		logger.debug("Loading words from file " + words);

		NodeList wordNodes = loadDocument(words).getElementsByTagName(
				Constants.WORD);
		List<Word> loaded = new ArrayList<Word>();

		for (int idx = 0; idx < wordNodes.getLength(); idx++) {
			Node node = wordNodes.item(idx);
			if (node.getNodeType() != Node.ELEMENT_NODE)
				continue;

			Element el = (Element) node;

			String npsFlag = el.getAttribute(Constants.HAS_NPS);
			String parFlag = el.getAttribute(Constants.LAST_IN_PAR);
			String sentFlag = el.getAttribute(Constants.LAST_IN_SENT);

			loaded.add(new Word(
					el.getAttribute(Constants.WORD_ID),
					el.getTextContent(),
					el.getAttribute(Constants.BASE),
					el.getAttribute(Constants.CTAG),
					el.getAttribute(Constants.MSD),
					npsFlag.equals(Constants.TRUE),
					parFlag.equals(Constants.TRUE),
					sentFlag.equals(Constants.TRUE)));
		}

		return loaded;
	}

	/**
	 * Expands a span expression into the list of ids it covers.
	 * <p>
	 * A span is a comma-separated list of fragments. A fragment is either a
	 * single id ({@code word_3}) or an inclusive range written with two dots
	 * ({@code word_3..word_7}). A single id is copied to the result as is,
	 * without being looked up in {@code words}. A range is expanded to all
	 * ids of {@code words} from the first occurrence of the start id up to
	 * the next occurrence of the end id.
	 *
	 * @param span
	 *            span expression, e.g. {@code "word_1..word_3,word_5"}
	 * @param words
	 *            all ids in document order; consulted only for ranges
	 * @return ids covered by the span, in the order of the fragments
	 * @throws IllegalArgumentException
	 *             if a fragment consists only of range separators (e.g.
	 *             {@code ".."})
	 * @throws java.util.NoSuchElementException
	 *             if the start id of a range is not present in {@code words},
	 *             or its end id does not occur after the start id
	 */
	public static List<String> parseSpan(String span, List<String> words) {
		List<String> parsed = new ArrayList<String>();

		for (String fragment : span.split(",")) {
			String[] bounds = fragment.split("\\.\\.");
			// split() drops trailing empty strings, so ".." yields no bounds
			if (bounds.length == 0)
				throw new IllegalArgumentException("Malformed fragment '"
						+ fragment + "' in span '" + span + "'");

			String first = bounds[0];
			String last = bounds[bounds.length - 1];
			if (first.equals(last)) {
				parsed.add(first);
				continue;
			}

			int from = words.indexOf(first);
			if (from < 0)
				throw new java.util.NoSuchElementException("Start id '"
						+ first + "' of span '" + span
						+ "' not found among word ids");

			// the end id is searched only after the start id: first != last,
			// so the start position itself can never match
			int offset = words.subList(from + 1, words.size()).indexOf(last);
			if (offset < 0)
				throw new java.util.NoSuchElementException("End id '" + last
						+ "' of span '" + span
						+ "' not found after start id '" + first + "'");

			parsed.addAll(words.subList(from, from + offset + 2));
		}
		return parsed;
	}

	/**
	 * Resolves the span of every mention to the list of words it covers.
	 * <p>
	 * Duplicate word ids are reported through the logger; the id is still
	 * kept in the id sequence and the word seen last wins in the lookup.
	 * Span ids that do not correspond to any word produce {@code null}
	 * entries in the resulting word list.
	 *
	 * @param mmaxWords
	 *            all words of the text, in document order
	 * @param mmaxMentions
	 *            mentions whose spans are to be resolved
	 * @return map from each mention to the words of its span, in span order
	 */
	public static Map<Mention, List<Word>> parseAllSpans(List<Word> mmaxWords,
			List<Mention> mmaxMentions) {

		Map<String, Word> wordId2Word = new HashMap<>();
		List<String> wordIds = new ArrayList<>();
		for (Word w : mmaxWords) {
			String id = w.getId();
			// put() returns the previously mapped word, which gives a
			// constant-time duplicate check instead of scanning wordIds
			if (wordId2Word.put(id, w) != null)
				logger.error("Duplicate word id: " + id);
			wordIds.add(id);
		}

		Map<Mention, List<Word>> result = new HashMap<>();
		for (Mention m : mmaxMentions) {
			List<String> spanIds = parseSpan(m.getSpan(), wordIds);
			List<Word> span = new ArrayList<>(spanIds.size());
			for (String id : spanIds)
				span.add(wordId2Word.get(id));
			result.put(m, span);
		}

		return result;
	}
}
