/*
 * 
 *  Copyright (C) 2011 Mateusz Kopec
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 *
 */
package annotation;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import utils.TakipiManager;
import basic.Context;
import corpusapi.ContinueMode;
import corpusapi.Corpus;
import corpusapi.CorpusTools;
import corpusapi.Sense;
import corpusapi.SenseInventory;
import corpusapi.SenseStatistics;
import corpusapi.tei.TEICorpus;
import corpusapi.tei.TEICorpusText;
import corpusapi.tei.TEICorpusTools;
import corpusapi.tei.TEIInterpretation;
import corpusapi.tei.TEIMorphoSegmentGroup;
import corpusapi.tei.TEISegment;

/**
 * Responsible for creation of word experts and annotation.
 * 
 * @author Mateusz Kopec
 * 
 */
public class Annotator {

	/**
	 * Size of the context is 2*WINDOW_SIZE+1, of course can be less if context
	 * is shorter. Set to big value, because it can be shortened in later
	 * processing.
	 */
	private final static int WINDOW_SIZE = 50;

	/** Prefix put in front of every sense id written to the annotation file. */
	private final static String SENSE_ID_PREFIX = "NKJP_WSI.xml#";

	/**
	 * Annotate corpus using given word experts.
	 * <p>
	 * Every text of the corpus is processed independently: a text that raises
	 * an exception is reported on standard output and skipped, the remaining
	 * texts are still annotated. Each text is closed after processing, whether
	 * it succeeded or not.
	 * 
	 * @param corpus
	 *            to be annotated
	 * @param filename
	 *            to save annotations into
	 * @param wordExperts
	 *            used to annotate; map: base form -> experts for that word
	 */
	public void annotateMulti(final TEICorpus corpus, String filename, Map<String, List<WordExpert>> wordExperts) {
		System.out.println("Annotating...");

		int processedSegments = 0;
		int processedTexts = 0;

		int numberOfTexts = corpus.getCorpusTextIds().size();
		// Progress is reported about every 10% of the texts. The step is
		// clamped to 1, otherwise corpora with fewer than 10 texts would make
		// the modulo below divide by zero.
		int progressStep = Math.max(1, numberOfTexts / 10);
		int c = 0;

		for (String corpusTextId : corpus.getCorpusTextIds()) {

			if (++c % progressStep == 0)
				System.out.println("Processing text nr: " + c + " out of " + numberOfTexts);

			TEICorpusText corpusText = corpus.getCorpusText(corpusTextId);

			try {
				processedSegments += annotateText(corpusText, filename, wordExperts);
				processedTexts++;

			} catch (Exception e) {
				e.printStackTrace();
				// getMessage() may be null (e.g. for a NullPointerException);
				// fall back to toString() so that reporting a failure can
				// never abort the whole annotation run.
				String reason = e.getMessage() != null ? e.getMessage() : e.toString();
				System.out.println(corpusText.getPath() + " Skipping text because of: "
						+ reason.replaceAll("\\n", " "));
			} finally {
				corpusText.closeCorpusText();
			}
		}
		System.out.println("Processed " + processedTexts + " texts out of " + numberOfTexts);
		System.out.println("Processed segments: " + processedSegments);
		System.out.println("Done.");
	}

	/**
	 * Annotates a single text: walks through all its segments, asks every
	 * expert registered for the segment's base form to disambiguate it and
	 * saves the collected answers.
	 * 
	 * @param corpusText
	 *            text to annotate
	 * @param filename
	 *            to save annotations into
	 * @param wordExperts
	 *            map: base form -> experts for that word
	 * @return number of segments visited in the text
	 * @throws Exception
	 *             if the text has no first segment or any corpus operation
	 *             fails
	 */
	private int annotateText(TEICorpusText corpusText, String filename, Map<String, List<WordExpert>> wordExperts)
			throws Exception {
		// get first segment in text
		TEISegment segment = corpusText.getFirstSegment();
		if (segment == null) {
			throw new Exception("NULL first segment!");
		}

		AnnSensesTXT annSensesTxt = new AnnSensesTXT(corpusText.getPath());
		int segmentsInText = 0;

		// iterate through all segments in text and find polysemous ones
		do {
			segmentsInText++;

			String baseForm = chosenInterpretation(segment).getBase();
			List<WordExpert> experts = wordExperts.get(baseForm);

			// a key mapped to null is treated like a missing expert list
			if (experts != null) {
				Context context = loadContextForSegment(segment);

				List<String> answers = new ArrayList<String>();
				for (WordExpert expert : experts)
					answers.add(SENSE_ID_PREFIX + expert.disambiguate(context).getId());

				annSensesTxt.add(segment, answers);
			}

		} while ((segment = segment.getNext(ContinueMode.ALWAYS_CONTINUE)) != null);

		annSensesTxt.saveAsTxt(filename);
		return segmentsInText;
	}

	/**
	 * Returns the chosen morphological interpretation of given segment.
	 * 
	 * @param segment
	 *            segment to look up
	 * @return interpretation chosen in the segment's morphological group
	 * @throws Exception
	 *             propagated from the corpus API
	 */
	private static TEIInterpretation chosenInterpretation(TEISegment segment) throws Exception {
		TEIMorphoSegmentGroup morpho = (TEIMorphoSegmentGroup) segment
				.getSegmentGroup(TEIMorphoSegmentGroup.class);
		return morpho.getChosenInterpretation();
	}

	/**
	 * Loads context for given segment: up to WINDOW_SIZE segments before it,
	 * the segment itself and up to WINDOW_SIZE segments after it, following
	 * {@link ContinueMode#CONTINUOUS} links. The keyword index of the result
	 * is the position of the given segment inside the context.
	 * 
	 * @param senseSegment
	 *            segment for which the context is built
	 * @return context
	 * @throws Exception
	 *             propagated from the corpus API
	 */
	private Context loadContextForSegment(TEISegment senseSegment) throws Exception {
		Context result = new Context();

		int relativePosition = 0;
		TEISegment currentSegment = senseSegment;

		// looking for first segment of context; the neighbour is fetched once
		// per step and reused
		TEISegment previous;
		while ((relativePosition > -WINDOW_SIZE)
				&& ((previous = currentSegment.getPrev(ContinueMode.CONTINUOUS)) != null)) {
			currentSegment = previous;
			relativePosition--;
		}

		// number of steps taken backwards == index of the keyword in context
		result.setKeywordIndex(-relativePosition);

		result.addWord(currentSegment.getOrth(), chosenInterpretation(currentSegment).getBase());

		// process sequentially context
		TEISegment next;
		while ((relativePosition < WINDOW_SIZE)
				&& ((next = currentSegment.getNext(ContinueMode.CONTINUOUS)) != null)) {
			currentSegment = next;
			relativePosition++;

			result.addWord(currentSegment.getOrth(), chosenInterpretation(currentSegment).getBase());
		}

		return result;
	}

	/**
	 * Creates a map of Most Frequent Sense decisions in given corpus.
	 * 
	 * @param dict
	 *            sense dictionary
	 * @param corpus
	 *            corpus the sense statistics are computed from
	 * @return map: orth -> most frequent sense
	 */
	private Map<String, String> createMFSDecs(SenseInventory dict, TEICorpus corpus) {
		// calculating mfs decisions : orth -> decision
		CorpusTools ct = new TEICorpusTools();
		SenseStatistics globalStats = ct.getSenseStatistics(corpus);
		Map<String, String> mfsDecsId = ct.getMFSDecisionsfromStats(globalStats);
		Map<String, String> mfsDecsOrth = new HashMap<String, String>();
		for (String lemma : dict.getAllPolysemousLemmas())
			mfsDecsOrth.put(lemma, mfsDecsId.get(dict.getSenseEntryIdFromOrth(lemma)));

		return mfsDecsOrth;
	}

	/**
	 * Creates word experts.
	 * <p>
	 * Each method description has the form <code>name</code> or
	 * <code>name:params</code>, where name is one of <code>lesk</code>,
	 * <code>random</code>, <code>mfs</code> (case-insensitive). The
	 * <code>lesk</code> expert requires the <code>:params</code> part. A
	 * description that cannot be interpreted is reported on standard output
	 * and skipped.
	 * 
	 * @param dict
	 *            dictionary of senses
	 * @param methods
	 *            descriptions of methods
	 * @param corpus
	 *            needed only for MFS expert
	 * @param takipiManager
	 *            to takipize sense glosses; its cache is saved at the end if
	 *            the manager is not null
	 * @return map: word -> word's experts
	 * @throws Exception
	 *             propagated from expert construction
	 */
	public Map<String, List<WordExpert>> createWordExperts(SenseInventory dict, List<String> methods, TEICorpus corpus,
			TakipiManager takipiManager) throws Exception {

		Map<String, String> mfsDecsOrth = null; // needed for mfs expert, computed lazily

		Map<String, List<WordExpert>> wordExperts = new HashMap<String, List<WordExpert>>();
		for (String lemma : dict.getAllPolysemousLemmas()) {
			Collection<? extends Sense> senses = dict.getSensesForLemma(lemma);

			List<WordExpert> experts = new ArrayList<WordExpert>();
			for (String params : methods) {
				String[] splitted = params.split(":", 2);
				String weClass = splitted[0];

				if (weClass.equalsIgnoreCase("lesk")) {
					if (splitted.length < 2) {
						// "lesk" without ":params" used to fail with an
						// ArrayIndexOutOfBoundsException
						System.out.println("Wrong expert specification: " + params);
					} else {
						experts.add(new WELesk(lemma, senses, splitted[1], takipiManager));
					}

				} else if (weClass.equalsIgnoreCase("random")) {
					experts.add(new WERandom(lemma, senses));

				} else if (weClass.equalsIgnoreCase("mfs")) {
					if (mfsDecsOrth == null)
						mfsDecsOrth = createMFSDecs(dict, corpus);
					experts.add(new WEMFS(lemma, senses, mfsDecsOrth.get(lemma)));

				} else {
					System.out.println("Wrong expert specification: " + params);
				}
			}

			wordExperts.put(lemma, experts);
		}

		// the manager may legitimately be absent when no expert needs it
		if (takipiManager != null)
			takipiManager.saveCache();
		return wordExperts;
	}

	/**
	 * Create word MFS experts.
	 * 
	 * @param dict
	 *            dictionary of senses
	 * @param corpus
	 *            to calculate mfs from
	 * @return map: orth to list with one mfs expert for word
	 */
	public Map<String, List<WordExpert>> createWordExpertsMfs(SenseInventory dict, Corpus corpus) {

		// calculating mfs decisions : sense entry id -> decision
		CorpusTools ct = new TEICorpusTools();
		SenseStatistics globalStats = ct.getSenseStatistics(corpus);
		Map<String, String> mfsDecsId = ct.getMFSDecisionsfromStats(globalStats);

		Map<String, List<WordExpert>> wordExperts = new HashMap<String, List<WordExpert>>();
		for (String lemma : dict.getAllPolysemousLemmas()) {
			Collection<? extends Sense> senses = dict.getSensesForLemma(lemma);
			String mfsDecision = mfsDecsId.get(dict.getSenseEntryIdFromOrth(lemma));

			List<WordExpert> experts = new ArrayList<WordExpert>();
			experts.add(new WEMFS(lemma, senses, mfsDecision));

			wordExperts.put(lemma, experts);
		}
		return wordExperts;
	}

}
