/*
 * 
 *  Copyright (C) 2011 Mateusz Kopec
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 *
 */
package evaluation;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import utils.Utils;
import corpusapi.ContinueMode;
import corpusapi.Corpus;
import corpusapi.SenseSegmentGroup;
import corpusapi.tei.TEICorpusText;
import corpusapi.tei.TEISenseInventory;
import corpusapi.tei.TEISenseSegmentGroup;
import evaluation.AnnotationReader.SegmentAnnotation;

/**
 * Compares the sense annotations produced by a set of methods against the
 * gold standard annotation of a corpus, and prints the resulting statistics
 * as comma-separated rows.
 */
public class Evaluator {

	/** Column separator used in all printed result rows. */
	private static final String SEPARATOR = ",";

	/**
	 * First component of a method id (components are separated by ':') for
	 * which every component is printed in its own column.
	 */
	private static final String LESK_METHOD_NAME = "lesk";

	/**
	 * Number of empty columns printed after the name of a method other than
	 * {@link #LESK_METHOD_NAME}. NOTE(review): presumably equal to the number
	 * of parameter components in a lesk method id, so that the numeric
	 * columns line up - confirm against the lesk id format.
	 */
	private static final int NON_LESK_PADDING_COLUMNS = 12;

	/**
	 * Criterion used to order method evaluations, always from the highest
	 * value to the lowest.
	 */
	public enum Sorting {
		/** Order by {@code getNumberOfImprovements}. */
		MFS_IMPROVEMENT,
		/** Order by {@code getWeightedRank}. */
		RARE,
		/** Order by {@code getTotalAccuracy}. */
		ACCURACY
	}

	/**
	 * Scores every annotation method known to the reader against the gold
	 * standard sense annotation of the corpus. Progress and summary counts
	 * are printed to standard output.
	 * <p>
	 * A text whose processing throws an exception is reported, counted as a
	 * text with errors and skipped; scores recorded for that text before the
	 * exception occurred are not rolled back.
	 * 
	 * @param corpus
	 *            corpus holding the gold standard annotation; its texts are
	 *            cast to {@link TEICorpusText}
	 * @param annotationReader
	 *            source of the annotations to evaluate
	 * @param dict
	 *            sense inventory handed to each created
	 *            {@link SingleMethodEvaluation}
	 * @return evaluations of all methods listed by the reader (a view of the
	 *         internal map values)
	 */
	public Collection<SingleMethodEvaluation> evaluate(Corpus corpus, AnnotationReader annotationReader,
			TEISenseInventory dict) {
		System.out.println("Evaluating given annotation on given corpus...");

		// global stats
		int segmentsUnknownInOracle = 0;
		int segmentsAll = 0; // known in oracle
		int textsWithoutErrors = 0;
		int textsWithErrors = 0;

		// stats of each method
		Map<String, SingleMethodEvaluation> allMethods = new HashMap<String, SingleMethodEvaluation>();
		for (String methodName : annotationReader.getAnnotatorsNames()) {
			allMethods.put(methodName, new SingleMethodEvaluation(methodName, dict));
		}

		// for each text in corpus
		for (String corpusTextId : corpus.getCorpusTextIds()) {
			TEICorpusText corpusText = (TEICorpusText) corpus.getCorpusText(corpusTextId);

			try {
				annotationReader.setCurrentCorpusText(corpusText);

				// read all gold standard annotations for given text:
				// id of the first segment of each group -> sense id
				Map<String, String> goldStandardAnnotations = new HashMap<String, String>();
				TEISenseSegmentGroup currentSsg = (TEISenseSegmentGroup) corpusText
						.getFirstSegmentGroup(SenseSegmentGroup.class);
				while (currentSsg != null) {
					goldStandardAnnotations.put(currentSsg.getSegmentIds().get(0), currentSsg.getSenseId());
					currentSsg = currentSsg.getNext(ContinueMode.ALWAYS_CONTINUE);
				}

				// iterate through annotated polysemous segments in text
				SegmentAnnotation singleSegmentAnnotations;
				while ((singleSegmentAnnotations = annotationReader.getNextTaggedSegment()) != null) {

					String segmentId = singleSegmentAnnotations.segmentId;
					String lexeme = singleSegmentAnnotations.lexeme;

					String correctSenseId = goldStandardAnnotations.get(segmentId);

					if (correctSenseId == null) {
						segmentsUnknownInOracle++;
						continue;
					}

					// calculation of single annotation score
					segmentsAll++;

					for (Entry<String, String> annotation : singleSegmentAnnotations.annotations.entrySet()) {
						String methodId = annotation.getKey();
						String predictedSenseId = annotation.getValue();

						// the subsense is the second '#'-separated component
						// of the gold standard sense id
						String subsenseId = correctSenseId.split("#")[1];

						SingleMethodEvaluation methodEvaluation = allMethods.get(methodId);
						if (methodEvaluation == null) {
							// fail with a readable reason instead of a
							// message-less NullPointerException; handled by
							// the catch below like any other per-text error
							throw new IllegalStateException("Annotation of segment " + segmentId
									+ " comes from a method not listed by the annotation reader: " + methodId);
						}

						if (predictedSenseId.equals("")) {
							methodEvaluation.segmentsTaggedAsUnknown++;
						} else if (predictedSenseId.equals(correctSenseId)) {
							methodEvaluation.addCorrectAnnotation(lexeme, subsenseId);
						} else {
							methodEvaluation.addIncorrectAnnotation(lexeme, subsenseId);
						}
					}
				}

				textsWithoutErrors++;

			} catch (Exception e) {
				e.printStackTrace();
				// getMessage() may be null (e.g. for a NullPointerException);
				// dereferencing it here would abort the whole evaluation
				String message = e.getMessage();
				String reason = (message != null) ? message : e.toString();
				System.out.println(corpusText.getPath() + " Skipping text because of: "
						+ reason.replaceAll("\\n", " "));
				textsWithErrors++;
			} finally {
				corpusText.closeCorpusText();
			}
		}
		System.out.println();
		System.out.println("Texts - all          : " + (textsWithErrors + textsWithoutErrors));
		System.out.println("Texts without errors : " + textsWithoutErrors);
		System.out.println("Texts with errors    : " + textsWithErrors);
		System.out.println();
		System.out.println("Segments - all (known in oracle) : " + segmentsAll);
		System.out.println("Segments - unknown in oracle     : " + segmentsUnknownInOracle);
		System.out.println();

		return allMethods.values();
	}

	/**
	 * Sorts evaluation results of many methods, according to: total accuracy or
	 * number of lexemes for which the method is better than mfs or heuristic
	 * measure. The input collection is not modified.
	 * 
	 * @param evals
	 *            evaluation results of many methods
	 * @param sortingType
	 *            sorting order; for {@code null} the results are returned in
	 *            the iteration order of {@code evals}
	 * @param goldenStandard
	 *            golden standard for mfs calculation
	 * @return new list with sorted evaluation results of many methods
	 */
	private List<SingleMethodEvaluation> getSortedEvaluations(Collection<SingleMethodEvaluation> evals,
			Sorting sortingType, final AnnotationStats goldenStandard) {

		List<SingleMethodEvaluation> sorted = new ArrayList<SingleMethodEvaluation>(evals);

		Comparator<SingleMethodEvaluation> comp = comparatorFor(sortingType, goldenStandard);
		if (comp != null) {
			Collections.sort(sorted, comp);
		}
		return sorted;
	}

	/**
	 * Builds the descending comparator for the given sorting type.
	 * 
	 * @param sortingType
	 *            sorting order, may be {@code null}
	 * @param goldenStandard
	 *            golden standard passed on to the compared measures
	 * @return the comparator, or {@code null} when no sorting is requested
	 */
	private Comparator<SingleMethodEvaluation> comparatorFor(Sorting sortingType,
			final AnnotationStats goldenStandard) {

		if (Sorting.MFS_IMPROVEMENT.equals(sortingType)) {
			// results sorted from the biggest number of improved lexemes
			return new Comparator<SingleMethodEvaluation>() {
				@Override
				public int compare(SingleMethodEvaluation o1, SingleMethodEvaluation o2) {
					return Integer.valueOf(o2.getNumberOfImprovements(null, goldenStandard)).compareTo(
							o1.getNumberOfImprovements(null, goldenStandard));
				}
			};
		}
		if (Sorting.ACCURACY.equals(sortingType)) {
			// results sorted from the best total accuracy
			return new Comparator<SingleMethodEvaluation>() {
				@Override
				public int compare(SingleMethodEvaluation o1, SingleMethodEvaluation o2) {
					return o2.getTotalAccuracy().compareTo(o1.getTotalAccuracy());
				}
			};
		}
		if (Sorting.RARE.equals(sortingType)) {
			// results sorted from the maximum heuristic measure
			return new Comparator<SingleMethodEvaluation>() {
				@Override
				public int compare(SingleMethodEvaluation o1, SingleMethodEvaluation o2) {
					return Double.valueOf(o2.getWeightedRank(null, goldenStandard)).compareTo(
							o1.getWeightedRank(null, goldenStandard));
				}
			};
		}
		return null; // no sorting
	}

	/**
	 * Prints evaluation results to a given writer, one row per method, in the
	 * iteration order of {@code smes}. A row starts with the method id: for a
	 * lesk method every ':'-separated component of the id gets its own
	 * column, for any other method the first component is followed by empty
	 * padding columns. The row ends with the rounded total accuracy, the
	 * number of improvements and the rounded weighted rank.
	 * 
	 * @param bw
	 *            writer to output results
	 * @param smes
	 *            method evaluations
	 * @param goldenStandard
	 *            golden standard passed on to the printed measures
	 * @throws IOException
	 *             if writing to {@code bw} fails
	 */
	public void printEvaluationResults(BufferedWriter bw, Collection<SingleMethodEvaluation> smes,
			AnnotationStats goldenStandard) throws IOException {

		for (SingleMethodEvaluation result : smes) {
			String[] idParts = result.getMethodId().split(":");
			if (idParts[0].equals(LESK_METHOD_NAME)) {
				for (String part : idParts) {
					bw.append(part + SEPARATOR);
				}
			} else {
				bw.append(idParts[0] + SEPARATOR);
				for (int i = 0; i < NON_LESK_PADDING_COLUMNS; i++) {
					bw.append(SEPARATOR);
				}
			}

			bw.append("" + Utils.round(result.getTotalAccuracy(null, goldenStandard)));
			bw.append(SEPARATOR + result.getNumberOfImprovements(null, goldenStandard));
			bw.append(SEPARATOR + Utils.round(result.getWeightedRank(null, goldenStandard)));

			bw.append("\n");
		}
	}

	/**
	 * Prints evaluation results to a given writer. Each row is organized as
	 * follows: lexeme, number of its occurrences, '/'-joined occurrence
	 * counts of its subsenses, baseline (mfs) accuracy for the lexeme, random
	 * choice accuracy (1 / number of subsenses), and then the accuracy on
	 * this lexeme of each method, in the iteration order of {@code results}.
	 * Rows follow the order of {@code goldenStandard.getSortedMfs()}.
	 * 
	 * @param w
	 *            writer
	 * @param results
	 *            method evaluations
	 * @param goldenStandard
	 *            golden standard statistics the rows are built from
	 * @throws IOException
	 *             if writing to {@code w} fails
	 */
	public void printEvaluationResultsOnLexemes(Writer w, Collection<SingleMethodEvaluation> results,
			AnnotationStats goldenStandard) throws IOException {

		// row ordering
		List<Entry<String, Double>> sortedMfs = goldenStandard.getSortedMfs();

		for (Entry<String, Double> mfsEntry : sortedMfs) {
			String lemma = mfsEntry.getKey();
			double baseAcc = mfsEntry.getValue();

			List<String> subsenses = goldenStandard.getSubsensesList(lemma);
			double randomAcc = 1.0 / subsenses.size();

			List<String> line = new ArrayList<String>();
			line.add(goldenStandard.senseIdToOrth.get(lemma));
			line.add(Integer.toString(goldenStandard.getCountForLexeme(lemma)));

			List<String> subsenseCounts = new ArrayList<String>();
			for (String subsense : subsenses) {
				subsenseCounts.add(Integer.toString(goldenStandard.getCountForSubsense(lemma, subsense)));
			}
			line.add(Utils.join(subsenseCounts, "/"));

			line.add(Double.toString(Utils.round(baseAcc)));
			line.add(Double.toString(Utils.round(randomAcc)));

			for (SingleMethodEvaluation result : results) {
				double acc = result.getLexemeAccuracy(lemma);
				line.add(Double.toString(Utils.round(acc)));
			}

			w.append(Utils.join(line, SEPARATOR) + "\n");
		}
	}

	/**
	 * Selects best methods from a method evaluation list.
	 * 
	 * @param results
	 *            methods evaluations to choose from
	 * @param sortingType
	 *            type of comparison; {@code null} keeps the iteration order
	 *            of {@code results}
	 * @param howMany
	 *            how many best to select; {@code null} selects all, a value
	 *            larger than the number of results selects all of them
	 * @param goldenStandard
	 *            for comparison to mfs
	 * @return new list with the selected methods, best first
	 * @throws IllegalArgumentException
	 *             if {@code howMany} is negative
	 */
	public Collection<SingleMethodEvaluation> selectBestMethods(Collection<SingleMethodEvaluation> results,
			Sorting sortingType, Integer howMany, AnnotationStats goldenStandard) {
		List<SingleMethodEvaluation> smes = getSortedEvaluations(results, sortingType, goldenStandard);
		if (howMany == null) {
			return new ArrayList<SingleMethodEvaluation>(smes);
		}
		if (howMany < 0) {
			throw new IllegalArgumentException("howMany must not be negative: " + howMany);
		}
		int limit = Math.min(howMany, smes.size());
		return new ArrayList<SingleMethodEvaluation>(smes.subList(0, limit));
	}

	/*
	 * Disabled chart-oriented output, kept for reference:
	 * 
	 * public void
	 * printEvaluationResultsForChart(Collection<SingleMethodEvaluation>
	 * selectedResults, AnnotationStats goldenStandard) { List<Entry<String,
	 * Double>> sorted = goldenStandard.getSortedMfs();
	 * 
	 * for (Entry<String, Double> e : sorted) {
	 * System.out.print(e.getKey()+" ");
	 * System.out.print(goldenStandard.senseIdToPos.get(e.getKey())+" ");
	 * System.out.print(e.getValue()+" ");
	 * System.out.print(1.0/goldenStandard.getSubsensesList
	 * (e.getKey()).size()+" ");
	 * 
	 * for (SingleMethodEvaluation s : selectedResults) { double acc =
	 * s.getLexemeAccuracy(e.getKey()); System.out.print(acc+" "); }
	 * 
	 * System.out.println(); } System.out.println(); }
	 */
}
