/*
 * 
 *  Copyright (C) 2011 Mateusz Kopec
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 *
 */

package evaluation;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;

import utils.Counter;
import corpusapi.tei.TEISense;
import corpusapi.tei.TEISenseEntry;
import corpusapi.tei.TEISenseInventory;

/**
 * Statistics of a sense annotation.
 * <p>
 * The map itself goes from a lexeme to a {@link Counter} that holds, for each
 * subsense id of that lexeme, the number of annotations recorded with it.
 * Lexemes and subsenses are pre-registered (with a count of zero) from the
 * sense inventory passed to the constructor; further ones are added by
 * {@link #addAnnotation(String, String)}.
 * <p>
 * Query methods never add entries to the map: asking about an unknown lexeme
 * yields an empty list or a zero count.
 * 
 * @author Mateusz Kopec
 * 
 */
public class AnnotationStats extends HashMap<String, Counter> {

	private static final long serialVersionUID = 2462719502167974479L;

	/**
	 * Maps: sense id -> orth of lexeme. Not filled by this class; callers
	 * populate it.
	 */
	public Map<String, String> senseIdToOrth = new HashMap<String, String>();
	/**
	 * Maps: sense id -> part of speech of lexeme. Not filled by this class;
	 * callers populate it. {@link #getLexemeSet(String)} looks lexemes (the
	 * keys of this statistics map) up in it.
	 */
	public Map<String, String> senseIdToPos = new HashMap<String, String>();

	/**
	 * Creates statistics with a zero count registered for every sense of every
	 * entry of the given inventory.
	 * 
	 * @param dict
	 *            dictionary of senses
	 */
	public AnnotationStats(TEISenseInventory dict) {
		for (Entry<String, TEISenseEntry> entry : dict.getSenseEntries().entrySet()) {
			Counter counter = this.getCounterForLexeme(entry.getKey());
			for (TEISense sense : entry.getValue().getSenseList()) {
				counter.put(sense.getId(), 0);
			}
		}
	}

	/**
	 * Gets number of annotations over all lexemes.
	 * 
	 * @return number
	 */
	public int getAllCount() {
		return getAllCount(null);
	}

	/**
	 * Gets number of annotations for given part of speech.
	 * 
	 * @param pos
	 *            part of speech, or <code>null</code> for all lexemes
	 * @return number
	 */
	public int getAllCount(String pos) {
		int result = 0;
		for (String lexeme : this.getLexemeSet(pos)) {
			result += this.getCountForLexeme(lexeme);
		}
		return result;
	}

	/**
	 * Gets number of annotations for lexeme, i.e. the sum of the counts of all
	 * its subsenses.
	 * 
	 * @param lexeme
	 *            chosen lexeme
	 * @return number (0 for an unknown lexeme)
	 */
	public int getCountForLexeme(String lexeme) {
		int result = 0;
		Counter counter = this.get(lexeme);
		if (counter != null) {
			for (int count : counter.values()) {
				result += count;
			}
		}
		return result;
	}

	/**
	 * Returns the counter of a lexeme, creating and registering an empty one
	 * when the lexeme is not known yet. Only for code paths that are meant to
	 * modify the statistics.
	 */
	private Counter getCounterForLexeme(String lexeme) {
		Counter counter = this.get(lexeme);
		if (counter == null) {
			counter = new Counter();
			this.put(lexeme, counter);
		}
		return counter;
	}

	/**
	 * Reads the count stored for a subsense in the given counter.
	 * 
	 * @return stored count, or 0 when the counter has no value for the subsense
	 */
	private static int countOf(Counter counter, String subsenseId) {
		Integer count = counter.get(subsenseId);
		if (count != null) {
			return count;
		}
		return 0;
	}

	/**
	 * Returns the highest annotation count among the subsenses of a lexeme.
	 * 
	 * @return highest count, or 0 when the lexeme is unknown or has no
	 *         subsenses
	 */
	private int getMaxCountForLexeme(String lexeme) {
		int max = 0;
		Counter counter = this.get(lexeme);
		if (counter != null) {
			for (int count : counter.values()) {
				if (count > max) {
					max = count;
				}
			}
		}
		return max;
	}

	/**
	 * Adds an annotation of chosen lexeme with chosen sense id. Lexemes equal
	 * to "NULL" (ignoring case) are skipped.
	 * 
	 * @param lexeme
	 *            annotated lexeme
	 * @param senseId
	 *            id of the sense chosen for it
	 */
	public void addAnnotation(String lexeme, String senseId) {
		if (!lexeme.equalsIgnoreCase("NULL")) {
			Counter counter = this.getCounterForLexeme(lexeme);
			counter.increase(senseId);
		}
	}

	/**
	 * Returns all lexemes. The result is the key set of this map.
	 * 
	 * @return lexemes
	 */
	public Set<String> getLexemeSet() {
		return this.keySet();
	}

	/**
	 * Returns set of lexemes of chosen part of speech. Lexemes for which
	 * {@link #senseIdToPos} has no entry are left out.
	 * 
	 * @param pos
	 *            part of speech, or <code>null</code> for all lexemes
	 * @return lexemes
	 */
	public Set<String> getLexemeSet(String pos) {
		if (pos == null) {
			return getLexemeSet();
		}

		Set<String> result = new HashSet<String>();
		for (String lexeme : this.keySet()) {
			// pos is non-null here, so this comparison is safe even when the
			// lexeme has no part of speech registered
			if (pos.equals(senseIdToPos.get(lexeme))) {
				result.add(lexeme);
			}
		}
		return result;
	}

	/**
	 * Returns list of subsense ids of given lexeme, ordered from the most to
	 * the least frequently annotated.
	 * 
	 * @param lexeme
	 *            chosen lexeme
	 * @return list of subsense ids (empty for an unknown lexeme)
	 */
	public List<String> getSubsensesList(final String lexeme) {
		// plain lookup: a query must not register the lexeme as a side effect
		final Counter counter = this.get(lexeme);
		if (counter == null) {
			return new ArrayList<String>();
		}

		List<String> result = new ArrayList<String>(counter.keySet());
		Collections.sort(result, new Comparator<String>() {

			@Override
			public int compare(String first, String second) {
				// arguments swapped on purpose: descending order of count
				return Integer.valueOf(countOf(counter, second)).compareTo(countOf(counter, first));
			}
		});
		return result;
	}

	/**
	 * Get the RARE value for given subsense of a lexeme, computed as
	 * <code>ln(all / subsenseCount) + 1</code>, where <code>all</code> is the
	 * number of annotations of the lexeme.
	 * 
	 * @param lexeme
	 *            chosen lexeme
	 * @param subsenseId
	 *            subsense of this lexeme
	 * @return the weight, or 0 when the subsense was never annotated
	 */
	public double getSenseWeight(String lexeme, String subsenseId) {
		int all = this.getCountForLexeme(lexeme);
		int subsenseCount = this.getCountForSubsense(lexeme, subsenseId);
		if (subsenseCount == 0) {
			return 0;
		}
		return Math.log(1.0 * all / subsenseCount) + 1;
	}

	/**
	 * Returns number of annotations with particular subsense.
	 * 
	 * @param lexeme
	 *            chosen lexeme
	 * @param subsenseId
	 *            subsense of this lexeme
	 * @return number (0 when the lexeme or the subsense is unknown)
	 */
	public int getCountForSubsense(String lexeme, String subsenseId) {
		// plain lookup: a query must not register the lexeme as a side effect
		Counter counter = this.get(lexeme);
		if (counter == null) {
			return 0;
		}
		return countOf(counter, subsenseId);
	}

	/**
	 * Returns MFS accuracy for a lexeme: the share of its annotations that
	 * carry its most frequently annotated subsense.
	 * 
	 * @param lexeme
	 *            chosen lexeme
	 * @return mfs accuracy; {@link Double#NaN} when the lexeme has no
	 *         annotations
	 */
	public double getMfsForLexeme(String lexeme) {
		return 1.0 * getMaxCountForLexeme(lexeme) / getCountForLexeme(lexeme);
	}

	/**
	 * Returns list of pairs : {@code <lexeme, MFS accuracy>}, sorted from
	 * lowest mfs.
	 * 
	 * @return list
	 */
	public List<Entry<String, Double>> getSortedMfs() {
		return getSortedMfs(null);
	}

	/**
	 * Returns list of pairs : {@code <lexeme, MFS accuracy>}, sorted from
	 * lowest mfs (and filtered for only chosen part of speech). Ordering
	 * follows {@link Double#compareTo(Double)}, so lexemes without annotations
	 * (accuracy NaN) end up last.
	 * 
	 * @param pos
	 *            part of speech, or <code>null</code> for all lexemes
	 * @return list
	 */
	public List<Entry<String, Double>> getSortedMfs(String pos) {
		Map<String, Double> mfs = new HashMap<String, Double>();
		for (String lexeme : this.getLexemeSet(pos)) {
			mfs.put(lexeme, this.getMfsForLexeme(lexeme));
		}

		List<Entry<String, Double>> sorted = new ArrayList<Entry<String, Double>>(mfs.entrySet());
		Collections.sort(sorted, new Comparator<Entry<String, Double>>() {

			@Override
			public int compare(Entry<String, Double> o1, Entry<String, Double> o2) {
				return o1.getValue().compareTo(o2.getValue());
			}
		});
		return sorted;
	}

	/**
	 * Gets random accuracy for this annotation and given part of speech: the
	 * expected share of correct answers when a subsense of each annotated
	 * lexeme is picked uniformly at random.
	 * 
	 * @param pos
	 *            part of speech, or <code>null</code> for all lexemes
	 * @return random accuracy; {@link Double#NaN} when there are no
	 *         annotations
	 */
	public Double getTotalRandomAccuracy(String pos) {

		double expectedCorrect = 0.0;
		int totalAll = 0;

		for (String lexeme : getLexemeSet(pos)) {
			Counter counter = this.get(lexeme);
			int senses = (counter == null) ? 0 : counter.keySet().size();
			if (senses == 0) {
				// no subsenses means no annotations either; skipping avoids a
				// division by zero without changing the result
				continue;
			}
			int count = getCountForLexeme(lexeme);
			// floating-point division: the expected number of correct guesses
			// is fractional and must not be truncated per lexeme
			expectedCorrect += (double) count / senses;
			totalAll += count;
		}

		return expectedCorrect / totalAll;
	}

	/**
	 * Calculates MFS accuracy for given annotation and part of speech: the
	 * share of all annotations that carry the most frequent subsense of their
	 * lexeme.
	 * 
	 * @param pos
	 *            part of speech, or <code>null</code> for all lexemes
	 * @return mfs accuracy; {@link Double#NaN} when there are no annotations
	 */
	public Double getTotalMFSAccuracy(String pos) {

		int totalCorrect = 0;
		int totalAll = 0;

		for (String lexeme : getLexemeSet(pos)) {
			totalCorrect += getMaxCountForLexeme(lexeme);
			totalAll += getCountForLexeme(lexeme);
		}

		return (1.0 * totalCorrect) / totalAll;
	}

}
