package corpusapi.tei;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Set;

import javax.xml.stream.XMLStreamException;

import corpusapi.ContinueMode;
import corpusapi.Corpus;
import corpusapi.CorpusFactory;
import corpusapi.CorpusTools;
import corpusapi.SegmentGroup;
import corpusapi.SenseSegmentGroup;
import corpusapi.SenseStatistics;

/**
 * {@link CorpusTools} implementation for TEI corpora.
 *
 * <p>Provides resumable iteration over the sense segment groups of a corpus
 * (optionally filtered by sense entry or by a single sense) and builds sense
 * statistics by scanning every text of a corpus.
 *
 * <p>Sense ids are handled as {@code <prefix>#<senseEntry>.<senseSuffix>}; the
 * part up to and including {@code '#'} is dropped before any comparison. In the
 * examples used in this file, {@code "wyjsc"} is a sense entry while
 * {@code "wyjsc.1"} and {@code "wyjsc.INNE"} are senses of that entry.
 */
public class TEICorpusTools extends CorpusTools {

	/** Sense id (after the {@code '#'}) that is skipped because it carries no usable sense. */
	private static final String NULL_SENSE = "NULL";

	/**
	 * Remembered iteration positions, keyed by
	 * {@code corpus.getId() + "+" + query} (the query is the literal text
	 * {@code "null"} for unfiltered iteration). Entries are never removed, so an
	 * exhausted iteration keeps returning {@code null} for its key.
	 */
	HashMap<String, InCorpusPosition> corpusPositionsMap;

	/** Creates the tools object with no remembered iteration positions. */
	public TEICorpusTools() {
		corpusPositionsMap = new HashMap<String, InCorpusPosition>();
	}

	/**
	 * Not implemented yet.
	 *
	 * @param corpus ignored
	 * @param interp ignored
	 * @return always {@code null}
	 */
	@Override
	public String format(Corpus corpus, List<List<? extends SegmentGroup>> interp) {
		// TODO Auto-generated method stub
		return null;
	}

	/** What a query passed to {@link #getNextSense(Corpus, String)} refers to. */
	private enum QueryKind {
		/** No filter: every sense segment group is returned. */
		ANY,
		/** The query names a sense entry, e.g. {@code "wyjsc"}. */
		SENSE_ENTRY,
		/** The query names a single sense, e.g. {@code "wyjsc.1"} or {@code "wyjsc.INNE"}. */
		SENSE
	}

	/**
	 * Classifies a query string.
	 *
	 * @param senseOrSenseEntry the query, or {@code null}
	 * @return {@link QueryKind#ANY} for {@code null}, {@link QueryKind#SENSE} when
	 *         the query ends with {@code .<digits>} or {@code .INNE}, otherwise
	 *         {@link QueryKind#SENSE_ENTRY}
	 */
	private static QueryKind classify(String senseOrSenseEntry) {
		if (senseOrSenseEntry == null) {
			return QueryKind.ANY;
		}
		// In TEI a sense ends with ".<sense number>"; "wyjsc.INNE" is a sense too.
		if (senseOrSenseEntry.matches(".*\\.[0-9]+") || senseOrSenseEntry.matches(".*\\.INNE")) {
			return QueryKind.SENSE;
		}
		return QueryKind.SENSE_ENTRY;
	}

	/**
	 * Drops everything up to and including the first {@code '#'} of a sense id.
	 * An id without {@code '#'} is returned unchanged.
	 */
	private static String stripReference(String senseId) {
		return senseId.substring(senseId.indexOf('#') + 1);
	}

	/**
	 * Extracts the sense-entry part of a sense (the text before the last
	 * {@code '.'}).
	 *
	 * @param sense sense id with the {@code '#'} prefix already removed
	 * @return the sense entry, or {@code null} when the id contains no
	 *         {@code '.'} and therefore cannot be split (this includes
	 *         {@code "NULL"})
	 */
	private static String senseEntryOf(String sense) {
		int lastDot = sense.lastIndexOf('.');
		if (lastDot < 0) {
			return null;
		}
		return sense.substring(0, lastDot);
	}

	/**
	 * Returns the next sense segment group matching the query, resuming from the
	 * position remembered for this corpus/query pair. Example:
	 * {@code getNextSense(corpus, "wyjsc.1")}.
	 *
	 * @param corpus            the corpus to iterate over
	 * @param senseOrSenseEntry name of a sense or of a sense entry, or
	 *                          {@code null} to accept every group
	 * @return the next matching group, or {@code null} when the corpus has no
	 *         further match
	 */
	private SenseSegmentGroup getNextSense(Corpus corpus, String senseOrSenseEntry) {
		QueryKind kind = classify(senseOrSenseEntry);

		// Resume from the remembered position, or start a new one on first use.
		String positionKey = corpus.getId() + "+" + senseOrSenseEntry;
		InCorpusPosition position = corpusPositionsMap.get(positionKey);
		if (position == null) {
			position = new InCorpusPosition(corpus);
			corpusPositionsMap.put(positionKey, position);
		}

		TEISenseSegmentGroup group;
		while ((group = position.getNext()) != null) {
			if (kind == QueryKind.ANY) {
				return group;
			}

			String sense = stripReference(group.getSenseId());
			String candidate;
			if (kind == QueryKind.SENSE) {
				// A full sense is compared as-is.
				candidate = sense;
			} else {
				if (NULL_SENSE.equals(sense)) {
					System.out.println("Warning! NULL sense");
					continue;
				}
				candidate = senseEntryOf(sense);
				if (candidate == null) {
					// Id without '.': there is no sense entry to compare; the
					// unguarded substring used to throw here.
					continue;
				}
			}

			if (candidate.equals(senseOrSenseEntry)) {
				return group;
			}
		}
		return null;
	}

	/**
	 * Returns the next sense segment group of the corpus, whatever its sense.
	 *
	 * @param corpus the corpus to iterate over
	 * @return the next group, or {@code null} when there are no more
	 */
	public SenseSegmentGroup getNextSenseEntryInstance(Corpus corpus) {
		return getNextSense(corpus, null);
	}

	/**
	 * Returns the next occurrence matching the given query. The argument is
	 * classified by its form, so a full sense such as {@code "raz.2"} is
	 * accepted here as well.
	 *
	 * @param corpus     the corpus to iterate over
	 * @param senseEntry sense entry (or sense) to look for
	 * @return the next matching group, or {@code null} when there are no more
	 */
	@Override
	public SenseSegmentGroup getNextSenseEntryInstance(Corpus corpus, String senseEntry) {
		return getNextSense(corpus, senseEntry);
	}

	/**
	 * Returns the next occurrence matching the given query. Shares its
	 * implementation, and therefore its remembered position for an identical
	 * query string, with {@link #getNextSenseEntryInstance(Corpus, String)}.
	 *
	 * @param corpus the corpus to iterate over
	 * @param sense  sense (or sense entry) to look for
	 * @return the next matching group, or {@code null} when there are no more
	 */
	@Override
	public SenseSegmentGroup getNextSenseInstance(Corpus corpus, String sense) {
		return getNextSense(corpus, sense);
	}

	/**
	 * Scans every text of the corpus and counts sense occurrences.
	 *
	 * <p>Occurrences whose sense is {@code "NULL"} or whose id has no
	 * {@code '.'} are skipped. A text whose first sense group cannot be read
	 * contributes nothing. Every text is closed after it has been scanned, even
	 * if the scan fails.
	 *
	 * @param corpus       the corpus to scan; it is opened by this method
	 * @param senseEntries sense entries to count, or {@code null} to count all
	 * @return the statistics; {@code complete} is set only when
	 *         {@code senseEntries} was {@code null}
	 */
	protected SenseStatistics makeSenseStatistics(Corpus corpus, Set<String> senseEntries) {

		TEISenseStatistics stats = new TEISenseStatistics(corpus.getId());

		corpus.open();

		List<String> textIDs = corpus.getCorpusTextIds();

		for (String textID : textIDs) {
			TEICorpusText corpusText = (TEICorpusText) corpus.getCorpusText(textID);
			try {
				SenseSegmentGroup senseGroup = firstSenseGroup(corpusText);
				while (senseGroup != null) {
					String sense = stripReference(senseGroup.getSenseId());
					// null covers both the "NULL" sense and ids without '.'.
					String senseEntry = senseEntryOf(sense);
					if (senseEntry != null
							&& (senseEntries == null || senseEntries.contains(senseEntry))) {
						stats.addOccurence(senseEntry, sense);
					}
					senseGroup = (SenseSegmentGroup) senseGroup.getNext(ContinueMode.ALWAYS_CONTINUE);
				}
			} finally {
				// Release the text even when scanning it failed.
				corpusText.closeCorpusText();
			}
		}

		stats.complete = (senseEntries == null);

		return stats;
	}

	/**
	 * Reads the first sense segment group of a text on a best-effort basis.
	 *
	 * @param corpusText the text to read from
	 * @return the first group, or {@code null} when there is none or the text
	 *         could not be read
	 */
	private static SenseSegmentGroup firstSenseGroup(TEICorpusText corpusText) {
		try {
			return (SenseSegmentGroup) corpusText.getFirstSegmentGroup(SenseSegmentGroup.class);
		} catch (FileNotFoundException ignored) {
			// Deliberately silent: a text whose file is missing is treated as
			// having no sense groups.
		} catch (XMLStreamException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
		return null;
	}

	/**
	 * Ad-hoc manual check run against {@code data/nowy_format.xml}: prints
	 * statistics for the entry {@code "raz"} and walks a few filtered and
	 * unfiltered iterations.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		CorpusFactory factory = CorpusFactory.getInstance();
		Corpus corpus = factory.getCorpus("data/nowy_format.xml", true);
		corpus.open();
		TEICorpusTools ct = new TEICorpusTools();
		
		TEISenseStatistics stats = (TEISenseStatistics)	ct.getSenseStatistics(corpus,"raz");
		System.out.println(stats.getSenseEntryCount("raz")); 
		Set<String> ss = stats.senseEntrySenseMap.get("raz");
		System.out.println(ss.toString());
		for (String s : ss) {
			System.out.println(s+":"+stats.getSenseCount(s));
		}
		 
		TEISenseSegmentGroup s = null;
		// Interleaved queries: each query string keeps its own position.
		System.out.println(ct.getNextSenseEntryInstance(corpus, "wyjsc").getSenseId());
		System.out.println(ct.getNextSenseEntryInstance(corpus, "raz").getSenseId());
		System.out.println(ct.getNextSenseEntryInstance(corpus, "wyjsc").getSenseId());
		System.out.println(ct.getNextSenseEntryInstance(corpus, "raz").getSenseId());
		System.out.println(ct.getNextSenseEntryInstance(corpus, "raz.2").getSenseId());
//		System.out.println(ct.getNextSenseEntryInstance(corpus, "raz.3").getSenseId());
		while ((s = (TEISenseSegmentGroup) ct.getNextSenseInstance(corpus, "raz.2")) != null) {
			System.out.println("wystapienie: " + s.getSenseId() + " referencja do segmentu: " + s.getFirstSegment());
		}

		while ((s=(TEISenseSegmentGroup)ct.getNextSenseEntryInstance(corpus))!=null) {
			System.out.println(s.getSenseId());
		}

		System.out.println("OK");
		ct.getSenseStatistics(corpus);
		System.out.println("OK");
		ct.makeSenseStatistics(corpus, null);
	}

}

/**
 * Helper class that remembers a position inside a corpus, i.e. the current
 * corpus text and the current sense segment group within it.
 *
 * <p>The text at the current position stays open between calls; it is closed
 * when the position moves on to another text or when the corpus is exhausted.
 * 
 * @author Project Manager
 * 
 */
class InCorpusPosition {
	/** Corpus being iterated. */
	Corpus corpus;
	/** Id of the most recently opened text, or {@code null} before the first one. */
	String textId;
	/** Currently open text, or {@code null} when none is open. */
	TEICorpusText text;
	/** Group returned by the last successful {@link #getNext()}, or {@code null}. */
	TEISenseSegmentGroup ssgroup;
	/** Index into {@code corpus.getCorpusTextIds()} of the next text to open. */
	int textNumber = 0;

	/**
	 * Creates a position placed before the first text of the corpus.
	 *
	 * @param corpus the corpus to iterate over
	 */
	public InCorpusPosition(Corpus corpus) {
		super();
		this.corpus = corpus;
		this.textId = null;
		this.ssgroup = null;
		this.text = null;
	}

	/**
	 * Advances to the next sense segment group of the corpus, moving on to the
	 * following texts when the current one has no more groups.
	 *
	 * @return the next group, or {@code null} when there are no more
	 */
	public TEISenseSegmentGroup getNext() {
		if (ssgroup != null) {
			// Try to stay within the current text first.
			ssgroup = (TEISenseSegmentGroup) ssgroup.getNext();
			if (ssgroup != null) {
				return ssgroup;
			}
		}
		// Either just started or the current text is finished.
		return nextText() ? ssgroup : null;
	}

	/**
	 * Sets {@code ssgroup} to the first sense segment group of the current text.
	 * A text that cannot be read is reported on standard error and treated as
	 * having no groups.
	 *
	 * @return whether such a group exists
	 */
	private boolean getFirstGroupInText() {
		TEISegmentGroup sg = null;
		try {
			sg = text.getFirstSegmentGroup(TEISenseSegmentGroup.class);
		} catch (Exception e) {
			// Best effort: skip the unreadable text, but say which one it was.
			System.err.println("Could not read sense groups of corpus text " + textId);
			e.printStackTrace();
		}		
		ssgroup = (TEISenseSegmentGroup) sg;
		return ssgroup != null;
	}

	/** Closes the currently open text, if any, and forgets it. */
	private void closeCurrentText() {
		if (text != null) {
			text.closeCorpusText();
			text = null;
		}
	}
	
	/**
	 * Moves to the next corpus text that contains a sense segment group, closing
	 * the text left behind. Texts without any group are skipped in a loop, so a
	 * long run of such texts does not deepen the call stack. When the corpus is
	 * exhausted the last open text is closed as well.
	 *
	 * @return whether such a text exists; when {@code true}, {@code ssgroup}
	 *         holds its first group
	 */
	private boolean nextText() {		
		closeCurrentText();
		List<String> textIds = corpus.getCorpusTextIds();
		while (textNumber < textIds.size()) {
			textId = textIds.get(textNumber);
			text = (TEICorpusText) corpus.getCorpusText(textId);
			textNumber++;
			if (getFirstGroupInText()) {
				return true;
			}
			// No sense group in this text: release it and try the next one.
			closeCurrentText();
		}
		return false;
	}

}