package corpusapi.tei;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;

import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.EndElement;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;

import corpusapi.Sense;
import corpusapi.SenseEntry;
import corpusapi.SenseInventory;
import corpusapi.util.MultiSelection;

/**
 * Sense inventory (dictionary of word senses) loaded from a TEI XML file.
 * <p>
 * It is assumed that senses are nested at most two levels deep: top-level
 * senses and their sub-senses.
 * 
 * @author Rafał Młodzki
 * 
 */
public class TEISenseInventory implements SenseInventory {

	/** Namespace of the {@code xml:id} attribute. */
	private static final String XML_NAMESPACE_URI = "http://www.w3.org/XML/1998/namespace";
	/** Qualified name of the {@code xml:id} attribute read from entries and senses. */
	private static final QName ID_ATTRIBUTE = new QName(XML_NAMESPACE_URI, "id");
	/** Name of the (un-namespaced) {@code n} attribute read from senses. */
	private static final QName N_ATTRIBUTE = new QName("n");

	/** Sense entries keyed by entry ID. */
	private final Map<String, TEISenseEntry> senseEntries = new HashMap<String, TEISenseEntry>();
	/** Maps the orthographic form of an entry to its entry ID. */
	private final Map<String, String> senseEntriesBaseMap = new HashMap<String, String>();
	/** Path of the file this inventory was created from (set by {@link #createFromXML(String)}). */
	private String configFile;
	/** Text of the last {@code title} element seen while parsing. */
	private String title;

	/**
	 * Creates an empty sense inventory.
	 */
	public TEISenseInventory() {
	}

	/**
	 * Returns the path of the file passed to {@link #createFromXML(String)}.
	 * 
	 * @return the source file path, or {@code null} if the inventory was not
	 *         created from a file
	 */
	public String getConfigFile() {
		return configFile;
	}

	/**
	 * Builds a sense inventory by streaming through the given XML file
	 * (read as UTF-8).
	 * <p>
	 * Element names are matched case-insensitively on their local part:
	 * {@code entry}, {@code sense}, {@code def}, {@code quote}, {@code hi},
	 * {@code pos}, {@code orth} and {@code title}.
	 * <p>
	 * I/O and XML errors are not propagated: the stack trace is printed and the
	 * inventory built so far (possibly empty or partial) is returned. The
	 * underlying reader is always closed, also when parsing fails.
	 * 
	 * @param senseInventoryFile
	 *            path of the XML file to read
	 * @return the inventory; never {@code null}
	 * @throws NullPointerException
	 *             if a required attribute ({@code xml:id} on {@code entry} or
	 *             top-level {@code sense}, {@code n} on {@code sense}) is
	 *             missing
	 * @throws NumberFormatException
	 *             if the {@code n} attribute of a sense is not an integer
	 */
	public static TEISenseInventory createFromXML(String senseInventoryFile) {
		TEISenseInventory teiSI = new TEISenseInventory();
		teiSI.configFile = senseInventoryFile;

		TEISenseEntry currentSenseEntry = null;
		TEISense currentSense = null;
		TEISense currentSubSense = null;

		// Local name of the most recently opened element; cleared on every end
		// tag, so text that follows a closed child is not attributed to the
		// parent element.
		String currentStartElementName = null;

		boolean inDefinition = false;
		boolean inQuote = false;
		boolean inSelection = false; // inside a <hi> element

		int senseDepth = 0; // nesting depth of the current sense: 1, 2, ...

		MultiSelection currentDefinition = new MultiSelection();
		MultiSelection currentQuote = new MultiSelection();

		/* Read the dictionary file. */
		// NOTE(review): the factory is used with its default settings; if
		// inventory files can come from untrusted sources, consider disabling
		// external entity support - confirm with the deployment.
		XMLInputFactory factory = XMLInputFactory.newInstance();
		InputStreamReader reader = null;
		XMLEventReader eventReader = null;
		try {
			reader = new InputStreamReader(new FileInputStream(senseInventoryFile), "UTF-8");
			eventReader = factory.createXMLEventReader(reader);
			while (eventReader.hasNext()) {
				XMLEvent event = eventReader.nextEvent();

				if (event.isStartElement()) {
					/*
					 * ELEMENT START
					 */
					StartElement element = event.asStartElement();
					String name = element.getName().getLocalPart();
					currentStartElementName = name;

					if ("entry".equalsIgnoreCase(name)) {
						String entryID = attributeValue(element, ID_ATTRIBUTE);
						currentSenseEntry = new TEISenseEntry(entryID, teiSI);
					}

					if ("sense".equalsIgnoreCase(name)) {
						senseDepth++;
						if (senseDepth == 1) { // coarse-grained senses
							String senseID = attributeValue(element, ID_ATTRIBUTE);
							currentSense = new TEISense(senseID, null, senseDepth);
							currentSense.setN(Integer.parseInt(attributeValue(element, N_ATTRIBUTE)));
						} else if (senseDepth == 2) { // fine-grained senses (sub-senses)
							currentSubSense = new TEISense(null, currentSense, senseDepth);
							currentSubSense.setN(Integer.parseInt(attributeValue(element, N_ATTRIBUTE)));
						}
					}

					if ("def".equalsIgnoreCase(name)) {
						inDefinition = true;
						currentDefinition = new MultiSelection();
					}

					if ("quote".equalsIgnoreCase(name)) {
						inQuote = true;
						currentQuote = new MultiSelection();
					}

					if ("hi".equalsIgnoreCase(name)) {
						inSelection = true;
					}
				} else if (event.isCharacters()) {
					/*
					 * CHARACTER CONTENT OF AN ELEMENT
					 */
					Characters characters = event.asCharacters();
					String data = characters.getData();

					// Constant-first comparisons: safe even if no element has
					// been opened yet.
					if ("pos".equalsIgnoreCase(currentStartElementName)) {
						currentSenseEntry.setPOS(data);
					}

					if ("orth".equalsIgnoreCase(currentStartElementName)) {
						currentSenseEntry.setOrthForm(data);
					}

					if (inDefinition) {
						currentDefinition.addText(data, inSelection);
					}

					if (inQuote) {
						currentQuote.addText(data, inSelection);
					}

					// title of the whole sense inventory
					if ("title".equalsIgnoreCase(currentStartElementName)) {
						teiSI.title = data;
					}
				} else if (event.isEndElement()) {
					/*
					 * ELEMENT END
					 */
					EndElement element = event.asEndElement();
					String name = element.getName().getLocalPart();

					currentStartElementName = "";

					if ("entry".equalsIgnoreCase(name)) {
						teiSI.addSenseEntry(currentSenseEntry.getID(), currentSenseEntry);
					}

					if ("sense".equalsIgnoreCase(name)) {
						if (senseDepth == 1) {
							currentSenseEntry.addSense(currentSense);
						} else if (senseDepth == 2) {
							currentSense.addSense(currentSubSense);
						}
						senseDepth--;
					}

					if ("hi".equalsIgnoreCase(name)) {
						inSelection = false;
					}

					if ("def".equalsIgnoreCase(name)) {
						if (senseDepth == 1) {
							currentSense.setDefinition(currentDefinition);
						} else if (senseDepth == 2) {
							currentSubSense.setDefinition(currentDefinition);
						}
						inDefinition = false;
					}

					if ("quote".equalsIgnoreCase(name)) {
						if (senseDepth == 1) {
							currentSense.addQuote(currentQuote);
						} else if (senseDepth == 2) {
							currentSubSense.addQuote(currentQuote);
						}
						inQuote = false;
					}
				}
			}
		} catch (XMLStreamException e) {
			// Best effort: report the problem and return what was read so far.
			e.printStackTrace();
		} catch (IOException e) {
			// Covers FileNotFoundException and unsupported-encoding errors.
			e.printStackTrace();
		} finally {
			// XMLEventReader.close() does not close the underlying input, so
			// both have to be closed explicitly.
			closeQuietly(eventReader);
			closeQuietly(reader);
		}
		return teiSI;
	}

	/**
	 * Returns the value of an attribute of the given element.
	 * 
	 * @param element
	 *            element to read the attribute from
	 * @param attributeName
	 *            qualified name of the attribute
	 * @return the attribute value
	 * @throws NullPointerException
	 *             if the element has no such attribute
	 */
	private static String attributeValue(StartElement element, QName attributeName) {
		return element.getAttributeByName(attributeName).getValue();
	}

	/** Closes the event reader if it was opened; a failure is only reported. */
	private static void closeQuietly(XMLEventReader eventReader) {
		if (eventReader == null) {
			return;
		}
		try {
			eventReader.close();
		} catch (XMLStreamException e) {
			e.printStackTrace();
		}
	}

	/** Closes the character reader if it was opened; a failure is only reported. */
	private static void closeQuietly(InputStreamReader reader) {
		if (reader == null) {
			return;
		}
		try {
			reader.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Returns the internal map of sense entries keyed by entry ID. The map is
	 * not copied, so changes made by the caller affect this inventory.
	 */
	@Override
	public Map<String, TEISenseEntry> getSenseEntries() {
		return senseEntries;
	}

	/**
	 * Looks up a sense entry by its orthographic form.
	 * 
	 * @param orth
	 *            orthographic form of the entry
	 * @return the matching entry, or {@code null} if there is none
	 */
	public TEISenseEntry getSenseEntryByOrth(String orth) {
		return senseEntries.get(getSenseEntryIdFromOrth(orth));
	}

	/**
	 * Returns the ID of the entry registered under the given orthographic form.
	 * 
	 * @param orth
	 *            orthographic form of the entry
	 * @return the entry ID, or {@code null} if the form is unknown
	 */
	public String getSenseEntryIdFromOrth(String orth) {
		return senseEntriesBaseMap.get(orth);
	}

	/**
	 * Finds a sense by its identifier.
	 * <p>
	 * The identifier may carry a prefix terminated by {@code '#'}; only the
	 * part after it is used. The remaining text is split on {@code '.'}: its
	 * first part is the key of the entry, while the whole remaining text is
	 * compared with the IDs of that entry's senses. Only the senses returned by
	 * the entry's {@code getSenseList()} are searched.
	 * 
	 * @param senseId
	 *            sense identifier, e.g. {@code "file#entry.1"} or
	 *            {@code "entry.1"}
	 * @return the matching sense, or {@code null} if the identifier is
	 *         {@code null} or malformed, the entry is unknown, or the entry has
	 *         no sense with that ID
	 */
	public TEISense getSenseById(String senseId) {
		if (senseId == null) {
			return null;
		}

		String[] parts = senseId.split("\\#");
		String entryId;
		if (parts.length == 2) {
			entryId = parts[1];
		} else if (parts.length == 1) {
			entryId = parts[0];
		} else {
			return null;
		}

		parts = entryId.split("\\.");
		if (parts.length != 1 && parts.length != 2) {
			return null;
		}
		String entryName = parts[0];

		TEISenseEntry entry = getSenseEntries().get(entryName);
		if (entry == null) {
			// Unknown entry: report "not found" instead of failing.
			return null;
		}
		for (TEISense sense : entry.getSenseList()) {
			// Constant-first comparison tolerates senses without an ID.
			if (entryId.equals(sense.getId())) {
				return sense;
			}
		}

		return null;
	}

	/**
	 * Returns the name of the dictionary.
	 * 
	 * @return the name of the dictionary, or {@code null} if none was read
	 */
	public String getTitle() {
		return title;
	}

	/**
	 * Registers a sense entry under the given ID and indexes it by its
	 * orthographic form.
	 * 
	 * @param entryID
	 *            ID to register the entry under
	 * @param senseEntry
	 *            the entry to add
	 */
	public void addSenseEntry(String entryID, TEISenseEntry senseEntry) {
		this.senseEntries.put(entryID, senseEntry);
		senseEntriesBaseMap.put(senseEntry.getOrthForm(), entryID);
	}

	/**
	 * Checks whether the given base orthographic form of a word exists in the
	 * dictionary and has more than one sense.
	 * 
	 * @param baseForm
	 *            base orthographic form of the word
	 * @return {@code true} if it exists and has more than one sense, otherwise
	 *         {@code false}
	 */
	public boolean isPolysemousLemma(String baseForm) {
		return getSensesForLemma(baseForm).size() > 1;
	}

	/**
	 * Returns the senses of the given base orthographic form of a word. If the
	 * word is not present in the dictionary, an empty collection is returned.
	 * 
	 * @param baseForm
	 *            base orthographic form of the word
	 * @return collection of senses; never {@code null} for an unknown word
	 */
	public Collection<? extends Sense> getSensesForLemma(String baseForm) {
		SenseEntry se = getSenseEntryByOrth(baseForm);
		if (se != null) {
			return se.getSenseList();
		}
		return new ArrayList<Sense>();
	}

	/**
	 * Returns the IDs of all entries whose sense list has more than one
	 * element.
	 * 
	 * @return a new collection of entry IDs
	 */
	public Collection<String> getAllPolysemousEntryIDs() {
		Collection<String> result = new ArrayList<String>();
		for (TEISenseEntry se : getSenseEntries().values()) {
			if (se.getSenseList().size() > 1) {
				result.add(se.getID());
			}
		}
		return result;
	}

	/**
	 * Returns the orthographic forms of all entries whose sense list has more
	 * than one element.
	 * 
	 * @return a new collection of orthographic forms
	 */
	public Collection<String> getAllPolysemousLemmas() {
		Collection<String> result = new ArrayList<String>();
		for (TEISenseEntry se : getSenseEntries().values()) {
			if (se.getSenseList().size() > 1) {
				result.add(se.getOrthForm());
			}
		}
		return result;
	}
}
