package corpusapi.tei.readers;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.StringReader;

import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.EndElement;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;

import corpusapi.tei.TEICorpusText;
import corpusapi.tei.TEITextBlock;

public class TEITextReader extends TEIReader {

	protected StringReader textOrthReader;
	protected String orthFileContents;
	
	protected TEITextReader(String fileName, TEICorpusText corpusText) {
		super(fileName, corpusText);
	}
	
	/**
	 * Probuje zwolnic wykorzystywane zasoby poprzez zwolnienie pamieci
	 * zarezerwowanej na wczytany plik.
	 * 
	 * @throws XMLStreamException
	 * @throws IOException 
	 * 
	 */
	public void close() throws XMLStreamException, IOException {		
		if (textOrthReader != null) {
			textOrthReader.close();
			textOrthReader = null;
		}

		orthFileContents = null;
		super.close();
	}
	
	/**
	 * Zwraca domyslny strumien do odczytu z przypisanego pliku XML form ortograficznych
	 * w tekście lub tworzy nowy strumien, jesli wczesniej nie istnial. Ta metoda zawsze tworzy
	 * maksymalnie jeden strumien.
	 * 
	 * @return strumien do odczytu z pliku.
	 * @throws IOException
	 */
	public StringReader getTextReaderForOrth() throws IOException {
		if (textOrthReader == null) {
			textOrthReader = createTextReaderForOrth();
		}
		return textOrthReader;
	}

	/**
	 * Wczytuje plik XML do pamieci, jesli wczesniej nie zostal wczytany oraz
	 * tworzy nowy obiekt klasy StringReader do odczytu z tego pliku. Ta metoda
	 * odczytuje plik z dysku maksymalnie jeden raz. Dodatkowo przywraca znaki
	 * niedozwolone w xml-u.
	 * 
	 * @return obiekt klasy StringReader do czytania z pliku XML
	 * @throws IOException
	 */
	public StringReader createTextReaderForOrth() throws IOException {
		if (orthFileContents == null) {
			BufferedReader br = new BufferedReader(new FileReader(file));

			String line = null;
			StringBuilder buffer = new StringBuilder((int) file.length());

			while ((line = br.readLine()) != null) {
				buffer.append(line);
			}

			br.close();
			orthFileContents = buffer.toString();
			
			orthFileContents = orthFileContents.replace("&quot;", "\""); // zamiana dla text.xml
			orthFileContents = orthFileContents.replace("&amp;", "&"); // zamiana dla text.xml
			orthFileContents = orthFileContents.replace("&gt;", ">"); // zamiana dla text.xml
			orthFileContents = orthFileContents.replace("&lt;", "<"); // zamiana dla text.xml
			orthFileContents = orthFileContents.replace("&apos;", "'"); // zamiana dla text.xml

		}
		StringReader sr = new StringReader(orthFileContents);
		sr.mark(0);

		return sr;
	}
	

	/** Pobiera TextBlock o podanym id lub null, gdy nie ma.
	 * @param textBlockId
	 * @return TextBlock o podanym id lub null, gdy nie ma
	 * @throws XMLStreamException
	 * @throws IOException
	 */
	public TEITextBlock getTextBlockById(String textBlockId) throws XMLStreamException, IOException {
		XMLEventReader eventReader = createEventReader();
		
		boolean prevRelated = true;
		
		int textBlockLength = 0;
		int textBlockFilePosition = 0;
		int textBlockPosition = -1;
		boolean textBlockNextRelated = false;
		String textBlockType = null;
		boolean textBlockFound = false;
		
		try {
			boolean blockOpened = false;
			while ((eventReader.hasNext())) {
				XMLEvent event;
				event = eventReader.nextEvent();

				if (event.isEndElement()) {
					EndElement element = (EndElement) event;
					QName qName = element.getName();
					
					if ((qName.getLocalPart().equalsIgnoreCase("ab"))
							|| (qName.getLocalPart().equalsIgnoreCase("p"))
							|| (qName.getLocalPart().equalsIgnoreCase("u"))) {
						if (textBlockFound) {
							textBlockLength = element.getLocation().getCharacterOffset() - textBlockFilePosition;
						}
						blockOpened = false;
					}
				}
				
				if (event.isStartElement()) {
					StartElement element = (StartElement) event;
					QName qName = element.getName();
					
					// Nastepny tag okazal sie byc nowym znacznikiem div
					if (qName.getLocalPart().equalsIgnoreCase("div")) {
						if (textBlockFound) {
							textBlockNextRelated = false;
							break;
						} else {
							prevRelated = false;
						}
					}
					
					if ((qName.getLocalPart().equalsIgnoreCase("ab"))
							|| (qName.getLocalPart().equalsIgnoreCase("p"))
							|| (qName.getLocalPart().equalsIgnoreCase("u"))) {
												
						blockOpened = true;
						
						// Nastepny tag okazal sie byc nastepnym blokiem
						if (textBlockFound) {
							textBlockNextRelated = true;
							break;
						}
						
						QName attrName = new QName("http://www.w3.org/XML/1998/namespace", "id");
						
						textBlockPosition++;
						
						if (textBlockId.equals(element.getAttributeByName(attrName).getValue())) {
							textBlockType = qName.getLocalPart();
							textBlockFound = true;							
						} else {
							prevRelated = true;
						}
					}
				}
				
				if (event.isCharacters()) {
					if ((blockOpened) && (textBlockFound)) {
						Characters chars = (Characters)event;
						textBlockFilePosition = chars.getLocation().getCharacterOffset();
					}
				}
			}
			eventReader.close();
			
		} catch (XMLStreamException e) {
			System.out.println(e);
		}
				
		return new TEITextBlock(corpusText, textBlockId, textBlockType, textBlockFilePosition, textBlockLength, prevRelated, textBlockNextRelated, textBlockPosition);
	}

}
