/*
 * Decompiled with CFR 0.152.
 */
package corpusapi.util;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Vector;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.EndElement;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric;
import uk.ac.shef.wit.simmetrics.similaritymetrics.Levenshtein;

/**
 * Command-line utility that copies {@code data/NKJP_WSI.xml} to
 * {@code data/NKJP_WSI_UPGRADED.xml}, marking inside every {@code <quote>} line the
 * word that realises the most recently seen {@code <orth>} header.
 *
 * <p>The tool works in two phases:
 * <ol>
 *   <li>{@code data/NKJP_WSI.xml.takipized} is parsed with StAX; for every
 *       {@code <quote>} element the surface form ({@code <orth>}) and the first base
 *       form ({@code <base>}) of each {@code <tok>} are collected.</li>
 *   <li>The plain corpus file is read line by line. Each {@code <quote>} line is
 *       paired, by position, with the n-th quote collected in phase one; tokens whose
 *       base equals the header word are wrapped in {@code <hi rend="bold">}. When no
 *       token matches, the word with the highest Levenshtein similarity is used.</li>
 * </ol>
 *
 * <p>All state is static and mutable; the class is meant to be run once per JVM and
 * is not thread-safe.
 */
public class NKJP_WSI_XML_UPGRADER {
    /** Base forms per quote, in document order of the takipized file. */
    static Vector<Vector<String>> takipiBases = new Vector<Vector<String>>();
    /** Surface forms per quote, parallel to {@link #takipiBases}. */
    static Vector<Vector<String>> takipiOrths = new Vector<Vector<String>>();
    /** Number of {@code <quote>} lines processed so far; selects the takipi quote to use. */
    static int index = 0;
    /** Not used by this class; kept because it is visible to the rest of the package. */
    static int counter = 0;
    /** String metric used by the similarity fallback. */
    static AbstractStringMetric abs = new Levenshtein();

    private static final String TAKIPI_FILE = "data/NKJP_WSI.xml.takipized";
    private static final String INPUT_FILE = "data/NKJP_WSI.xml";
    private static final String OUTPUT_FILE = "data/NKJP_WSI_UPGRADED.xml";

    private static final String QUOTE_OPEN = "<quote>";
    private static final String ORTH_OPEN = "<orth>";
    private static final String ORTH_CLOSE = "</orth>";
    private static final String HI_OPEN = "<hi rend=\"bold\">";
    private static final String HI_CLOSE = "</hi>";

    /**
     * Characters treated as part of a word by the similarity fallback.
     * NOTE(review): 'v' and 'V' are absent, so they act as separators - confirm
     * whether that is intentional before changing it.
     */
    private static final String WORD_CHARS = "abcdefghijklmnopqrstuwxyz"
            + "\u0119\u00f3\u0105\u015b\u0142\u017c\u017a\u0107\u0144"
            + "ABCDEFGHIJKLMNOPQRSTUWXYZ"
            + "\u0118\u00d3\u0104\u015a\u0141\u017b\u0179\u0106\u0143"
            + "1234567890";
    /** Regular expression matching any single character outside {@link #WORD_CHARS}. */
    private static final String NON_WORD_REGEX = "[^" + WORD_CHARS + "]";

    /**
     * Runs both phases. Any failure is reported on {@code System.err}; the method
     * itself never throws a checked exception.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            loadTakipiAnalysis();
            upgradeCorpus();
        }
        catch (Exception e) {
            System.err.println("Error: " + e.getMessage());
            // The message alone is often null (e.g. for a NullPointerException).
            e.printStackTrace();
        }
    }

    /**
     * Phase one: fills {@link #takipiBases} and {@link #takipiOrths} from the takipized
     * file. Parse and I/O problems are printed and otherwise ignored, so phase two
     * still runs with whatever was collected before the failure.
     */
    private static void loadTakipiAnalysis() {
        XMLInputFactory factory = XMLInputFactory.newInstance();
        // Without coalescing a single text node may arrive as several Characters
        // events, and only the last fragment would survive the assignments below.
        factory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);

        FileReader reader = null;
        XMLEventReader eventReader = null;
        try {
            // NOTE(review): FileReader decodes with the platform default charset, not
            // the encoding named in the XML declaration. Kept as is so that this file
            // and the line-based corpus file are decoded the same way.
            reader = new FileReader(TAKIPI_FILE);
            eventReader = factory.createXMLEventReader(reader);

            boolean inQuote = false;
            boolean inOrth = false;
            boolean inBase = false;
            // True until the first <lex> of the current <tok> has been closed, so that
            // only the first base form of a token is recorded.
            boolean freshBaseInTok = true;
            Vector<String> currentBase = null;
            Vector<String> currentOrth = null;
            String orth = null;
            String base = null;

            while (eventReader.hasNext()) {
                XMLEvent event = eventReader.nextEvent();
                if (event.isStartElement()) {
                    StartElement start = event.asStartElement();
                    String name = start.getName().getLocalPart();
                    if ("quote".equalsIgnoreCase(name)) {
                        inQuote = true;
                        currentBase = new Vector<String>();
                        currentOrth = new Vector<String>();
                    } else if ("tok".equalsIgnoreCase(name)) {
                        freshBaseInTok = true;
                    } else if ("orth".equalsIgnoreCase(name)) {
                        inOrth = true;
                    } else if ("base".equalsIgnoreCase(name)) {
                        inBase = true;
                    }
                } else if (event.isCharacters()) {
                    if (inQuote) {
                        Characters text = event.asCharacters();
                        if (inOrth) {
                            orth = text.getData();
                        }
                        if (freshBaseInTok && inBase) {
                            base = text.getData();
                        }
                    }
                } else if (event.isEndElement()) {
                    EndElement end = event.asEndElement();
                    String name = end.getName().getLocalPart();
                    if ("quote".equalsIgnoreCase(name)) {
                        inQuote = false;
                        takipiBases.add(currentBase);
                        takipiOrths.add(currentOrth);
                    } else if ("orth".equalsIgnoreCase(name)) {
                        inOrth = false;
                    } else if ("tok".equalsIgnoreCase(name)) {
                        if (inQuote) {
                            currentBase.add(base);
                            currentOrth.add(orth);
                        }
                    } else if ("lex".equalsIgnoreCase(name)) {
                        freshBaseInTok = false;
                    } else if ("base".equalsIgnoreCase(name)) {
                        inBase = false;
                    }
                }
            }
        }
        catch (FileNotFoundException e) {
            e.printStackTrace();
        }
        catch (XMLStreamException e) {
            e.printStackTrace();
        }
        finally {
            // Release both handles even when parsing failed half-way.
            if (eventReader != null) {
                try {
                    eventReader.close();
                }
                catch (XMLStreamException e) {
                    e.printStackTrace();
                }
            }
            if (reader != null) {
                try {
                    reader.close();
                }
                catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Phase two: streams the corpus file to the output file, rewriting every line
     * whose trimmed text starts with {@code <quote>}. The header word is taken from
     * the latest line whose trimmed text starts with {@code <orth>}.
     *
     * @throws IOException if either file cannot be opened, read or written
     */
    private static void upgradeCorpus() throws IOException {
        BufferedReader in = new BufferedReader(new FileReader(INPUT_FILE));
        try {
            BufferedWriter out = new BufferedWriter(new FileWriter(OUTPUT_FILE));
            try {
                String headword = null;
                String raw;
                while ((raw = in.readLine()) != null) {
                    String trimmed = raw.trim();
                    if (trimmed.startsWith(ORTH_OPEN)) {
                        // Look the closing tag up instead of assuming it is the last
                        // seven characters; a line without it leaves the word unchanged.
                        int close = trimmed.indexOf(ORTH_CLOSE, ORTH_OPEN.length());
                        if (close >= 0) {
                            headword = trimmed.substring(ORTH_OPEN.length(), close);
                        }
                    }
                    if (trimmed.startsWith(QUOTE_OPEN)) {
                        out.write(upgradeQuote(raw, headword));
                    } else {
                        out.write(raw);
                    }
                    out.newLine();
                }
            }
            finally {
                out.close();
            }
        }
        finally {
            in.close();
        }
    }

    /**
     * Similarity fallback: picks the word of {@code line} most similar to {@code base}
     * and marks every stand-alone occurrence of it.
     *
     * <p>The first and the last word of the trimmed line, empty fragments and the
     * literal {@code quote} (the tag name) are never candidates; ties go to the later
     * word. A best score above 0.5 yields {@code <hi rend="bold">word</hi>}; otherwise
     * the word is wrapped as {@code #word$} (presumably to flag a low-confidence guess
     * for manual review - the intent is not documented).
     *
     * @param line the quote line as read from the corpus file
     * @param base the header word to look for; may be {@code null}
     * @return the marked-up line, or {@code line} unchanged when {@code base} is
     *         {@code null} or the line offers no candidate word
     */
    private static String upgradeQuoteSimple(String line, String base) {
        if (base == null) {
            return line;
        }
        String[] words = line.trim().split(NON_WORD_REGEX);
        float best = -1.0f;
        int bestIndex = -1;
        for (int i = 1; i < words.length - 1; ++i) {
            String candidate = words[i];
            if (candidate.length() == 0 || candidate.equals("quote")) {
                continue;
            }
            float score = abs.getSimilarity(base, candidate);
            if (score >= best) {
                best = score;
                bestIndex = i;
            }
        }
        if (bestIndex < 0) {
            // Nothing to mark. Replacing words[0] here (usually the empty string)
            // would insert markers between every character of the line.
            return line;
        }
        String word = words[bestIndex];
        if ((double)best > 0.5) {
            return wrapWholeWords(line, word, HI_OPEN, HI_CLOSE);
        }
        return wrapWholeWords(line, word, "#", "$");
    }

    /**
     * Marks the tokens of the next takipi quote whose base form equals {@code base}.
     *
     * <p>Each call consumes one entry of {@link #takipiBases}/{@link #takipiOrths}
     * (via {@link #index}), so calls must happen in the order the quotes appear.
     * Matching surface forms are located left to right, never inside the opening tag
     * and never inside text that was already wrapped, and each hit is widened to the
     * surrounding run of letters and digits before being wrapped in
     * {@code <hi rend="bold">}. If nothing could be wrapped - no analysis for this
     * quote, no token with that base, or none of the forms found in the line - the
     * result of {@link #upgradeQuoteSimple(String, String)} is returned instead.
     *
     * @param line the quote line as read from the corpus file
     * @param base the header word; may be {@code null}
     * @return the marked-up line
     */
    private static String upgradeQuote(String line, String base) {
        int quoteNo = index;
        ++index;
        if (base == null || quoteNo >= takipiBases.size() || quoteNo >= takipiOrths.size()) {
            return NKJP_WSI_XML_UPGRADER.upgradeQuoteSimple(line, base);
        }
        Vector<String> tbs = takipiBases.get(quoteNo);
        Vector<String> tos = takipiOrths.get(quoteNo);

        Vector<String> orthsToReplace = new Vector<String>();
        int tokens = Math.min(tbs.size(), tos.size());
        for (int i = 0; i < tokens; ++i) {
            String orth = tos.get(i);
            // base.equals(...) is null-safe for tokens that had no <base>.
            if (base.equals(tbs.get(i)) && orth != null && orth.length() > 0) {
                orthsToReplace.add(orth);
            }
        }

        int openTag = line.indexOf(QUOTE_OPEN);
        int searchFrom = openTag < 0 ? 0 : openTag + QUOTE_OPEN.length();
        int highlighted = 0;
        for (String rep : orthsToReplace) {
            int hit = findToken(line, rep, searchFrom);
            if (hit < 0) {
                continue;
            }
            int start = hit;
            while (start > searchFrom && Character.isLetterOrDigit(line.charAt(start - 1))) {
                --start;
            }
            int end = hit + rep.length();
            while (end < line.length() && Character.isLetterOrDigit(line.charAt(end))) {
                ++end;
            }
            line = line.substring(0, start) + HI_OPEN + line.substring(start, end) + HI_CLOSE + line.substring(end);
            // Continue after the closing tag just inserted so that a repeated form
            // marks the next occurrence instead of nesting inside this one.
            searchFrom = end + HI_OPEN.length() + HI_CLOSE.length();
            ++highlighted;
        }
        if (highlighted == 0) {
            return NKJP_WSI_XML_UPGRADER.upgradeQuoteSimple(line, base);
        }
        return line;
    }

    /**
     * Finds {@code token} in {@code line} at or after {@code from}, preferring an
     * occurrence that is not adjacent to a letter or digit on either side. If only
     * embedded occurrences exist, the first of them is returned.
     *
     * @return the index of the chosen occurrence, or -1 if there is none
     */
    private static int findToken(String line, String token, int from) {
        int firstEmbedded = -1;
        int at = line.indexOf(token, from);
        while (at >= 0) {
            int after = at + token.length();
            boolean freeBefore = at == 0 || !Character.isLetterOrDigit(line.charAt(at - 1));
            boolean freeAfter = after >= line.length() || !Character.isLetterOrDigit(line.charAt(after));
            if (freeBefore && freeAfter) {
                return at;
            }
            if (firstEmbedded < 0) {
                firstEmbedded = at;
            }
            at = line.indexOf(token, at + 1);
        }
        return firstEmbedded;
    }

    /**
     * Surrounds every occurrence of {@code word} that is not adjacent to another
     * {@link #WORD_CHARS} character with {@code prefix} and {@code suffix}.
     * Occurrences embedded in longer words (or in tag names) are left untouched.
     */
    private static String wrapWholeWords(String line, String word, String prefix, String suffix) {
        StringBuilder out = new StringBuilder(line.length() + prefix.length() + suffix.length());
        int copiedTo = 0;
        int at = line.indexOf(word);
        while (at >= 0) {
            int after = at + word.length();
            boolean freeBefore = at == 0 || !isWordChar(line.charAt(at - 1));
            boolean freeAfter = after >= line.length() || !isWordChar(line.charAt(after));
            if (freeBefore && freeAfter) {
                out.append(line, copiedTo, at).append(prefix).append(word).append(suffix);
                copiedTo = after;
                at = line.indexOf(word, after);
            } else {
                at = line.indexOf(word, at + 1);
            }
        }
        out.append(line, copiedTo, line.length());
        return out.toString();
    }

    /** Tells whether {@code c} belongs to {@link #WORD_CHARS}. */
    private static boolean isWordChar(char c) {
        return WORD_CHARS.indexOf(c) >= 0;
    }
}

