/*
 * Decompiled with CFR 0.152.
 */
package wsdde.corpus.knowledge;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.StringTokenizer;
import java.util.Vector;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
import wsdde.corpus.ContextInfo;
import wsdde.corpus.KWIC;
import wsdde.corpus.POSInfo;
import wsdde.corpus.WSDCorpus;

/**
 * SAX handler that reads an XML corpus file and fills a {@link WSDCorpus}
 * with one {@link KWIC} per {@code <context>} element.
 *
 * <p>Elements recognised (names compared case-insensitively against the qName):
 * <ul>
 *   <li>{@code <context>} - starts a new {@link KWIC}; on its end tag the text
 *       accumulated since {@code </sense>} is tokenized as right-hand context
 *       and the KWIC is added to the corpus.</li>
 *   <li>{@code <sense label="...">} - on its start tag the text accumulated so
 *       far is tokenized as left-hand context and the {@code label} attribute
 *       is stored in {@code KWIC.sense}; on its end tag the element's text is
 *       inserted as a single keyword token.</li>
 *   <li>{@code <wsdcorpus>} - on its end tag {@code WSDCorpus.sensesFromKWICS()}
 *       is invoked.</li>
 * </ul>
 *
 * <p>Tokenization is plain whitespace splitting via {@link StringTokenizer}.
 * No real lemmatisation or tagging is done: the "base form" is just the first
 * {@value #BASE_FORM_PREFIX_LENGTH} characters of the token and the
 * grammatical info is always the literal string {@code "null"}.
 */
public class SimpleTokenizerParser
extends DefaultHandler {
    /** Number of leading characters of a token used as its stand-in base form. */
    private static final int BASE_FORM_PREFIX_LENGTH = 5;

    /** KWIC currently being filled; replaced on every {@code <context>} start tag. */
    KWIC k = null;
    /** Corpus being built by the current/last {@link #parse(String)} call. */
    WSDCorpus wsdc = null;
    /** Labels passed to {@code KWIC.wstaw} alongside every token's info vector. */
    Vector<String> typeOfInfo = null;
    /** Counter shared by all parser instances; pre-incremented to obtain each KWIC id. */
    static int id = 0;
    /** Character data accumulated since the last buffer reset. */
    StringBuffer sb;
    /** Scratch state describing the token most recently handed to the KWIC. */
    boolean isKeyword = false;
    String orthForm = "";
    String baseForm = "";
    String gramInfo = "";
    String sense = "";
    /** Raw text most recently tokenized as left / right context. */
    String left = "";
    String right = "";
    // The two flags below are not read anywhere inside this class.
    boolean listenForDisamb = true;
    boolean listenForNextBaseAndCtag = false;

    /**
     * Parses the file {@code jezyk.xml} from the working directory; the
     * resulting corpus is discarded.
     */
    public static void main(String[] args) {
        SimpleTokenizerParser txp = new SimpleTokenizerParser();
        txp.parse("jezyk.xml");
    }

    /**
     * Parses the given XML file (read as UTF-8) into a new {@link WSDCorpus}.
     *
     * <p>SAX and I/O failures are not propagated: the stack trace is printed
     * and whatever was collected up to that point is returned.
     *
     * @param filename path of the XML file to read; also passed to the
     *                 {@link WSDCorpus} constructor
     * @return the corpus built from the file (possibly incomplete after an error)
     */
    public WSDCorpus parse(String filename) {
        this.wsdc = new WSDCorpus(filename);
        this.typeOfInfo = new Vector<String>(Arrays.asList("words", "lemmas", "posinfos"));
        this.sb = new StringBuffer();
        InputStream in = null;
        try {
            // NOTE(review): the reader is created with default settings; if input
            // files can come from untrusted sources, consider disabling DTDs and
            // external entities on it.
            XMLReader xr = XMLReaderFactory.createXMLReader();
            xr.setContentHandler(this);
            xr.setErrorHandler(this);
            in = new FileInputStream(filename);
            InputStreamReader r = new InputStreamReader(in, "UTF-8");
            xr.parse(new InputSource(r));
        }
        catch (SAXException e) {
            e.printStackTrace();
        }
        catch (FileNotFoundException e) {
            e.printStackTrace();
        }
        catch (IOException e) {
            e.printStackTrace();
        }
        finally {
            // Release the file handle on every path; closing an already
            // closed stream is harmless.
            if (in != null) {
                try {
                    in.close();
                }
                catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return this.wsdc;
    }

    /**
     * Starts a new KWIC on {@code <context>}; on {@code <sense>} flushes the
     * buffered text as left context and records the {@code label} attribute.
     */
    @Override
    public void startElement(String uri, String name, String qName, Attributes atts) {
        if (qName.equalsIgnoreCase("context")) {
            this.k = new KWIC(++id);
        }
        if (qName.equalsIgnoreCase("sense")) {
            this.left = this.sb.toString();
            // Reset first so that only the keyword text is in the buffer at </sense>.
            this.sb = new StringBuffer();
            this.addContextTokens(this.left);
            this.k.sense = atts.getValue("label");
        }
    }

    /**
     * On {@code </sense>} inserts the buffered text as the keyword token; on
     * {@code </context>} flushes the buffered text as right context and adds
     * the KWIC to the corpus; on {@code </wsdcorpus>} finalises the corpus
     * via {@code sensesFromKWICS()}.
     */
    @Override
    public void endElement(String uri, String name, String qName) {
        if (qName.equalsIgnoreCase("sense")) {
            String keyword = this.sb.toString();
            // NOTE(review): unlike context tokens, the keyword's base form is
            // not lower-cased and the keyword is not split on whitespace -
            // kept as in the original; confirm this asymmetry is intended.
            this.insertToken(keyword, SimpleTokenizerParser.prefixOf(keyword), true);
            this.isKeyword = false;
            this.sb = new StringBuffer();
        }
        if (qName.equalsIgnoreCase("context")) {
            this.right = this.sb.toString();
            this.sb = new StringBuffer();
            this.addContextTokens(this.right);
            this.wsdc.add(this.k);
        }
        if (qName.equalsIgnoreCase("wsdcorpus")) {
            this.wsdc.sensesFromKWICS();
        }
    }

    /**
     * Appends the reported character data to the text buffer. SAX may deliver
     * one run of text in several calls, hence the accumulation.
     */
    @Override
    public void characters(char[] ch, int start, int length) {
        this.sb.append(ch, start, length);
    }

    /**
     * Splits {@code text} on whitespace and inserts every token into the
     * current KWIC as a non-keyword, using the lower-cased token prefix as
     * its base form.
     */
    private void addContextTokens(String text) {
        StringTokenizer st = new StringTokenizer(text);
        while (st.hasMoreTokens()) {
            String token = st.nextToken();
            this.insertToken(token, SimpleTokenizerParser.prefixOf(token).toLowerCase(), false);
        }
    }

    /**
     * Records the token in the scratch fields and passes it to the current
     * KWIC via {@code wstaw}, together with the constant grammatical info
     * {@code "null"}.
     */
    private void insertToken(String orth, String base, boolean keyword) {
        this.isKeyword = keyword;
        this.orthForm = orth;
        this.baseForm = base;
        this.gramInfo = "null";
        Vector<ContextInfo> info = new Vector<ContextInfo>(Arrays.asList(new ContextInfo(this.orthForm), new ContextInfo(this.baseForm), new POSInfo(this.gramInfo)));
        this.k.wstaw(this.typeOfInfo, info, this.isKeyword);
    }

    /** Returns at most the first {@link #BASE_FORM_PREFIX_LENGTH} characters of {@code token}. */
    private static String prefixOf(String token) {
        return token.substring(0, Math.min(BASE_FORM_PREFIX_LENGTH, token.length()));
    }
}

