/*
 * Decompiled with CFR 0.152.
 */
package termopl;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.RandomAccessFile;
import java.io.Reader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.ListIterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.EndElement;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
import termopl.MultiWordToken;
import termopl.Pair;
import termopl.Token;
import termopl.UDToken;

public class CorpusReader {
    // Corpus format identifiers. The methods of this class compare `format` against the
    // literal values of these constants (-1, 0, 1, 2, 3, 4).
    public static final int UNKNOWN_FORMAT = -1;
    public static final int TEXT_FORMAT = 0;
    public static final int XCES_FORMAT = 1;
    public static final int TEI_FORMAT = 2;
    public static final int CONLLU_FORMAT = 3;
    public static final int INTERNAL_FORMAT = 4;
    // Token line of the '#'-delimited text format: three groups, each followed by '#'.
    private static final Pattern patt = Pattern.compile("(.*)#(.*)#(.*)#");
    // Format of the current input; starts out as -1 (UNKNOWN_FORMAT).
    private int format = -1;
    // Incremented by openFile() and by in-file document markers ("%%" lines, "newdoc" lines).
    private int docNumber = 0;
    // end_char value taken from the most recent CoNLL-U line carrying start_char/end_char; -1 after openFile().
    private int end_char;
    // Token counter of the XCES reader: reset on <sentence>, incremented on every <tok>.
    private int tokenIndex = 0;
    // Reader positions recorded at the start and end of the current sentence.
    private long sentenceStart;
    private long sentenceEnd;
    // Set by the line-based readers when a sentence boundary or end of input was reached;
    // the next read then records a new sentenceStart.
    private boolean eos;
    private File currentFile = null;
    // Name of the file passed to setCurrentFile(File); the two-argument overload does not update it.
    private String processedFileName;
    // Position-tracking readers wrapped by textReader / xmlReader respectively.
    private TXTPosReader txtPosReader;
    private XMLPosReader xmlPosReader;
    private BufferedReader textReader = null;
    private XMLEventReader xmlReader = null;
    // Multi-word tokens collected by the CoNLL-U reader; created lazily, dropped by initMultiWordTokens().
    private LinkedList<MultiWordToken> mwTokens = null;
    // Multi-word token whose index range is currently being filled by the CoNLL-U reader.
    private MultiWordToken mwt = null;
    // Result of the latest getNextToken() call and the one before it.
    private Token current = null;
    private Token previous = null;

    /**
     * Creates a reader with no input attached; the format stays at its initial value (-1)
     * until it is set through {@code setCurrentFile(File, int)} or {@code checkFormat()}.
     */
    public CorpusReader() {
    }

    /**
     * Creates a reader over in-memory corpus content.
     *
     * <p>Formats 4, 0 and 3 (internal, text, CoNLL-U) are read line by line through a
     * {@code BufferedReader}; any other format value is read through an XML event reader
     * created with namespace awareness switched off.
     *
     * @param format one of the format constants of this class
     * @param input  the complete corpus content
     */
    public CorpusReader(int format, String input) {
        this();
        this.format = format;
        StringReader source = new StringReader(input);
        boolean lineBased = format == 4 || format == 0 || format == 3;
        if (lineBased) {
            this.txtPosReader = new TXTPosReader(source);
            this.textReader = new BufferedReader(this.txtPosReader);
            return;
        }
        XMLInputFactory factory = XMLInputFactory.newInstance();
        factory.setProperty("javax.xml.stream.isNamespaceAware", Boolean.FALSE);
        this.xmlPosReader = new XMLPosReader(source);
        try {
            this.xmlReader = factory.createXMLEventReader(this.xmlPosReader);
        }
        catch (XMLStreamException e) {
            // The reader stays null in this case; the failure is only reported.
            e.printStackTrace();
        }
    }

    /**
     * Selects the file to read next and remembers its name as the processed file name.
     *
     * @param file the corpus file; must not be {@code null}
     */
    public void setCurrentFile(File file) {
        this.currentFile = file;
        String simpleName = file.getName();
        this.processedFileName = simpleName;
    }

    /**
     * Selects the file to read next together with its format.
     * Unlike {@code setCurrentFile(File)}, this overload leaves the processed file name untouched.
     *
     * @param file   the corpus file
     * @param format one of the format constants of this class
     */
    public void setCurrentFile(File file, int format) {
        this.format = format;
        this.currentFile = file;
    }

    /**
     * Returns the name of the current file.
     *
     * @return the file name, or {@code null} when no file has been set
     */
    public String getCurrentFileName() {
        return this.currentFile == null ? null : this.currentFile.getName();
    }

    /**
     * Returns the name recorded by the last {@code setCurrentFile(File)} call.
     *
     * @return the processed file name, possibly {@code null}
     */
    public String getProcessedFileName() {
        return processedFileName;
    }

    /**
     * Returns the format currently assigned to this reader.
     *
     * @return one of the format constants of this class
     */
    public int getFormat() {
        return format;
    }

    /**
     * Returns the token kept as "previous" by {@code getNextToken()}.
     *
     * @return the previous token, or {@code null} when there is none
     */
    public Token getPreviousToken() {
        return previous;
    }

    /**
     * Returns the running document counter.
     *
     * @return the number of documents started so far
     */
    public int getDocNumber() {
        return docNumber;
    }

    /**
     * Discards the list of collected multi-word tokens; the CoNLL-U reader
     * creates a new list when it meets the next multi-word token.
     */
    public void initMultiWordTokens() {
        mwTokens = null;
    }

    /**
     * Returns the multi-word tokens collected since the last {@code initMultiWordTokens()} call.
     *
     * @return the live list, or {@code null} when none has been collected
     */
    public LinkedList<MultiWordToken> getMultiWordTokens() {
        return mwTokens;
    }

    /**
     * Returns the reader position recorded at the start of the current sentence.
     *
     * @return the sentence start position
     */
    public long getSentenceStart() {
        return sentenceStart;
    }

    /**
     * Returns the distance between the recorded sentence end and sentence start.
     *
     * @return {@code sentenceEnd - sentenceStart}, narrowed to {@code int}
     */
    public int getSentenceLength() {
        long span = this.sentenceEnd - this.sentenceStart;
        return (int) span;
    }

    /**
     * Opens the current file with the reader matching the current format and resets
     * the per-document state (sentence positions, {@code end_char}, end-of-sentence flag).
     * The document counter is incremented even when the format selects no reader.
     * Any exception is printed and otherwise ignored; the state is not reset in that case.
     */
    public void openFile() {
        try {
            int fmt = this.format;
            if (fmt == 0 || fmt == 3 || fmt == 4) {
                // text, CoNLL-U and internal formats are line based
                this.openTextFile();
            } else if (fmt == 1 || fmt == 2) {
                // XCES and TEI are XML based
                this.openXMLFile();
            }
            this.docNumber++;
            this.sentenceStart = 0L;
            this.sentenceEnd = 0L;
            this.end_char = -1;
            this.eos = false;
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Closes the reader that belongs to the current format.
     *
     * <p>For the XML formats the position-tracking reader is closed explicitly as well:
     * {@code XMLEventReader.close()} releases parser resources only and does not close
     * the underlying input source, so the file would otherwise stay open.
     * Readers that were never opened are skipped. Any exception is printed and otherwise ignored.
     */
    public void closeFile() {
        try {
            switch (this.format) {
                case 0:
                case 3:
                case 4: {
                    if (this.textReader != null) {
                        this.textReader.close();
                    }
                    break;
                }
                case 1:
                case 2: {
                    try {
                        if (this.xmlReader != null) {
                            this.xmlReader.close();
                        }
                    }
                    finally {
                        // Close the source even if the event reader failed to close.
                        if (this.xmlPosReader != null) {
                            this.xmlPosReader.close();
                        }
                    }
                    break;
                }
                default: {
                    break;
                }
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Opens the current file as UTF-8 text behind a position-tracking reader
     * and a {@code BufferedReader}.
     *
     * @throws UnsupportedEncodingException if the "UTF8" charset name is not supported
     * @throws FileNotFoundException        if the current file cannot be opened
     */
    public void openTextFile() throws UnsupportedEncodingException, FileNotFoundException {
        InputStream bytes = new FileInputStream(this.currentFile);
        InputStreamReader chars = new InputStreamReader(bytes, "UTF8");
        this.txtPosReader = new TXTPosReader(chars);
        this.textReader = new BufferedReader(this.txtPosReader);
    }

    /**
     * Opens the current file as UTF-8 XML behind a position-tracking reader
     * and a StAX event reader created from a default {@code XMLInputFactory}.
     *
     * @throws FileNotFoundException        if the current file cannot be opened
     * @throws XMLStreamException           if the event reader cannot be created
     * @throws UnsupportedEncodingException if the "UTF8" charset name is not supported
     */
    public void openXMLFile() throws FileNotFoundException, XMLStreamException, UnsupportedEncodingException {
        XMLInputFactory factory = XMLInputFactory.newInstance();
        InputStream bytes = new FileInputStream(this.currentFile);
        InputStreamReader chars = new InputStreamReader(bytes, "UTF8");
        this.xmlPosReader = new XMLPosReader(chars);
        this.xmlReader = factory.createXMLEventReader(this.xmlPosReader);
    }

    /**
     * Detects the format of the current file and stores it in {@code format}.
     *
     * <p>The file extension decides first ({@code .tgt} → 4, {@code .ccl} → 1,
     * {@code .conllu} → 3, compared case-insensitively). Otherwise the content checks run
     * in the order text (0), XCES (1), TEI (2); if none succeeds, or an
     * {@code IOException} occurs, the format becomes -1.
     */
    public void checkFormat() {
        try {
            String lowerName = this.currentFile.getName().toLowerCase();
            int detected;
            if (lowerName.endsWith(".tgt")) {
                detected = 4;
            } else if (lowerName.endsWith(".ccl")) {
                detected = 1;
            } else if (lowerName.endsWith(".conllu")) {
                detected = 3;
            } else if (this.checkFormatTXT()) {
                detected = 0;
            } else if (this.checkFormatXCES()) {
                detected = 1;
            } else if (this.checkFormatTEI()) {
                detected = 2;
            } else {
                detected = -1;
            }
            this.format = detected;
        }
        catch (IOException e) {
            e.printStackTrace();
            this.format = -1;
        }
    }

    /**
     * Checks whether the current file looks like the plain-text token format.
     *
     * <p>Lines starting with {@code %%} are ignored. Up to ten other lines are sampled from
     * the beginning of the file; each must be a sentence separator or a token line
     * (see {@link #isTextFormatLine(String)}). The first line that is neither makes the
     * check fail. A file without any sampled line passes.
     *
     * <p>Compared with the earlier implementation, every line is now judged on its own
     * (the verdict no longer depends on state left over from preceding lines or mainly on
     * the last line of the file), reading stops after ten accepted lines instead of
     * scanning the whole file, and the reader is closed even when reading fails.
     *
     * @return {@code true} if all sampled lines fit the text format
     * @throws IOException if the file cannot be opened or read
     */
    public boolean checkFormatTXT() throws IOException {
        int accepted = 0;
        try (BufferedReader reader = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(this.currentFile), "UTF8"))) {
            String line;
            while (accepted < 10 && (line = reader.readLine()) != null) {
                line = line.trim();
                if (line.startsWith("%%")) {
                    continue;
                }
                if (!CorpusReader.isTextFormatLine(line)) {
                    return false;
                }
                ++accepted;
            }
        }
        return true;
    }

    /**
     * Tells whether a trimmed line is acceptable in the plain-text token format: an empty
     * line, an {@code interp} separator line, a line whose third {@code '#'} is its last
     * character, or a line containing at least two tab characters.
     */
    private static boolean isTextFormatLine(String line) {
        if (line.isEmpty() || line.matches("&\\s*#&\\s*#interp#") || line.matches("&\\t&\\tinterp.*")) {
            return true;
        }
        // '#'-delimited variant: locate the first three '#' characters.
        int index = 0;
        boolean threeHashes = true;
        for (int i = 0; i < 3; ++i) {
            index = line.indexOf('#', index);
            if (index < 0) {
                threeHashes = false;
                break;
            }
            ++index;
        }
        if (threeHashes && index == line.length()) {
            return true;
        }
        // Tab-delimited variant: at least three columns, i.e. two tab characters.
        int firstTab = line.indexOf('\t');
        return firstTab >= 0 && line.indexOf('\t', firstTab + 1) >= 0;
    }

    /**
     * Checks whether the current file starts like an XCES document, i.e. an XML
     * declaration followed by {@code <cesAna}.
     *
     * @return {@code true} if the marker is found at the beginning of the file
     * @throws IOException declared for callers; see {@code checkXML}
     */
    public boolean checkFormatXCES() throws IOException {
        final String rootMarker = "<cesAna";
        return this.checkXML(rootMarker);
    }

    /**
     * Checks whether the current file starts like a TEI corpus, i.e. an XML
     * declaration followed by {@code <teiCorpus}.
     *
     * @return {@code true} if the marker is found at the beginning of the file
     * @throws IOException declared for callers; see {@code checkXML}
     */
    public boolean checkFormatTEI() throws IOException {
        final String rootMarker = "<teiCorpus";
        return this.checkXML(rootMarker);
    }

    /**
     * Checks whether the beginning of the current file contains an XML declaration
     * ({@code <?xml}) followed, after at least one character, by the given marker.
     * At most 512 characters (a single read) are inspected; line breaks between the
     * declaration and the marker are allowed.
     *
     * <p>The reader is managed by try-with-resources so that it is closed even when
     * reading fails. I/O errors are printed and reported as "no match".
     *
     * @param str text appended verbatim to the regular expression, e.g. {@code "<cesAna"}
     * @return {@code true} if the inspected fragment matches
     * @throws IOException kept in the signature for compatibility; I/O errors are handled internally
     */
    public boolean checkXML(String str) throws IOException {
        char[] buff = new char[512];
        try (BufferedReader reader = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(this.currentFile), "UTF8"))) {
            int count = reader.read(buff);
            if (count < 0) {
                // empty file
                return false;
            }
            String fragment = new String(buff, 0, count);
            Pattern pattern = Pattern.compile("<\\?xml.+" + str, Pattern.DOTALL);
            return pattern.matcher(fragment).find();
        }
        catch (IOException e) {
            e.printStackTrace();
        }
        return false;
    }

    /**
     * Reads the next token using the reader that matches the current format.
     *
     * <p>Before reading, the last result becomes the "previous" token, except that the
     * sentence-boundary sentinel {@code Token.nullToken} is replaced by {@code null}.
     *
     * @return the next token, {@code Token.nullToken} at a sentence boundary, or
     *         {@code null} when the format reader has nothing more to return or the
     *         format is not one of 0-4
     */
    public Token getNextToken() {
        Token last = this.current;
        // Identity comparison is intended: nullToken is a shared sentinel instance.
        this.previous = last == Token.nullToken ? null : last;
        Token next;
        switch (this.format) {
            case 0: // text
                next = this.getTokenFromTextFile();
                break;
            case 1: // XCES
                next = this.getTokenFromXCESFile();
                break;
            case 2: // TEI
                next = this.getTokenFromTEIFile();
                break;
            case 3: // CoNLL-U
                next = this.getTokenFromCONLLUFile();
                break;
            case 4: // internal (.tgt)
                next = this.getTokenFromTGTFile();
                break;
            default:
                next = null;
        }
        this.current = next;
        return next;
    }

    /**
     * Reads the next token from an internal-format (tab-separated) file.
     *
     * <p>Blank lines and lines with fewer than three columns are skipped. A line equal to
     * {@code eos} ends the sentence. A fourth column equal to {@code nps} clears
     * {@code spaceAfter} on the previous token, if there is one.
     *
     * @return a token built from the first three columns, {@code Token.nullToken} at a
     *         sentence end, or {@code null} at end of input or after an exception
     */
    public Token getTokenFromTGTFile() {
        try {
            if (this.eos) {
                // A new sentence starts where the reader currently stands.
                this.sentenceStart = this.txtPosReader.getPos();
                this.eos = false;
            }
            for (String raw = this.textReader.readLine(); raw != null; raw = this.textReader.readLine()) {
                String line = raw.trim();
                if (line.isEmpty()) {
                    continue;
                }
                if (line.equals("eos")) {
                    this.eos = true;
                    this.sentenceEnd = this.txtPosReader.getPos();
                    return Token.nullToken;
                }
                String[] cols = line.split("\\t");
                if (cols.length < 3) {
                    continue;
                }
                boolean noSpaceMarker = cols.length >= 4 && cols[3].equals("nps");
                if (noSpaceMarker && this.previous != null) {
                    this.previous.spaceAfter = false;
                }
                return new Token(cols[0], cols[1], cols[2]);
            }
            // End of input also closes the current sentence.
            this.eos = true;
            this.sentenceEnd = this.txtPosReader.getPos();
            return null;
        }
        catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Reads the next token from a plain-text corpus file.
     *
     * <p>A line starting with {@code %%} starts a new document: the previous token is
     * dropped, the document counter is incremented and the sentence positions are reset.
     * An empty line or an {@code interp} separator line ends the sentence. Token lines are
     * either {@code '#'}-delimited (three groups, trimmed) or tab-separated (first three
     * columns); other lines are skipped.
     *
     * @return the token, {@code Token.nullToken} at a sentence end, or {@code null} at
     *         end of input or after an exception
     */
    public Token getTokenFromTextFile() {
        try {
            if (this.eos) {
                this.sentenceStart = this.txtPosReader.getPos();
                this.eos = false;
            }
            for (String raw = this.textReader.readLine(); raw != null; raw = this.textReader.readLine()) {
                String line = raw.trim();
                if (line.startsWith("%%")) {
                    // document separator
                    this.previous = null;
                    ++this.docNumber;
                    this.sentenceEnd = 0L;
                    this.sentenceStart = 0L;
                    continue;
                }
                boolean sentenceBreak = line.isEmpty()
                        || line.matches("&\\s*#&\\s*#interp#")
                        || line.matches("&\\t&\\tinterp.*");
                if (sentenceBreak) {
                    this.eos = true;
                    this.sentenceEnd = this.txtPosReader.getPos();
                    return Token.nullToken;
                }
                Matcher hashLine = patt.matcher(line);
                if (hashLine.matches()) {
                    String form = hashLine.group(1).trim();
                    String lemma = hashLine.group(2).trim();
                    String tag = hashLine.group(3).trim();
                    return new Token(form, lemma, tag);
                }
                String[] cols = line.split("\\t");
                if (cols.length >= 3) {
                    return new Token(cols[0], cols[1], cols[2]);
                }
            }
            this.eos = true;
            this.sentenceEnd = this.txtPosReader.getPos();
            return null;
        }
        catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /** Matches the {@code start_char=N|end_char=M} pair looked for in the tenth column of a CoNLL-U line. */
    private static final Pattern CONLLU_CHAR_SPAN = Pattern.compile("start_char=(\\d+)\\|end_char=(\\d+)");

    /**
     * Reads the next word token from a CoNLL-U file.
     *
     * <p>An empty line ends the sentence. Lines starting with a digit are split on tabs:
     * <ul>
     *   <li>an id of the form {@code a-b} registers a {@code MultiWordToken} in
     *       {@code mwTokens} and reading continues;</li>
     *   <li>a plain numeric id yields a {@code UDToken}, which is also added to the pending
     *       multi-word token when its index lies in that token's range.</li>
     * </ul>
     * The tenth column supplies {@code SpaceAfter=No}, {@code start_char/end_char} offsets
     * (a token starting where the last one ended clears {@code spaceAfter} on the previous
     * token) and {@code ner=S|B|I|E} marks. Other lines containing {@code newdoc} start a
     * new document.
     *
     * <p>Lines lacking the columns needed (two for a range line, eight for a word line) are
     * skipped, and a missing tenth column is treated as empty. Earlier such a line raised
     * an {@code ArrayIndexOutOfBoundsException} that was swallowed and returned as
     * {@code null}, the same value as end of input.
     *
     * @return the token, {@code Token.nullToken} at a sentence end, or {@code null} at
     *         end of input or after an exception (e.g. a non-numeric head column)
     */
    public Token getTokenFromCONLLUFile() {
        // NOTE(review): this flag is not reset between lines, so SpaceAfter=No on a range
        // line also applies to the first word line read afterwards in the same call.
        // Kept as is; confirm whether that is intended.
        boolean spaceAfter = true;
        try {
            if (this.eos) {
                this.sentenceStart = this.txtPosReader.getPos();
                this.eos = false;
            }
            String line = this.textReader.readLine();
            while (line != null) {
                if ((line = line.trim()).isEmpty()) {
                    this.eos = true;
                    this.sentenceEnd = this.txtPosReader.getPos();
                    return Token.nullToken;
                }
                if (line.matches("\\d+.*")) {
                    String[] cols = line.split("\\t");
                    String misc = cols.length > 9 ? cols[9] : "";
                    if (misc.contains("SpaceAfter=No")) {
                        spaceAfter = false;
                    } else {
                        Matcher m = CONLLU_CHAR_SPAN.matcher(misc);
                        if (m.find()) {
                            int s = Integer.parseInt(m.group(1));
                            int e = Integer.parseInt(m.group(2));
                            // Adjacent character spans mean there was no space in between.
                            if (s == this.end_char && this.previous != null) {
                                this.previous.spaceAfter = false;
                            }
                            this.end_char = e;
                        }
                    }
                    if (cols[0].matches("\\d+\\-\\d+")) {
                        if (cols.length >= 2) {
                            String[] r = cols[0].split("\\-");
                            this.mwt = new MultiWordToken(cols[1], spaceAfter, Integer.parseInt(r[0]), Integer.parseInt(r[1]));
                            if (this.mwTokens == null) {
                                this.mwTokens = new LinkedList<>();
                            }
                            this.mwTokens.add(this.mwt);
                        }
                    } else if (cols[0].matches("\\d+") && cols.length >= 8) {
                        UDToken t = new UDToken(cols[1], cols[2], cols[4], cols[3], cols[5].toLowerCase(), cols[7], Integer.parseInt(cols[0]), Integer.parseInt(cols[6]), spaceAfter);
                        if (this.mwt != null) {
                            if (t.index >= this.mwt.startToken() && t.index <= this.mwt.endToken()) {
                                this.mwt.addToken(t);
                            } else {
                                // Past the range: the multi-word token is complete.
                                this.mwt = null;
                            }
                        }
                        if (misc.contains("ner=S")) {
                            t.ner = 'S';
                        } else if (misc.contains("ner=B")) {
                            t.ner = 'B';
                        } else if (misc.contains("ner=I")) {
                            t.ner = 'I';
                        } else if (misc.contains("ner=E")) {
                            t.ner = 'E';
                        }
                        return t;
                    }
                } else if (line.contains("newdoc")) {
                    this.previous = null;
                    this.mwt = null;
                    ++this.docNumber;
                    this.sentenceEnd = 0L;
                    this.sentenceStart = 0L;
                }
                line = this.textReader.readLine();
            }
            this.eos = true;
            this.sentenceEnd = this.txtPosReader.getPos();
            return null;
        }
        catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Reads the next token from an XCES-style XML stream.
     *
     * <p>Events are consumed until a {@code tok} element closes with form, lemma and tag
     * all set, or a {@code chunk}/{@code sentence} element closes.
     *
     * @return a {@code UDToken} when dependency data (upos, deprel, head position and a
     *         positive head distance) is present, otherwise a plain {@code Token};
     *         {@code Token.nullToken} at the end of a chunk or sentence; {@code null}
     *         when the stream has no more events or an {@code XMLStreamException} occurs
     */
    public Token getTokenFromXCESFile() {
        int distance = 0;
        boolean disamb = false;
        boolean morph = false;
        String ner = null;
        String position = null;
        String UDRel = null;
        String UDTag = null;
        String UDPos = null;
        String ctag = null;
        String lemma = null;
        String form = null;
        block2: while (this.xmlReader.hasNext()) {
            try {
                String name;
                XMLEvent event = this.xmlReader.nextEvent();
                if (event.isStartElement()) {
                    StartElement startElement = event.asStartElement();
                    name = startElement.getName().getLocalPart();
                    if (name.equals("chunk")) {
                        // Only a chunk with type="s" marks a sentence start.
                        Iterator<Attribute> it = startElement.getAttributes();
                        while (it.hasNext()) {
                            Attribute attr = it.next();
                            if (!attr.getName().getLocalPart().equals("type") || !attr.getValue().equals("s")) continue;
                            this.sentenceStart = this.xmlPosReader.getPos();
                            this.previous = null;
                            continue block2;
                        }
                        continue;
                    }
                    if (name.equals("sentence")) {
                        this.sentenceStart = this.xmlPosReader.getPos();
                        this.tokenIndex = 0;
                        this.previous = null;
                        continue;
                    }
                    if (name.equals("tok")) {
                        // New token: forget everything collected for the previous one.
                        ner = null;
                        position = null;
                        UDRel = null;
                        UDTag = null;
                        UDPos = null;
                        ctag = null;
                        lemma = null;
                        form = null;
                        distance = 0;
                        ++this.tokenIndex;
                        continue;
                    }
                    if (name.equals("orth")) {
                        form = this.xmlReader.getElementText();
                        continue;
                    }
                    if (name.equals("lex")) {
                        // base/ctag children are used only if this lex carries disamb="1".
                        Iterator<Attribute> it = startElement.getAttributes();
                        disamb = false;
                        while (it.hasNext()) {
                            Attribute attr = it.next();
                            if (!attr.getName().getLocalPart().equals("disamb") || !attr.getValue().equals("1")) continue;
                            disamb = true;
                            continue block2;
                        }
                        continue;
                    }
                    if (name.equals("base")) {
                        // Text is read only inside a disambiguated lex; a lemma containing ':'
                        // (but not equal to ":") is cut at the first ':'.
                        if (!disamb || !(lemma = this.xmlReader.getElementText()).contains(":") || lemma.equals(":")) continue;
                        lemma = lemma.split(":")[0];
                        continue;
                    }
                    if (name.equals("ctag")) {
                        if (!disamb) continue;
                        ctag = this.xmlReader.getElementText();
                        continue;
                    }
                    if (name.equals("upos")) {
                        UDPos = this.xmlReader.getElementText();
                        continue;
                    }
                    if (name.equals("deprel")) {
                        UDRel = this.xmlReader.getElementText();
                        continue;
                    }
                    if (name.equals("xpos")) {
                        // Unlike ctag, xpos is taken regardless of the disamb flag.
                        ctag = this.xmlReader.getElementText();
                        continue;
                    }
                    if (name.equals("lemma")) {
                        lemma = this.xmlReader.getElementText();
                        if (!lemma.contains(":") || lemma.equals(":")) continue;
                        lemma = lemma.split(":")[0];
                        continue;
                    }
                    if (name.equals("head.distance")) {
                        distance = Integer.valueOf(this.xmlReader.getElementText());
                        continue;
                    }
                    if (name.equals("head.position")) {
                        position = this.xmlReader.getElementText();
                        continue;
                    }
                    if (name.equals("morph")) {
                        morph = true;
                        continue;
                    }
                    if (name.equals("ner")) {
                        Iterator<Attribute> it = startElement.getAttributes();
                        while (it.hasNext()) {
                            Attribute attr = it.next();
                            if (!attr.getName().getLocalPart().equals("mark")) continue;
                            ner = attr.getValue();
                        }
                        continue;
                    }
                    // Any other element is of interest only inside <morph>: it is appended
                    // to UDTag as "name=text", entries joined by '|'.
                    if (!morph) continue;
                    String val = String.valueOf(name) + "=" + this.xmlReader.getElementText();
                    if (UDTag != null) {
                        UDTag = String.valueOf(UDTag) + "|" + val;
                        continue;
                    }
                    UDTag = val;
                    continue;
                }
                if (!event.isEndElement()) continue;
                EndElement endElement = event.asEndElement();
                name = endElement.getName().getLocalPart();
                if (name.equals("chunk") || name.equals("sentence")) {
                    // Closing any chunk (whatever its type) or sentence reports a boundary.
                    this.sentenceEnd = this.xmlPosReader.getMark();
                    return Token.nullToken;
                }
                if (name.equals("tok")) {
                    boolean createToken = false;
                    boolean createUDToken = false;
                    disamb = false;
                    if (form != null && lemma != null && ctag != null) {
                        createToken = true;
                        if (UDPos != null && UDRel != null && position != null && distance > 0) {
                            createUDToken = true;
                        }
                    }
                    if (createUDToken) {
                        // Head index = own index plus distance for "right", minus distance otherwise.
                        int UDLink = this.tokenIndex;
                        UDLink = position.equals("right") ? (UDLink += distance) : (UDLink -= distance);
                        UDToken t = new UDToken(form, lemma, ctag, UDPos, UDTag, UDRel, this.tokenIndex, UDLink);
                        if (ner != null) {
                            t.ner = ner.charAt(0);
                        }
                        return t;
                    }
                    // Incomplete token: keep reading.
                    if (!createToken) continue;
                    Token t = new Token(form, lemma, ctag);
                    if (ner != null) {
                        t.ner = ner.charAt(0);
                    }
                    return t;
                }
                if (name.equals("lex")) {
                    disamb = false;
                    continue;
                }
                if (name.equals("morph")) {
                    morph = false;
                    continue;
                }
                // An <ns> element clears spaceAfter on the previous token.
                if (!name.equals("ns") || this.previous == null) continue;
                this.previous.spaceAfter = false;
            }
            catch (XMLStreamException e) {
                e.printStackTrace();
                break;
            }
        }
        return null;
    }

    /**
     * Reads the next token from a TEI XML stream.
     *
     * <p>An opening {@code s} element records the sentence start and drops the previous
     * token; a closing one records the sentence end. An {@code fs} element with
     * {@code type="morph"} is handed to {@code getMorph} to fill a new token.
     *
     * @return the token, {@code Token.nullToken} at a sentence end, or {@code null} when
     *         the stream has no more events or an {@code XMLStreamException} occurs
     */
    public Token getTokenFromTEIFile() {
        while (this.xmlReader.hasNext()) {
            try {
                XMLEvent ev = this.xmlReader.nextEvent();
                if (ev.isEndElement()) {
                    if (ev.asEndElement().getName().getLocalPart().equals("s")) {
                        this.sentenceEnd = this.xmlPosReader.getMark();
                        return Token.nullToken;
                    }
                    continue;
                }
                if (!ev.isStartElement()) {
                    continue;
                }
                StartElement start = ev.asStartElement();
                String tag = start.getName().getLocalPart();
                if (tag.equals("s")) {
                    this.sentenceStart = this.xmlPosReader.getPos();
                    this.previous = null;
                } else if (tag.equals("fs")) {
                    Iterator<Attribute> attrs = start.getAttributes();
                    while (attrs.hasNext()) {
                        Attribute a = attrs.next();
                        boolean morphStructure = a.getName().getLocalPart().equals("type") && a.getValue().equals("morph");
                        if (morphStructure) {
                            Token token = new Token();
                            this.getMorph(token);
                            return token;
                        }
                    }
                }
            }
            catch (XMLStreamException e) {
                e.printStackTrace();
                return null;
            }
        }
        return null;
    }

    /**
     * Consumes the content of a TEI {@code fs type="morph"} structure and fills the token.
     *
     * <p>{@code f} elements are dispatched on their {@code name} attribute: {@code orth},
     * {@code interps} and {@code disamb} are delegated to the matching helper (reading
     * stops after {@code disamb}), while {@code nps} arms a flag. A following
     * {@code binary} element with {@code value="true"} then clears {@code spaceAfter} on
     * the previous token, if there is one. Reading also stops at a closing {@code fs}.
     * An {@code XMLStreamException} is printed and ends the reading.
     *
     * @param token the token to fill
     */
    public void getMorph(Token token) {
        boolean npsPending = false;
        try {
            while (true) {
                XMLEvent ev = this.xmlReader.nextEvent();
                if (ev.isEndElement()) {
                    if (ev.asEndElement().getName().getLocalPart().equals("fs")) {
                        return;
                    }
                    continue;
                }
                if (!ev.isStartElement()) {
                    continue;
                }
                StartElement start = ev.asStartElement();
                String tag = start.getName().getLocalPart();
                if (tag.equals("f")) {
                    Iterator<Attribute> attrs = start.getAttributes();
                    while (attrs.hasNext()) {
                        Attribute a = attrs.next();
                        if (!a.getName().getLocalPart().equals("name")) {
                            continue;
                        }
                        String feature = a.getValue();
                        if (feature.equals("orth")) {
                            this.getOrth(token);
                        } else if (feature.equals("interps")) {
                            this.getInterps(token);
                        } else if (feature.equals("nps")) {
                            npsPending = true;
                        } else if (feature.equals("disamb")) {
                            this.getDisamb(token);
                            return;
                        }
                    }
                } else if (tag.equals("binary")) {
                    Iterator<Attribute> attrs = start.getAttributes();
                    while (attrs.hasNext()) {
                        Attribute a = attrs.next();
                        boolean isTrue = a.getName().getLocalPart().equals("value") && a.getValue().equals("true");
                        if (isTrue && npsPending) {
                            npsPending = false;
                            if (this.previous != null) {
                                this.previous.spaceAfter = false;
                            }
                        }
                    }
                }
            }
        }
        catch (XMLStreamException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the surface form of a token: the text of the first {@code string} element
     * found before the enclosing {@code f} element closes is stored in {@code token.form}.
     * An {@code XMLStreamException} is printed and ends the reading.
     *
     * @param token the token to fill
     */
    public void getOrth(Token token) {
        try {
            while (true) {
                XMLEvent ev = this.xmlReader.nextEvent();
                if (ev.isStartElement()) {
                    if (ev.asStartElement().getName().getLocalPart().equals("string")) {
                        token.form = this.xmlReader.getElementText();
                        return;
                    }
                } else if (ev.isEndElement() && ev.asEndElement().getName().getLocalPart().equals("f")) {
                    return;
                }
            }
        }
        catch (XMLStreamException e) {
            e.printStackTrace();
        }
    }

    /**
     * Consumes an {@code interps} feature: every {@code fs type="lex"} structure found
     * before the enclosing {@code f} element closes is passed to {@code getLex}.
     * An {@code XMLStreamException} is printed and ends the reading.
     *
     * @param token the token to fill
     */
    public void getInterps(Token token) {
        try {
            while (true) {
                XMLEvent ev = this.xmlReader.nextEvent();
                if (ev.isEndElement()) {
                    if (ev.asEndElement().getName().getLocalPart().equals("f")) {
                        return;
                    }
                    continue;
                }
                if (!ev.isStartElement()) {
                    continue;
                }
                StartElement start = ev.asStartElement();
                if (!start.getName().getLocalPart().equals("fs")) {
                    continue;
                }
                Iterator<Attribute> attrs = start.getAttributes();
                while (attrs.hasNext()) {
                    Attribute a = attrs.next();
                    if (a.getName().getLocalPart().equals("type") && a.getValue().equals("lex")) {
                        this.getLex(token);
                        break;
                    }
                }
            }
        }
        catch (XMLStreamException e) {
            e.printStackTrace();
        }
    }

    /**
     * Consumes an {@code fs type="lex"} structure: {@code f} elements named
     * {@code base}, {@code ctag} and {@code msd} are delegated to {@code getBase},
     * {@code getCTag} and {@code getMSD}. Reading stops when an {@code fs} element closes.
     * An {@code XMLStreamException} is printed and ends the reading.
     *
     * @param token the token to fill
     */
    public void getLex(Token token) {
        try {
            while (true) {
                XMLEvent ev = this.xmlReader.nextEvent();
                if (ev.isEndElement()) {
                    if (ev.asEndElement().getName().getLocalPart().equals("fs")) {
                        return;
                    }
                    continue;
                }
                if (!ev.isStartElement()) {
                    continue;
                }
                StartElement start = ev.asStartElement();
                if (!start.getName().getLocalPart().equals("f")) {
                    continue;
                }
                Iterator<Attribute> attrs = start.getAttributes();
                while (attrs.hasNext()) {
                    Attribute a = attrs.next();
                    if (!a.getName().getLocalPart().equals("name")) {
                        continue;
                    }
                    String feature = a.getValue();
                    if (feature.equals("base")) {
                        this.getBase(token);
                        break;
                    }
                    if (feature.equals("ctag")) {
                        // The remaining attributes of this element are still examined.
                        this.getCTag(token);
                        continue;
                    }
                    if (feature.equals("msd")) {
                        this.getMSD(token);
                        break;
                    }
                }
            }
        }
        catch (XMLStreamException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the lemma of a token: the text of the first {@code string} element found
     * before the enclosing {@code f} element closes is stored in {@code token.lemma}.
     * An {@code XMLStreamException} is printed and ends the reading.
     *
     * @param token the token to fill
     */
    public void getBase(Token token) {
        try {
            while (true) {
                XMLEvent ev = this.xmlReader.nextEvent();
                if (ev.isStartElement()) {
                    if (ev.asStartElement().getName().getLocalPart().equals("string")) {
                        token.lemma = this.xmlReader.getElementText();
                        return;
                    }
                } else if (ev.isEndElement() && ev.asEndElement().getName().getLocalPart().equals("f")) {
                    return;
                }
            }
        }
        catch (XMLStreamException e) {
            e.printStackTrace();
        }
    }

    /**
     * Consumes XML events until the tag of the current feature has been read.
     *
     * <p>Looks for a {@code <symbol>} element and copies its {@code value}
     * attribute into {@code token.ctag}. A {@code <symbol>} without a
     * {@code value} attribute is skipped. Scanning ends as soon as a value
     * has been stored or the closing {@code </f>} tag is reached.
     *
     * @param token token whose {@code ctag} field receives the attribute value
     */
    public void getCTag(Token token) {
        try {
            boolean finished = false;
            while (!finished) {
                XMLEvent ev = this.xmlReader.nextEvent();
                if (ev.isStartElement()) {
                    StartElement opened = ev.asStartElement();
                    if (!opened.getName().getLocalPart().equals("symbol")) {
                        continue;
                    }
                    Iterator<Attribute> attrs = opened.getAttributes();
                    while (!finished && attrs.hasNext()) {
                        Attribute a = attrs.next();
                        if (a.getName().getLocalPart().equals("value")) {
                            token.ctag = a.getValue();
                            finished = true;
                        }
                    }
                } else if (ev.isEndElement()
                        && ev.asEndElement().getName().getLocalPart().equals("f")) {
                    finished = true;
                }
            }
        }
        catch (XMLStreamException e) {
            e.printStackTrace();
        }
    }

    /**
     * Consumes XML events until the morphosyntactic description of the
     * current feature has been read.
     *
     * <p>Looks for a {@code <symbol>} element and appends {@code ":"} plus its
     * {@code value} attribute to the existing {@code token.ctag}. Scanning ends
     * as soon as a value has been appended or the closing {@code </f>} tag is
     * reached.
     *
     * <p>Note: if {@code token.ctag} is still {@code null} when a value is
     * found, string concatenation yields a tag starting with {@code "null:"}.
     *
     * @param token token whose {@code ctag} field is extended
     */
    public void getMSD(Token token) {
        try {
            boolean finished = false;
            while (!finished) {
                XMLEvent ev = this.xmlReader.nextEvent();
                if (ev.isEndElement()) {
                    if (ev.asEndElement().getName().getLocalPart().equals("f")) {
                        finished = true;
                    }
                    continue;
                }
                if (!ev.isStartElement()) {
                    continue;
                }
                StartElement opened = ev.asStartElement();
                if (!opened.getName().getLocalPart().equals("symbol")) {
                    continue;
                }
                Iterator<Attribute> attrs = opened.getAttributes();
                while (!finished && attrs.hasNext()) {
                    Attribute a = attrs.next();
                    if (a.getName().getLocalPart().equals("value")) {
                        token.ctag = token.ctag + ":" + a.getValue();
                        finished = true;
                    }
                }
            }
        }
        catch (XMLStreamException e) {
            e.printStackTrace();
        }
    }

    /**
     * Consumes XML events of a disambiguation feature and extracts the chosen
     * interpretation.
     *
     * <p>The method waits for a nested {@code <fs>} element. Inside it, the first
     * {@code <f>} element carrying {@code name="interpretation"} is handed to
     * {@link #getInterpretation(Token)}. Scanning stops after that call, when
     * the nested {@code </fs>} closes, or when a closing {@code </f>} is seen
     * before any {@code <fs>} was opened.
     *
     * @param token token to be filled in by {@code getInterpretation}
     */
    public void getDisamb(Token token) {
        try {
            boolean finished = false;
            while (!finished) {
                XMLEvent outer = this.xmlReader.nextEvent();
                if (outer.isStartElement()) {
                    if (!outer.asStartElement().getName().getLocalPart().equals("fs")) {
                        continue;
                    }
                    // Inside the nested <fs>: this inner scan always ends with finished == true.
                    while (!finished) {
                        XMLEvent inner = this.xmlReader.nextEvent();
                        if (inner.isStartElement()) {
                            StartElement feature = inner.asStartElement();
                            if (!feature.getName().getLocalPart().equals("f")) {
                                continue;
                            }
                            Iterator<Attribute> attrs = feature.getAttributes();
                            while (!finished && attrs.hasNext()) {
                                Attribute a = attrs.next();
                                boolean isInterpretation = a.getName().getLocalPart().equals("name")
                                        && a.getValue().equals("interpretation");
                                if (isInterpretation) {
                                    this.getInterpretation(token);
                                    finished = true;
                                }
                            }
                        } else if (inner.isEndElement()
                                && inner.asEndElement().getName().getLocalPart().equals("fs")) {
                            finished = true;
                        }
                    }
                } else if (outer.isEndElement()
                        && outer.asEndElement().getName().getLocalPart().equals("f")) {
                    finished = true;
                }
            }
        }
        catch (XMLStreamException e) {
            e.printStackTrace();
        }
    }

    /**
     * Consumes XML events until the interpretation string of the current
     * feature has been read, then splits it into lemma and tag.
     *
     * <p>The text of the first {@code <string>} element is expected to have the
     * form {@code lemma:tag}. A text beginning with {@code ":"} is treated as
     * the colon token itself: the lemma becomes {@code ":"} and the tag is
     * whatever follows the second character. Otherwise the text is split at
     * its first {@code ':'}.
     *
     * <p>Malformed text no longer raises {@code StringIndexOutOfBoundsException}:
     * when there is no separator (or the text is a bare {@code ":"}), the tag is
     * set to the empty string and, in the no-separator case, the whole text
     * becomes the lemma.
     *
     * <p>Scanning also stops, leaving the token untouched, when the closing
     * {@code </f>} tag is reached before any {@code <string>} element.
     *
     * @param token token whose {@code lemma} and {@code ctag} fields are set
     */
    public void getInterpretation(Token token) {
        try {
            while (true) {
                XMLEvent event = this.xmlReader.nextEvent();
                if (event.isStartElement()) {
                    if (!event.asStartElement().getName().getLocalPart().equals("string")) {
                        continue;
                    }
                    String str = this.xmlReader.getElementText();
                    if (str.startsWith(":")) {
                        token.lemma = ":";
                        // Skip the lemma colon and the separator; a bare ":" carries no tag.
                        token.ctag = str.length() > 1 ? str.substring(2) : "";
                    } else {
                        int index = str.indexOf(':');
                        if (index < 0) {
                            // No separator at all: keep the text as the lemma, with an empty tag.
                            token.lemma = str;
                            token.ctag = "";
                        } else {
                            token.lemma = str.substring(0, index);
                            token.ctag = str.substring(index + 1);
                        }
                    }
                    return;
                }
                if (event.isEndElement()
                        && event.asEndElement().getName().getLocalPart().equals("f")) {
                    return;
                }
            }
        }
        catch (XMLStreamException e) {
            e.printStackTrace();
        }
    }

    /**
     * Re-reads one sentence from a corpus file by byte range and tokenizes it.
     *
     * <p>Up to {@code len} bytes starting at offset {@code start} are read,
     * decoded as UTF-8 and trimmed. For format {@code 1} (XCES_FORMAT) the
     * fragment is wrapped in {@code <sentence>} tags, for format {@code 2}
     * (TEI_FORMAT) in {@code <s>} tags, before it is handed to a fresh
     * {@code CorpusReader}.
     *
     * <p>The read is repeated until the buffer is full or the end of the file
     * is reached, because a single {@code read} call may return fewer bytes
     * than requested. If seeking, reading or decoding fails, the stack trace is
     * printed and an empty fragment is parsed instead of the literal text
     * {@code "null"}.
     *
     * @param file       open corpus file
     * @param fileFormat one of the {@code *_FORMAT} constants of this class
     * @param start      byte offset of the sentence in {@code file}
     * @param len        number of bytes the sentence occupies
     * @return pair of the token list produced by {@link #loadTokens(CorpusReader)}
     *         (may be {@code null}) and the reader's multi-word tokens
     */
    public static Pair<LinkedList<Token>, LinkedList<MultiWordToken>> getSentence(RandomAccessFile file, int fileFormat, long start, int len) {
        byte[] b = new byte[len];
        String str = "";
        try {
            file.seek(start);
            int filled = 0;
            while (filled < len) {
                int n = file.read(b, filled, len - filled);
                if (n < 0) {
                    // End of file before len bytes: keep what was read.
                    break;
                }
                filled += n;
            }
            str = new String(b, 0, filled, "UTF8").trim();
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        if (fileFormat == 1) {
            str = "<sentence>" + str + "</sentence>";
        } else if (fileFormat == 2) {
            str = "<s>" + str + "</s>";
        }
        CorpusReader reader = new CorpusReader(fileFormat, str);
        return new Pair<LinkedList<Token>, LinkedList<MultiWordToken>>(CorpusReader.loadTokens(reader), reader.getMultiWordTokens());
    }

    /**
     * Collects tokens from {@code reader} until it is exhausted or a token
     * reporting {@code stop()} is produced. The stop token itself is not
     * included.
     *
     * @param reader source of tokens
     * @return the collected tokens in reading order, or {@code null} when no
     *         token was collected
     */
    public static LinkedList<Token> loadTokens(CorpusReader reader) {
        LinkedList<Token> collected = new LinkedList<Token>();
        for (Token next = reader.getNextToken();
                next != null && !next.stop();
                next = reader.getNextToken()) {
            collected.add(next);
        }
        return collected.isEmpty() ? null : collected;
    }

    /**
     * Returns a copy of {@code phrase} in which the component tokens of each
     * multi-word token are collapsed into that multi-word token.
     *
     * <p>For every entry of {@code mwtList} the copy is scanned from the start.
     * Tokens from the entry's first component up to, but not including, its
     * last component are removed, and the last component is replaced by the
     * multi-word token. Components are matched by object identity, not by
     * {@code equals}. {@code phrase} itself is not modified.
     *
     * <p>If the first component is present but the last one is not, every
     * token from the first component to the end of the copy is removed.
     *
     * @param phrase  tokens to copy; must not be {@code null}
     * @param mwtList multi-word tokens to substitute; {@code null} is treated
     *                as "nothing to replace"
     * @return the new list with substitutions applied
     */
    public static LinkedList<Token> replaceMWT(LinkedList<? extends Token> phrase, LinkedList<MultiWordToken> mwtList) {
        LinkedList<Token> repl = new LinkedList<Token>(phrase);
        if (mwtList == null) {
            return repl;
        }
        for (MultiWordToken mwt : mwtList) {
            Token s = mwt.getTokens().getFirst();
            Token e = mwt.getTokens().getLast();
            // The iterator runs over the Token list; the multi-word token is stored as a Token.
            ListIterator<Token> it = repl.listIterator();
            Token t = null;
            boolean remove = false;
            while (it.hasNext() && t != e) {
                t = it.next();
                if (t == s) {
                    remove = true;
                }
                if (t == e) {
                    // Last component reached: substitute it and stop scanning.
                    it.set(mwt);
                    continue;
                }
                if (remove) {
                    it.remove();
                }
            }
        }
        return repl;
    }

    /**
     * Reader wrapper that tracks how many UTF-8 bytes the characters delivered
     * so far would occupy.
     *
     * <p>Each {@code read} call delivers at most one character from the wrapped
     * reader, so the position advances one character at a time. A surrogate
     * is counted as four bytes when it is first seen, and the character that
     * follows it is counted as zero bytes.
     */
    private class TXTPosReader
    extends Reader {
        private final Reader internalReader;
        private long pos = 0L;
        /** True when the previous character was a surrogate already counted as 4 bytes. */
        private boolean surrogate = false;

        public TXTPosReader(Reader internalReader) {
            this.internalReader = internalReader;
        }

        /**
         * @return number of UTF-8 bytes accounted for by the characters read so far
         */
        public long getPos() {
            return this.pos;
        }

        /**
         * Reads at most one character into {@code cbuf[off]} and advances the
         * byte position accordingly.
         *
         * @return {@code 0} when {@code len} is zero (nothing is read), the
         *         wrapped reader's result for a one-character read otherwise
         */
        @Override
        public int read(char[] cbuf, int off, int len) throws IOException {
            if (len == 0) {
                // Reader contract: a zero-length request reads nothing and must not touch cbuf.
                return 0;
            }
            int chars_read = this.internalReader.read(cbuf, off, 1);
            if (chars_read > 0) {
                if (this.surrogate) {
                    // Second half of a pair whose 4 bytes were already counted.
                    this.surrogate = false;
                } else {
                    this.pos += (long)this.utf8Length(cbuf[off]);
                }
            }
            return chars_read;
        }

        /** Returns the UTF-8 byte count for {@code ch}, flagging surrogates for the next read. */
        private int utf8Length(char ch) {
            if (ch <= '\u007f') {
                return 1;
            }
            if (ch <= '\u07ff') {
                return 2;
            }
            if (Character.isSurrogate(ch)) {
                this.surrogate = true;
                return 4;
            }
            return 3;
        }

        @Override
        public void close() throws IOException {
            this.internalReader.close();
        }
    }

    /**
     * Position-tracking reader that additionally remembers where the most
     * recently read {@code '<'} character started.
     */
    private class XMLPosReader
    extends TXTPosReader {
        /** Position reported by {@code getPos()} just before the latest {@code '<'}; 0 until one is read. */
        private long mark = 0L;

        public XMLPosReader(Reader internalReader) {
            super(internalReader);
        }

        /**
         * @return position of the most recently read {@code '<'}
         */
        public long getMark() {
            return this.mark;
        }

        /**
         * Delegates to the superclass read and updates the mark when the
         * character just stored at {@code cbuf[off]} is {@code '<'}.
         */
        @Override
        public int read(char[] cbuf, int off, int len) throws IOException {
            int count = super.read(cbuf, off, len);
            boolean tagOpened = count > 0 && cbuf[off] == '<';
            if (tagOpened) {
                // '<' is one byte wide, so it began one position before the current one.
                this.mark = this.getPos() - 1L;
            }
            return count;
        }
    }
}

