package termopl;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.RandomAccessFile;
import java.io.Reader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.ListIterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;

/* loaded from: input_file:termopl/CorpusReader.class */
public class CorpusReader {
    /** Format code meaning "not determined"; {@code getNextToken()} yields {@code null} for it. */
    public static final int UNKNOWN_FORMAT = -1;
    /** Line-oriented text corpus, read by {@code getTokenFromTextFile()}. */
    public static final int TEXT_FORMAT = 0;
    /** XCES XML corpus ({@code .ccl} files or files containing {@code <cesAna}). */
    public static final int XCES_FORMAT = 1;
    /** TEI XML corpus (files containing {@code <teiCorpus}). */
    public static final int TEI_FORMAT = 2;
    /** CoNLL-U corpus ({@code .conllu} files). */
    public static final int CONLLU_FORMAT = 3;
    /** Tab-separated internal corpus ({@code .tgt} files), read by {@code getTokenFromTGTFile()}. */
    public static final int INTERNAL_FORMAT = 4;
    /** Matches a whole text-format line made of three {@code #}-terminated fields. */
    private static final Pattern patt = Pattern.compile("(.*)#(.*)#(.*)#");
    /** One of the {@code *_FORMAT} codes above; selects the reader used for the current input. */
    private int format;
    /** Incremented on every {@code openFile()} and on every in-file document marker. */
    private int docNumber;
    /** {@code end_char} value of the last CoNLL-U token that carried one; -1 after {@code openFile()}. */
    private int end_char;
    /** Reader position at which the current sentence starts (as reported by the position readers). */
    private long sentenceStart;
    /** Reader position at which the last finished sentence ended. */
    private long sentenceEnd;
    /** Set when a text-based reader hit a sentence end; the next read then records {@code sentenceStart}. */
    private boolean eos;
    /** File that {@code openFile()} / {@code checkFormat()} operate on. */
    private File currentFile;
    /** Name captured by {@code setCurrentFile(File)}; not touched by the two-argument overload. */
    private String processedFileName;
    /** Position-tracking reader underneath {@code textReader} (text-based formats). */
    private TXTPosReader txtPosReader;
    /** Position-tracking reader underneath {@code xmlReader} (XML formats). */
    private XMLPosReader xmlPosReader;
    private BufferedReader textReader;
    private XMLEventReader xmlReader;
    /** Multi-word tokens collected while reading CoNLL-U; {@code null} until the first one is seen. */
    private LinkedList<MultiWordToken> mwTokens;
    /** Multi-word token whose index range is currently being filled, or {@code null}. */
    private MultiWordToken mwt;
    /** Token returned by the latest {@code getNextToken()} call. */
    private Token current;
    /** Token returned by the call before that, or {@code null}; readers clear its space-after flag. */
    private Token previous;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:termopl/CorpusReader$TXTPosReader.class */
    /**
     * Reader wrapper that delivers at most one character per {@code read} call and keeps a
     * running total of the number of bytes the delivered characters occupy in UTF-8.
     *
     * <p>Reading a single character at a time is presumably deliberate, so that buffering
     * consumers cannot run far ahead of {@link #getPos()} — confirm before changing it.
     */
    public class TXTPosReader extends Reader {
        private final Reader internalReader;
        /** Sum of the UTF-8 encoded lengths of all characters handed out so far. */
        private long pos = 0;
        /** True after the first half of a surrogate pair: its 4 bytes are already counted. */
        private boolean surrogate = false;

        /**
         * @param reader character source to wrap; closed by {@link #close()}
         */
        public TXTPosReader(Reader reader) {
            this.internalReader = reader;
        }

        /**
         * @return the UTF-8 byte length of everything read through this reader so far
         */
        public long getPos() {
            return this.pos;
        }

        /**
         * Reads at most one character into {@code cArr[i]} and advances the byte position.
         *
         * @param cArr destination buffer
         * @param i    index at which the character is stored
         * @param i2   maximum number of characters requested; only "zero" versus "non-zero" matters
         * @return 0 if {@code i2} is zero, otherwise whatever the wrapped reader returns for a
         *         one-character read (1, or -1 at end of input)
         * @throws IOException if the wrapped reader fails
         */
        @Override // java.io.Reader
        public int read(char[] cArr, int i, int i2) throws IOException {
            // A zero-length request must not consume input or move the position.
            if (i2 == 0) {
                return 0;
            }
            int read = this.internalReader.read(cArr, i, 1);
            if (read <= 0) {
                return read;
            }
            if (this.surrogate) {
                // Second half of a pair: the 4 bytes were added with the first half.
                this.surrogate = false;
                return read;
            }
            char c = cArr[i];
            int width;
            if (c <= 127) {
                width = 1;
            } else if (c <= 2047) {
                width = 2;
            } else if (Character.isSurrogate(c)) {
                this.surrogate = true;
                width = 4;
            } else {
                width = 3;
            }
            this.pos += width;
            return read;
        }

        @Override // java.io.Reader, java.io.Closeable, java.lang.AutoCloseable
        public void close() throws IOException {
            this.internalReader.close();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:termopl/CorpusReader$XMLPosReader.class */
    /**
     * Position-tracking reader that additionally remembers where the most recently read
     * {@code '<'} character started.
     */
    public class XMLPosReader extends TXTPosReader {
        /** Position just before the last {@code '<'} that was read; 0 until one is seen. */
        private long mark = 0L;

        public XMLPosReader(Reader reader) {
            super(reader);
        }

        /**
         * @return the position recorded for the last {@code '<'} read so far
         */
        public long getMark() {
            return this.mark;
        }

        /**
         * Delegates to the parent reader and updates the mark when the character stored at
         * {@code cArr[i]} is {@code '<'}.
         */
        @Override // termopl.CorpusReader.TXTPosReader, java.io.Reader
        public int read(char[] cArr, int i, int i2) throws IOException {
            final int count = super.read(cArr, i, i2);
            if (count <= 0) {
                return count;
            }
            if (cArr[i] == '<') {
                // getPos() already includes the '<' itself, hence the -1.
                this.mark = getPos() - 1;
            }
            return count;
        }
    }

    /**
     * Creates a reader with no file, no open streams and an unknown format.
     */
    public CorpusReader() {
        // No input attached yet.
        this.currentFile = null;
        this.textReader = null;
        this.xmlReader = null;

        // No tokens seen yet.
        this.current = null;
        this.previous = null;
        this.mwt = null;
        this.mwTokens = null;

        this.docNumber = 0;
        this.format = -1; // UNKNOWN_FORMAT
    }

    /**
     * Creates a reader over in-memory corpus text instead of a file.
     *
     * @param i   format code; 0, 3 and 4 get a line-based reader, every other value an XML
     *            event reader
     * @param str the corpus content
     */
    public CorpusReader(int i, String str) {
        this();
        this.format = i;
        StringReader source = new StringReader(str);
        switch (i) {
            case 0: // TEXT_FORMAT
            case 3: // CONLLU_FORMAT
            case 4: // INTERNAL_FORMAT
                this.txtPosReader = new TXTPosReader(source);
                this.textReader = new BufferedReader(this.txtPosReader);
                break;
            default: {
                XMLInputFactory factory = XMLInputFactory.newInstance();
                factory.setProperty("javax.xml.stream.isNamespaceAware", Boolean.FALSE);
                this.xmlPosReader = new XMLPosReader(source);
                try {
                    this.xmlReader = factory.createXMLEventReader(this.xmlPosReader);
                } catch (XMLStreamException e) {
                    // Best effort: xmlReader stays null when the parser cannot be created.
                    e.printStackTrace();
                }
                break;
            }
        }
    }

    /**
     * Selects the file to work on and records its name as the processed file name.
     *
     * @param file the corpus file; must not be {@code null} because its name is read here
     */
    public void setCurrentFile(File file) {
        this.currentFile = file;
        this.processedFileName = file.getName();
    }

    /**
     * Selects the file to work on together with its format code.
     * Unlike the one-argument overload, this leaves the processed file name untouched.
     *
     * @param file the corpus file
     * @param i    one of the {@code *_FORMAT} codes
     */
    public void setCurrentFile(File file, int i) {
        this.currentFile = file;
        this.format = i;
    }

    /**
     * @return the name of the current file, or {@code null} when no file has been set
     */
    public String getCurrentFileName() {
        return this.currentFile == null ? null : this.currentFile.getName();
    }

    /**
     * @return the file name recorded by {@code setCurrentFile(File)}, or {@code null} if that
     *         overload has never been called
     */
    public String getProcessedFileName() {
        return this.processedFileName;
    }

    /**
     * @return the current format code (one of the {@code *_FORMAT} constants)
     */
    public int getFormat() {
        return this.format;
    }

    /**
     * @return the token remembered as "previous", or {@code null}; the readers reset it at
     *         sentence and document boundaries
     */
    public Token getPreviousToken() {
        return this.previous;
    }

    /**
     * @return the running document counter (bumped by {@code openFile()} and by in-file
     *         document markers)
     */
    public int getDocNumber() {
        return this.docNumber;
    }

    /**
     * Discards the collected multi-word tokens; the list is {@code null} again until the
     * CoNLL-U reader encounters the next one.
     */
    public void initMultiWordTokens() {
        this.mwTokens = null;
    }

    /**
     * @return the live list of multi-word tokens collected so far, or {@code null} when none
     *         have been collected
     */
    public LinkedList<MultiWordToken> getMultiWordTokens() {
        return this.mwTokens;
    }

    /**
     * @return the reader position recorded as the start of the current sentence
     */
    public long getSentenceStart() {
        return this.sentenceStart;
    }

    /**
     * @return the difference between the recorded sentence end and start positions,
     *         narrowed to {@code int}
     */
    public int getSentenceLength() {
        return (int) (this.sentenceEnd - this.sentenceStart);
    }

    /**
     * Opens the current file with the reader matching the current format and resets the
     * per-file state. The document counter is only advanced when opening did not throw;
     * any exception is printed and otherwise ignored.
     */
    public void openFile() {
        try {
            final int fmt = this.format;
            if (fmt == 0 || fmt == 3 || fmt == 4) {
                // TEXT_FORMAT, CONLLU_FORMAT, INTERNAL_FORMAT
                openTextFile();
            } else if (fmt == 1 || fmt == 2) {
                // XCES_FORMAT, TEI_FORMAT
                openXMLFile();
            }
            this.docNumber += 1;
            this.sentenceStart = 0L;
            this.sentenceEnd = 0L;
            this.end_char = -1;
            this.eos = false;
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Closes whichever reader belongs to the current format. Failures (including a reader
     * that was never opened) are printed and swallowed.
     */
    public void closeFile() {
        try {
            final int fmt = this.format;
            if (fmt == 0 || fmt == 3 || fmt == 4) {
                this.textReader.close();
            } else if (fmt == 1 || fmt == 2) {
                this.xmlReader.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Opens the current file as UTF-8 text behind a position-tracking reader.
     *
     * @throws UnsupportedEncodingException if the "UTF8" charset name is not supported
     * @throws FileNotFoundException        if the current file cannot be opened
     */
    public void openTextFile() throws UnsupportedEncodingException, FileNotFoundException {
        FileInputStream bytes = new FileInputStream(this.currentFile);
        InputStreamReader chars = new InputStreamReader(bytes, "UTF8");
        TXTPosReader tracker = new TXTPosReader(chars);
        this.txtPosReader = tracker;
        this.textReader = new BufferedReader(tracker);
    }

    /**
     * Opens the current file as UTF-8 XML: a position-tracking reader feeds a StAX event
     * reader created from a default-configured factory.
     *
     * @throws FileNotFoundException        if the current file cannot be opened
     * @throws XMLStreamException           if the event reader cannot be created
     * @throws UnsupportedEncodingException if the "UTF8" charset name is not supported
     */
    public void openXMLFile() throws FileNotFoundException, XMLStreamException, UnsupportedEncodingException {
        XMLInputFactory factory = XMLInputFactory.newInstance();
        FileInputStream bytes = new FileInputStream(this.currentFile);
        InputStreamReader chars = new InputStreamReader(bytes, "UTF8");
        this.xmlPosReader = new XMLPosReader(chars);
        this.xmlReader = factory.createXMLEventReader(this.xmlPosReader);
    }

    /**
     * Determines the format of the current file and stores it in {@code format}.
     * The file extension is tried first ({@code .tgt}, {@code .ccl}, {@code .conllu}),
     * then the content probes for text, XCES and TEI, in that order. An I/O failure is
     * printed and leaves the format unknown.
     */
    public void checkFormat() {
        try {
            final String lowerName = this.currentFile.getName().toLowerCase();
            final int detected;
            if (lowerName.endsWith(".tgt")) {
                detected = 4; // INTERNAL_FORMAT
            } else if (lowerName.endsWith(".ccl")) {
                detected = 1; // XCES_FORMAT
            } else if (lowerName.endsWith(".conllu")) {
                detected = 3; // CONLLU_FORMAT
            } else if (checkFormatTXT()) {
                detected = 0; // TEXT_FORMAT
            } else if (checkFormatXCES()) {
                detected = 1; // XCES_FORMAT
            } else if (checkFormatTEI()) {
                detected = 2; // TEI_FORMAT
            } else {
                detected = -1; // UNKNOWN_FORMAT
            }
            this.format = detected;
        } catch (IOException e) {
            e.printStackTrace();
            this.format = -1;
        }
    }

    /**
     * Probes the current file for the line-oriented text format.
     *
     * <p>Lines starting with {@code %%} are skipped. Every other line is accepted if it is
     * empty, is a sentence separator, consists of three {@code #}-terminated fields, or
     * contains at least two tab characters. Reading stops after ten accepted lines or at
     * end of file. The reader is now closed even when reading fails.
     *
     * <p>NOTE(review): the verdict flag is not reset between lines, so the result reflects
     * the last line examined, and a {@code #}-style line that follows a rejected line is
     * only tested for tabs. Behaviour kept as is — confirm whether this is intended.
     *
     * @return the verdict for the last examined line ({@code true} if none was examined)
     * @throws IOException if the file cannot be opened, read or closed
     */
    public boolean checkFormatTXT() throws IOException {
        boolean z = true;
        int accepted = 0;
        try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(this.currentFile), "UTF8"))) {
            String readLine;
            while (accepted < 10 && (readLine = bufferedReader.readLine()) != null) {
                String trim = readLine.trim();
                if (trim.startsWith("%%")) {
                    continue;
                }
                if (trim.isEmpty() || trim.matches("&\\s*#&\\s*#interp#") || trim.matches("&\\t&\\tinterp.*")) {
                    accepted++;
                    continue;
                }
                // Look for three '#' separators; hashEnd ends up just past the third one.
                int hashEnd = 0;
                for (int found = 0; found < 3; found++) {
                    hashEnd = trim.indexOf('#', hashEnd);
                    if (hashEnd < 0) {
                        z = false;
                        break;
                    }
                    hashEnd++;
                }
                if (z) {
                    // The third '#' must be the final character of the line.
                    z = hashEnd == trim.length();
                } else {
                    // Fall back to the tab-separated layout: at least two tabs required.
                    z = true;
                    int from = 0;
                    for (int tabs = 0; tabs < 2; tabs++) {
                        int tabAt = trim.indexOf('\t', from);
                        if (tabAt < 0) {
                            z = false;
                            break;
                        }
                        from = tabAt + 1;
                    }
                }
                if (z) {
                    accepted++;
                }
            }
        }
        return z;
    }

    /**
     * @return {@code true} if the start of the current file contains an XML declaration
     *         followed by {@code <cesAna}
     */
    public boolean checkFormatXCES() throws IOException {
        return checkXML("<cesAna");
    }

    /**
     * @return {@code true} if the start of the current file contains an XML declaration
     *         followed by {@code <teiCorpus}
     */
    public boolean checkFormatTEI() throws IOException {
        return checkXML("<teiCorpus");
    }

    /**
     * Reads the first chunk of the current file (one buffer of
     * {@code TermoPLDocument.MAX_TERM_QUEUE_SIZE} characters) and checks whether it contains
     * {@code <?xml}, at least one further character, and then {@code str}.
     *
     * <p>The reader is closed on every path, including a failed read. I/O errors are
     * printed and reported as "no match".
     *
     * @param str text appended to the search pattern; it is interpreted as a regular
     *            expression fragment, not quoted
     * @return {@code true} if the pattern is found in the chunk that was read
     */
    public boolean checkXML(String str) throws IOException {
        try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(this.currentFile), "UTF8"))) {
            char[] cArr = new char[TermoPLDocument.MAX_TERM_QUEUE_SIZE];
            int read = bufferedReader.read(cArr);
            if (read < 0) {
                return false;
            }
            // DOTALL lets ".+" span the line breaks between the declaration and the root tag.
            Pattern probe = Pattern.compile("<\\?xml.+" + str, Pattern.DOTALL);
            return probe.matcher(new String(cArr, 0, read)).find();
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Advances to the next token of the current input.
     *
     * <p>The token returned last time becomes "previous" (the sentence-end marker
     * {@code Token.nullToken} is mapped to {@code null}) before the format-specific reader
     * is invoked.
     *
     * @return the next token, {@code Token.nullToken} at a sentence end, or {@code null}
     *         when the format is not one of the five known codes or the reader returns
     *         {@code null}
     */
    public Token getNextToken() {
        Token last = this.current;
        this.previous = (last == Token.nullToken) ? null : last;

        final int fmt = this.format;
        Token next;
        if (fmt == 0) {
            next = getTokenFromTextFile();
        } else if (fmt == 1) {
            next = getTokenFromXCESFile();
        } else if (fmt == 2) {
            next = getTokenFromTEIFile();
        } else if (fmt == 3) {
            next = getTokenFromCONLLUFile();
        } else if (fmt == 4) {
            next = getTokenFromTGTFile();
        } else {
            next = null;
        }
        this.current = next;
        return next;
    }

    /**
     * Reads the next token from a tab-separated internal-format stream.
     *
     * <p>Blank lines and lines with fewer than three columns are skipped. A line that is
     * exactly {@code eos} ends the sentence. A fourth column equal to {@code nps} clears the
     * space-after flag of the previous token.
     *
     * @return a token built from the first three columns, {@code Token.nullToken} at a
     *         sentence end, or {@code null} at end of input or after an error
     */
    public Token getTokenFromTGTFile() {
        try {
            if (this.eos) {
                // First read after a sentence end: the new sentence starts here.
                this.sentenceStart = this.txtPosReader.getPos();
                this.eos = false;
            }
            for (String raw = this.textReader.readLine(); raw != null; raw = this.textReader.readLine()) {
                String line = raw.trim();
                if (line.isEmpty()) {
                    continue;
                }
                if (line.equals("eos")) {
                    this.eos = true;
                    this.sentenceEnd = this.txtPosReader.getPos();
                    return Token.nullToken;
                }
                String[] cols = line.split("\\t");
                if (cols.length < 3) {
                    continue;
                }
                boolean noSpaceBefore = cols.length >= 4 && cols[3].equals("nps");
                if (noSpaceBefore && this.previous != null) {
                    this.previous.spaceAfter = false;
                }
                return new Token(cols[0], cols[1], cols[2]);
            }
            // Input exhausted.
            this.eos = true;
            this.sentenceEnd = this.txtPosReader.getPos();
            return null;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Reads the next token from a text-format stream.
     *
     * <p>A line starting with {@code %%} begins a new document (counter bumped, sentence
     * positions and previous token reset). An empty line or an "interp" separator line ends
     * the sentence. Token lines are either three {@code #}-terminated fields or at least
     * three tab-separated columns; anything else is skipped.
     *
     * @return the parsed token, {@code Token.nullToken} at a sentence end, or {@code null}
     *         at end of input or after an error
     */
    public Token getTokenFromTextFile() {
        try {
            if (this.eos) {
                this.sentenceStart = this.txtPosReader.getPos();
                this.eos = false;
            }
            for (String raw = this.textReader.readLine(); raw != null; raw = this.textReader.readLine()) {
                String line = raw.trim();
                if (line.startsWith("%%")) {
                    // Document marker: start counting afresh.
                    this.previous = null;
                    this.docNumber++;
                    this.sentenceEnd = 0L;
                    this.sentenceStart = 0L;
                    continue;
                }
                boolean sentenceBreak = line.isEmpty()
                        || line.matches("&\\s*#&\\s*#interp#")
                        || line.matches("&\\t&\\tinterp.*");
                if (sentenceBreak) {
                    this.eos = true;
                    this.sentenceEnd = this.txtPosReader.getPos();
                    return Token.nullToken;
                }
                Matcher hashFields = patt.matcher(line);
                if (hashFields.matches()) {
                    String first = hashFields.group(1).trim();
                    String second = hashFields.group(2).trim();
                    String third = hashFields.group(3).trim();
                    return new Token(first, second, third);
                }
                String[] cols = line.split("\\t");
                if (cols.length >= 3) {
                    return new Token(cols[0], cols[1], cols[2]);
                }
            }
            this.eos = true;
            this.sentenceEnd = this.txtPosReader.getPos();
            return null;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Reads the next token from a CoNLL-U stream.
     *
     * <p>An empty line ends the sentence. Lines starting with a digit are split on tabs;
     * an ID of the form {@code a-b} registers a multi-word token and reading continues,
     * a plain integer ID yields a {@code UDToken}. A non-token line containing
     * {@code newdoc} starts a new document.
     *
     * <p>NOTE(review): column 10 ({@code split[9]}) and the integer columns are read without
     * validation; any resulting exception is caught below and reported as {@code null},
     * which callers cannot tell apart from end of input.
     *
     * @return the next {@code UDToken}, {@code Token.nullToken} at a sentence end, or
     *         {@code null} at end of input or after an error
     */
    public Token getTokenFromCONLLUFile() {
        // Passed as the last constructor argument of the tokens built below (presumably the
        // space-after flag). It is never reset within one call once set to false.
        boolean z = true;
        try {
            if (this.eos) {
                this.sentenceStart = this.txtPosReader.getPos();
                this.eos = false;
            }
            String readLine = this.textReader.readLine();
            while (readLine != null) {
                String trim = readLine.trim();
                if (trim.isEmpty()) {
                    this.eos = true;
                    this.sentenceEnd = this.txtPosReader.getPos();
                    return Token.nullToken;
                }
                if (trim.matches("\\d+.*")) {
                    String[] split = trim.split("\\t");
                    if (split[9].contains("SpaceAfter=No")) {
                        z = false;
                    } else {
                        // Without an explicit SpaceAfter=No, adjacency is derived from character
                        // offsets: a token starting where the last one ended has no space before it.
                        Matcher matcher = Pattern.compile("start_char=(\\d+)\\|end_char=(\\d+)").matcher(split[9]);
                        if (matcher.find()) {
                            int parseInt = Integer.parseInt(matcher.group(1));
                            int parseInt2 = Integer.parseInt(matcher.group(2));
                            if (parseInt == this.end_char && this.previous != null) {
                                this.previous.spaceAfter = false;
                            }
                            this.end_char = parseInt2;
                        }
                    }
                    if (split[0].matches("\\d+\\-\\d+")) {
                        // Range ID: remember the multi-word token and keep reading its parts.
                        String[] split2 = split[0].split("\\-");
                        this.mwt = new MultiWordToken(split[1], z, Integer.parseInt(split2[0]), Integer.parseInt(split2[1]));
                        if (this.mwTokens == null) {
                            this.mwTokens = new LinkedList<>();
                        }
                        this.mwTokens.add(this.mwt);
                    } else if (split[0].matches("\\d+")) {
                        UDToken uDToken = new UDToken(split[1], split[2], split[4], split[3], split[5], split[7], Integer.parseInt(split[0]), Integer.parseInt(split[6]), z);
                        if (this.mwt != null) {
                            // Attach the token to the open multi-word token while its index is in
                            // range; the first token outside the range closes it.
                            if (uDToken.index < this.mwt.startToken() || uDToken.index > this.mwt.endToken()) {
                                this.mwt = null;
                            } else {
                                this.mwt.addToken(uDToken);
                            }
                        }
                        // Copy the named-entity marker, if any, from column 10.
                        if (split[9].contains("ner=S")) {
                            uDToken.ner = 'S';
                        } else if (split[9].contains("ner=B")) {
                            uDToken.ner = 'B';
                        } else if (split[9].contains("ner=I")) {
                            uDToken.ner = 'I';
                        } else if (split[9].contains("ner=E")) {
                            uDToken.ner = 'E';
                        }
                        return uDToken;
                    }
                } else if (trim.contains("newdoc")) {
                    // Document boundary: reset per-document state.
                    this.previous = null;
                    this.mwt = null;
                    this.docNumber++;
                    this.sentenceEnd = 0L;
                    this.sentenceStart = 0L;
                }
                readLine = this.textReader.readLine();
            }
            // Input exhausted.
            this.eos = true;
            this.sentenceEnd = this.txtPosReader.getPos();
            return null;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Reads the next token from an XCES event stream.
     *
     * <p>A {@code <chunk type="s">} or {@code <sentence>} start records the sentence start
     * and clears the previous token; the end of any {@code chunk} or {@code sentence}
     * records the sentence end and returns {@code Token.nullToken}. The end of an
     * {@code <ns>} element clears the space-after flag of the previous token. A
     * {@code <tok>} element is turned into a token from its {@code orth} text and the
     * {@code base}/{@code ctag} of the {@code lex} marked {@code disamb="1"}.
     *
     * <p>NOTE(review): the {@code tok} loop only stops once a disambiguated {@code lex} has
     * been processed; a {@code tok} without one makes it read on into following elements.
     *
     * @return the next token, {@code Token.nullToken} at a sentence end, or {@code null}
     *         at end of input or after an XML error
     */
    public Token getTokenFromXCESFile() {
        while (this.xmlReader.hasNext()) {
            try {
                XMLEvent nextEvent = this.xmlReader.nextEvent();
                if (nextEvent.isStartElement()) {
                    StartElement asStartElement = nextEvent.asStartElement();
                    String localPart = asStartElement.getName().getLocalPart();
                    if (localPart.equals("chunk")) {
                        // Only chunks typed "s" open a sentence.
                        Iterator attributes = asStartElement.getAttributes();
                        while (true) {
                            if (!attributes.hasNext()) {
                                break;
                            }
                            Attribute attribute = (Attribute) attributes.next();
                            if (attribute.getName().getLocalPart().equals("type") && attribute.getValue().equals("s")) {
                                this.sentenceStart = this.xmlPosReader.getPos();
                                this.previous = null;
                                break;
                            }
                        }
                    } else if (localPart.equals("sentence")) {
                        this.sentenceStart = this.xmlPosReader.getPos();
                        this.previous = null;
                    } else if (localPart.equals("tok")) {
                        // str = orth text, str2 = base text, str3 = ctag text; z = token complete.
                        String str = null;
                        String str2 = null;
                        String str3 = null;
                        boolean z = false;
                        do {
                            XMLEvent nextEvent2 = this.xmlReader.nextEvent();
                            if (nextEvent2.isStartElement()) {
                                StartElement asStartElement2 = nextEvent2.asStartElement();
                                String localPart2 = asStartElement2.getName().getLocalPart();
                                if (localPart2.equals("orth")) {
                                    str = this.xmlReader.getElementText();
                                } else if (localPart2.equals("lex")) {
                                    // z2: this lex carries disamb="1".
                                    Iterator attributes2 = asStartElement2.getAttributes();
                                    boolean z2 = false;
                                    while (true) {
                                        if (!attributes2.hasNext()) {
                                            break;
                                        }
                                        Attribute attribute2 = (Attribute) attributes2.next();
                                        if (attribute2.getName().getLocalPart().equals("disamb") && attribute2.getValue().equals("1")) {
                                            z2 = true;
                                            break;
                                        }
                                    }
                                    if (z2) {
                                        // Collect base and ctag until both are known or the lex/tok closes.
                                        while (true) {
                                            if (str2 != null && str3 != null) {
                                                break;
                                            }
                                            XMLEvent nextEvent3 = this.xmlReader.nextEvent();
                                            if (nextEvent3.isStartElement()) {
                                                String localPart3 = nextEvent3.asStartElement().getName().getLocalPart();
                                                if (localPart3.equals("base")) {
                                                    str2 = this.xmlReader.getElementText();
                                                } else if (localPart3.equals("ctag")) {
                                                    str3 = this.xmlReader.getElementText();
                                                }
                                            } else if (nextEvent3.isEndElement()) {
                                                String localPart4 = nextEvent3.asEndElement().getName().getLocalPart();
                                                if (localPart4.equals("tok")) {
                                                    z = true;
                                                    break;
                                                }
                                                if (localPart4.equals("lex")) {
                                                    break;
                                                }
                                            } else {
                                                continue;
                                            }
                                        }
                                        if (str2 != null && str3 != null) {
                                            z = true;
                                        }
                                    }
                                }
                            }
                        } while (!z);
                        return new Token(str, str2, str3);
                    }
                } else if (nextEvent.isEndElement()) {
                    String localPart5 = nextEvent.asEndElement().getName().getLocalPart();
                    if (localPart5.equals("chunk") || localPart5.equals("sentence")) {
                        // The mark is the position of the '<' that opened this closing tag.
                        this.sentenceEnd = this.xmlPosReader.getMark();
                        return Token.nullToken;
                    }
                    if (localPart5.equals("ns") && this.previous != null) {
                        this.previous.spaceAfter = false;
                    }
                } else {
                    continue;
                }
            } catch (XMLStreamException e) {
                e.printStackTrace();
                return null;
            }
        }
        return null;
    }

    /**
     * Reads the next token from a TEI event stream.
     *
     * <p>An {@code <s>} start records the sentence start and clears the previous token; its
     * end records the sentence end. An {@code <fs type="morph">} element is parsed into a
     * fresh token by {@code getMorph}.
     *
     * @return the next token, {@code Token.nullToken} at a sentence end, or {@code null}
     *         at end of input or after an XML error
     */
    public Token getTokenFromTEIFile() {
        try {
            while (this.xmlReader.hasNext()) {
                XMLEvent event = this.xmlReader.nextEvent();
                if (event.isEndElement()) {
                    if (event.asEndElement().getName().getLocalPart().equals("s")) {
                        this.sentenceEnd = this.xmlPosReader.getMark();
                        return Token.nullToken;
                    }
                    continue;
                }
                if (!event.isStartElement()) {
                    continue;
                }
                StartElement element = event.asStartElement();
                String tag = element.getName().getLocalPart();
                if (tag.equals("s")) {
                    this.sentenceStart = this.xmlPosReader.getPos();
                    this.previous = null;
                } else if (tag.equals("fs")) {
                    Iterator attrs = element.getAttributes();
                    while (attrs.hasNext()) {
                        Attribute attr = (Attribute) attrs.next();
                        boolean isType = attr.getName().getLocalPart().equals("type");
                        if (isType && attr.getValue().equals("morph")) {
                            Token token = new Token();
                            getMorph(token);
                            return token;
                        }
                    }
                }
            }
        } catch (XMLStreamException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Consumes the content of a TEI {@code <fs type="morph">} element and fills
     * {@code token} from its {@code <f>} features.
     *
     * <p>Features named {@code orth}, {@code interps} and {@code disamb} are delegated to
     * their own readers; {@code disamb} also ends the scan. After an {@code nps} feature,
     * a {@code <binary value="true">} clears the space-after flag of the previous token.
     * The scan also ends at the first closing {@code fs}, or on an XML error (printed).
     *
     * @param token the token to populate
     */
    public void getMorph(Token token) {
        boolean finished = false;
        boolean npsPending = false;
        try {
            while (!finished) {
                XMLEvent event = this.xmlReader.nextEvent();
                if (event.isStartElement()) {
                    StartElement element = event.asStartElement();
                    String tag = element.getName().getLocalPart();
                    if (tag.equals("f")) {
                        Iterator attrs = element.getAttributes();
                        while (attrs.hasNext()) {
                            Attribute attr = (Attribute) attrs.next();
                            if (!attr.getName().getLocalPart().equals("name")) {
                                continue;
                            }
                            String feature = attr.getValue();
                            if (feature.equals("orth")) {
                                getOrth(token);
                            } else if (feature.equals("interps")) {
                                getInterps(token);
                            } else if (feature.equals("nps")) {
                                npsPending = true;
                            } else if (feature.equals("disamb")) {
                                getDisamb(token);
                                finished = true;
                                break;
                            }
                        }
                    } else if (tag.equals("binary")) {
                        Iterator attrs = element.getAttributes();
                        while (attrs.hasNext()) {
                            Attribute attr = (Attribute) attrs.next();
                            boolean isTrueValue = attr.getName().getLocalPart().equals("value")
                                    && attr.getValue().equals("true");
                            if (isTrueValue && npsPending) {
                                npsPending = false;
                                if (this.previous != null) {
                                    this.previous.spaceAfter = false;
                                }
                            }
                        }
                    }
                } else if (event.isEndElement() && event.asEndElement().getName().getLocalPart().equals("fs")) {
                    finished = true;
                }
            }
        } catch (XMLStreamException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads events until the first {@code <string>} element, whose text becomes
     * {@code token.form}, or until a closing {@code f}, whichever comes first.
     * An XML error is printed and ends the scan.
     *
     * @param token the token whose form is set
     */
    public void getOrth(Token token) {
        try {
            while (true) {
                XMLEvent event = this.xmlReader.nextEvent();
                if (event.isStartElement()) {
                    if (event.asStartElement().getName().getLocalPart().equals("string")) {
                        token.form = this.xmlReader.getElementText();
                        return;
                    }
                } else if (event.isEndElement() && event.asEndElement().getName().getLocalPart().equals("f")) {
                    return;
                }
            }
        } catch (XMLStreamException e) {
            e.printStackTrace();
        }
    }

    /**
     * Scans the content of an {@code interps} feature: every nested
     * {@code <fs type="lex">} is handed to {@code getLex}. The scan stops at the first
     * closing {@code f} seen here, or on an XML error (printed).
     *
     * @param token the token passed through to {@code getLex}
     */
    public void getInterps(Token token) {
        try {
            while (true) {
                XMLEvent event = this.xmlReader.nextEvent();
                if (event.isStartElement()) {
                    StartElement element = event.asStartElement();
                    if (!element.getName().getLocalPart().equals("fs")) {
                        continue;
                    }
                    Iterator attrs = element.getAttributes();
                    while (attrs.hasNext()) {
                        Attribute attr = (Attribute) attrs.next();
                        boolean isType = attr.getName().getLocalPart().equals("type");
                        if (isType && attr.getValue().equals("lex")) {
                            getLex(token);
                            break;
                        }
                    }
                } else if (event.isEndElement() && event.asEndElement().getName().getLocalPart().equals("f")) {
                    return;
                }
            }
        } catch (XMLStreamException e) {
            e.printStackTrace();
        }
    }

    /**
     * Scans the content of an {@code <fs type="lex">} element and dispatches its
     * {@code <f>} features by name: {@code base}, {@code ctag} and {@code msd} each go to
     * their own reader. The scan stops at the first closing {@code fs} seen here, or on
     * an XML error (printed).
     *
     * @param token the token passed through to the feature readers
     */
    public void getLex(Token token) {
        try {
            while (true) {
                XMLEvent event = this.xmlReader.nextEvent();
                if (event.isStartElement()) {
                    StartElement element = event.asStartElement();
                    if (!element.getName().getLocalPart().equals("f")) {
                        continue;
                    }
                    Iterator attrs = element.getAttributes();
                    while (attrs.hasNext()) {
                        Attribute attr = (Attribute) attrs.next();
                        if (!attr.getName().getLocalPart().equals("name")) {
                            continue;
                        }
                        String feature = attr.getValue();
                        if (feature.equals("base")) {
                            getBase(token);
                            break;
                        }
                        if (feature.equals("ctag")) {
                            // No break here: remaining attributes are still inspected.
                            getCTag(token);
                        } else if (feature.equals("msd")) {
                            getMSD(token);
                            break;
                        }
                    }
                } else if (event.isEndElement() && event.asEndElement().getName().getLocalPart().equals("fs")) {
                    return;
                }
            }
        } catch (XMLStreamException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads events until either a {@code string} start element or a closing
     * {@code f} element is met. In the first case the element's text becomes
     * {@code token.lemma}; in the second case the token is left untouched.
     *
     * If the reader raises an {@link XMLStreamException}, the stack trace is
     * printed and the method returns.
     *
     * @param token token whose {@code lemma} field is filled in
     */
    public void getBase(Token token) {
        try {
            for (;;) {
                XMLEvent event = this.xmlReader.nextEvent();
                if (event.isStartElement()) {
                    if (event.asStartElement().getName().getLocalPart().equals("string")) {
                        token.lemma = this.xmlReader.getElementText();
                        return;
                    }
                } else if (event.isEndElement() && event.asEndElement().getName().getLocalPart().equals("f")) {
                    // Feature closed without a <string> child: nothing to store.
                    return;
                }
            }
        } catch (XMLStreamException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads events until a {@code symbol} start element carrying a {@code value}
     * attribute is found, and stores that attribute's value in {@code token.ctag}.
     * The search also stops, without touching the token, when a closing {@code f}
     * element is read. A {@code symbol} element without a {@code value} attribute
     * is skipped and the search continues.
     *
     * If the reader raises an {@link XMLStreamException}, the stack trace is
     * printed and the method returns.
     *
     * @param token token whose {@code ctag} field is filled in
     */
    public void getCTag(Token token) {
        try {
            while (true) {
                XMLEvent event = this.xmlReader.nextEvent();
                if (event.isStartElement()) {
                    StartElement element = event.asStartElement();
                    if (!element.getName().getLocalPart().equals("symbol")) {
                        continue;
                    }
                    for (Iterator<?> attrs = element.getAttributes(); attrs.hasNext();) {
                        Attribute attr = (Attribute) attrs.next();
                        if (attr.getName().getLocalPart().equals("value")) {
                            token.ctag = attr.getValue();
                            return;
                        }
                    }
                } else if (event.isEndElement() && event.asEndElement().getName().getLocalPart().equals("f")) {
                    return;
                }
            }
        } catch (XMLStreamException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads events until a {@code symbol} start element carrying a {@code value}
     * attribute is found, then extends {@code token.ctag} with
     * {@code Tagset.DEAULT_DELIMITER} followed by that value. The search also
     * stops, without touching the token, when a closing {@code f} element is read.
     *
     * Note that the existing {@code ctag} is concatenated as-is, so a {@code null}
     * tag contributes the text {@code "null"} to the result.
     *
     * If the reader raises an {@link XMLStreamException}, the stack trace is
     * printed and the method returns.
     *
     * @param token token whose {@code ctag} field is extended
     */
    public void getMSD(Token token) {
        try {
            boolean finished = false;
            while (!finished) {
                XMLEvent event = this.xmlReader.nextEvent();
                if (event.isEndElement()) {
                    if (event.asEndElement().getName().getLocalPart().equals("f")) {
                        finished = true;
                    }
                    continue;
                }
                if (!event.isStartElement()) {
                    continue;
                }
                StartElement element = event.asStartElement();
                if (!element.getName().getLocalPart().equals("symbol")) {
                    continue;
                }
                Iterator<?> attrs = element.getAttributes();
                while (!finished && attrs.hasNext()) {
                    Attribute attr = (Attribute) attrs.next();
                    if (attr.getName().getLocalPart().equals("value")) {
                        token.ctag = token.ctag + Tagset.DEAULT_DELIMITER + attr.getValue();
                        finished = true;
                    }
                }
            }
        } catch (XMLStreamException e) {
            e.printStackTrace();
        }
    }

    /* JADX WARN: Code restructure failed: missing block: B:10:0x005b, code lost:
    
        if (r0.getName().getLocalPart().equals("f") == false) goto L25;
     */
    /* JADX WARN: Code restructure failed: missing block: B:11:0x005e, code lost:
    
        r0 = r0.getAttributes();
     */
    /* JADX WARN: Code restructure failed: missing block: B:13:0x00aa, code lost:
    
        if (r0.hasNext() != false) goto L12;
     */
    /* JADX WARN: Code restructure failed: missing block: B:14:0x006a, code lost:
    
        r0 = (javax.xml.stream.events.Attribute) r0.next();
     */
    /* JADX WARN: Code restructure failed: missing block: B:15:0x0086, code lost:
    
        if (r0.getName().getLocalPart().equals("name") == false) goto L49;
     */
    /* JADX WARN: Code restructure failed: missing block: B:18:0x0096, code lost:
    
        if (r0.getValue().equals("interpretation") == false) goto L50;
     */
    /* JADX WARN: Code restructure failed: missing block: B:20:0x0099, code lost:
    
        getInterpretation(r4);
        r6 = true;
     */
    /* JADX WARN: Code restructure failed: missing block: B:27:0x00d7, code lost:
    
        if (r6 == false) goto L44;
     */
    /* JADX WARN: Code restructure failed: missing block: B:32:0x00b6, code lost:
    
        if (r0.isEndElement() == false) goto L25;
     */
    /* JADX WARN: Code restructure failed: missing block: B:34:0x00d1, code lost:
    
        if (r0.asEndElement().getName().getLocalPart().equals("fs") == false) goto L25;
     */
    /* JADX WARN: Code restructure failed: missing block: B:35:0x00d4, code lost:
    
        r6 = true;
     */
    /* JADX WARN: Code restructure failed: missing block: B:6:0x002d, code lost:
    
        if (r0.asStartElement().getName().getLocalPart().equals("fs") != false) goto L7;
     */
    /* JADX WARN: Code restructure failed: missing block: B:7:0x0030, code lost:
    
        r0 = r3.xmlReader.nextEvent();
     */
    /* JADX WARN: Code restructure failed: missing block: B:8:0x0040, code lost:
    
        if (r0.isStartElement() == false) goto L20;
     */
    /* JADX WARN: Code restructure failed: missing block: B:9:0x0043, code lost:
    
        r0 = r0.asStartElement();
     */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Decompiler stub: the body of this method was not recovered, so every call
     * throws {@link UnsupportedOperationException}.
     *
     * NOTE(review): the JADX fragments preserved in the comments above suggest that
     * the original read events from {@code xmlReader}, looked for an {@code fs}
     * start element, then examined {@code f} start elements for a {@code name}
     * attribute equal to {@code "interpretation"}, called
     * {@code getInterpretation(token)} on a match, and stopped at a closing
     * {@code fs} element. The exact control flow (branch targets, exception
     * handling) is not visible here; confirm against the bytecode before
     * re-implementing.
     *
     * @param r4 the token argument; not used by the stub
     * @throws UnsupportedOperationException always
     */
    public void getDisamb(termopl.Token r4) {
        /*
            Method dump skipped, instructions count: 274
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: termopl.CorpusReader.getDisamb(termopl.Token):void");
    }

    /**
     * Reads events until a {@code string} start element is found, then splits its
     * text at {@code Tagset.DEAULT_DELIMITER} into {@code token.lemma} (the part
     * before the first delimiter) and {@code token.ctag} (the rest). The search
     * also stops, without touching the token, when a closing {@code f} element is
     * read.
     *
     * Special cases:
     * <ul>
     *   <li>Text that starts with the delimiter: the lemma is taken to be the
     *       delimiter itself and the tag is whatever follows the second
     *       character (empty if there is nothing there).</li>
     *   <li>Text that contains no delimiter at all: the whole text becomes the
     *       lemma and the tag is set to the empty string. Previously this case
     *       raised {@link StringIndexOutOfBoundsException}.</li>
     * </ul>
     *
     * If the reader raises an {@link XMLStreamException}, the stack trace is
     * printed and the method returns.
     *
     * @param token token whose {@code lemma} and {@code ctag} fields are filled in
     */
    public void getInterpretation(Token token) {
        boolean done = false;
        do {
            try {
                XMLEvent nextEvent = this.xmlReader.nextEvent();
                if (nextEvent.isStartElement()) {
                    if (nextEvent.asStartElement().getName().getLocalPart().equals("string")) {
                        String text = this.xmlReader.getElementText();
                        // NOTE(review): the offsets 2 and +1 below assume a one-character
                        // delimiter - confirm against Tagset.DEAULT_DELIMITER.
                        if (text.startsWith(Tagset.DEAULT_DELIMITER)) {
                            token.lemma = Tagset.DEAULT_DELIMITER;
                            // Guard: a text shorter than two characters has no tag part.
                            token.ctag = text.length() >= 2 ? text.substring(2) : "";
                        } else {
                            int split = text.indexOf(Tagset.DEAULT_DELIMITER);
                            if (split < 0) {
                                // No delimiter: substring(0, -1) would throw, so keep the
                                // whole text as the lemma and record an empty tag.
                                token.lemma = text;
                                token.ctag = "";
                            } else {
                                token.lemma = text.substring(0, split);
                                token.ctag = text.substring(split + 1);
                            }
                        }
                        done = true;
                    }
                } else if (nextEvent.isEndElement() && nextEvent.asEndElement().getName().getLocalPart().equals("f")) {
                    done = true;
                }
            } catch (XMLStreamException e) {
                e.printStackTrace();
                return;
            }
        } while (!done);
    }

    /**
     * Loads one sentence from a random-access corpus file and tokenizes it.
     *
     * Up to {@code i2} bytes starting at offset {@code j} are read, decoded as
     * UTF-8 and trimmed. For format code 1 the text is wrapped in
     * {@code <sentence>...</sentence>}, for format code 2 in {@code <s>...</s>};
     * other formats are passed through unchanged. The text is then parsed by a
     * fresh {@code CorpusReader}.
     *
     * Reading is best-effort: if seeking, reading or decoding fails, the stack
     * trace is printed and an empty sentence is parsed instead. Previously the
     * failure left a {@code null} string that was concatenated into the wrapper
     * as the literal text "null".
     *
     * @param randomAccessFile file to read the sentence bytes from
     * @param i                format code handed to the {@code CorpusReader} constructor
     * @param j                byte offset of the sentence in the file
     * @param i2               number of bytes to read
     * @return pair of the result of {@link #loadTokens(CorpusReader)} (which is
     *         {@code null} when no tokens were read) and the reader's multi-word tokens
     */
    public static Pair<LinkedList<Token>, LinkedList<MultiWordToken>> getSentence(RandomAccessFile randomAccessFile, int i, long j, int i2) {
        byte[] bArr = new byte[i2];
        String str = "";
        try {
            randomAccessFile.seek(j);
            // A single read() may deliver fewer bytes than requested, so keep
            // reading until the buffer is full or the end of file is reached.
            int filled = 0;
            while (filled < i2) {
                int count = randomAccessFile.read(bArr, filled, i2 - filled);
                if (count < 0) {
                    break;
                }
                filled += count;
            }
            str = new String(bArr, 0, filled, "UTF8").trim();
        } catch (Exception e) {
            // Deliberately broad, as before: any failure degrades to an empty sentence.
            e.printStackTrace();
        }
        if (i == 1) {
            str = "<sentence>" + str + "</sentence>";
        } else if (i == 2) {
            str = "<s>" + str + "</s>";
        }
        CorpusReader corpusReader = new CorpusReader(i, str);
        return new Pair<>(loadTokens(corpusReader), corpusReader.getMultiWordTokens());
    }

    /**
     * Pulls tokens from the given reader until it either runs out
     * ({@code getNextToken()} returns {@code null}) or yields a token whose
     * {@code stop()} is true; that stopping token is not included.
     *
     * @param corpusReader reader to pull tokens from
     * @return the collected tokens in reading order, or {@code null} when none
     *         were collected
     */
    public static LinkedList<Token> loadTokens(CorpusReader corpusReader) {
        LinkedList<Token> tokens = new LinkedList<>();
        for (Token t = corpusReader.getNextToken(); t != null && !t.stop(); t = corpusReader.getNextToken()) {
            tokens.add(t);
        }
        // Callers receive null, not an empty list, when nothing was read.
        return tokens.isEmpty() ? null : tokens;
    }

    /**
     * Returns a copy of the token list in which every multi-word token stands in
     * for the run of tokens it covers.
     *
     * For each multi-word token the copy is scanned from the start. Tokens from
     * the multi-word token's first component (matched by identity) up to, but not
     * including, its last component are removed, and the last component is
     * replaced by the multi-word token itself. The input list is not modified.
     *
     * @param linkedList  original tokens
     * @param linkedList2 multi-word tokens to substitute
     * @return a new list with the substitutions applied
     */
    public static LinkedList<Token> replaceMWT(LinkedList<? extends Token> linkedList, LinkedList<MultiWordToken> linkedList2) {
        LinkedList<Token> merged = new LinkedList<>();
        merged.addAll(linkedList);
        for (MultiWordToken multiWord : linkedList2) {
            Token head = (Token) multiWord.getTokens().getFirst();
            Token tail = (Token) multiWord.getTokens().getLast();
            ListIterator<Token> cursor = merged.listIterator();
            boolean insideSpan = false;
            Token seen = null;
            // Scanning ends as soon as the last component has been handled.
            while (seen != tail && cursor.hasNext()) {
                seen = cursor.next();
                insideSpan = insideSpan || seen == head;
                if (seen == tail) {
                    cursor.set(multiWord);
                } else if (insideSpan) {
                    cursor.remove();
                }
            }
        }
        return merged;
    }
}
