/*
 * Decompiled with CFR 0.152.
 */
package ipipan.spejd.readers;

import ipipan.spejd.entities.Entity;
import ipipan.spejd.entities.Interpretation;
import ipipan.spejd.entities.NoSpace;
import ipipan.spejd.entities.Segment;
import ipipan.spejd.readers.Reader;
import ipipan.spejd.util.Config;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.nio.charset.Charset;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import morfologik.stemmers.Stempelator;

public class PlainTextReader
extends Reader {
    private Stempelator stemmer;
    private String fileContents;
    private String lastWord;
    private int position;
    private boolean haveMoreSentences;
    private boolean inNoSpace;
    private boolean inEndOfSentence = false;
    private boolean atBeginningOfFile = true;
    public int ogonkified = 0;
    public int ogonkified_total = 0;

    boolean isPunctuation(char ch) {
        int type = Character.getType(ch);
        return type == 20 || type == 21 || type == 22 || type == 23 || type == 24 || type == 30;
    }

    boolean isSentenceBreak(char ch) {
        return ch == '.' || ch == '?' || ch == '!';
    }

    public PlainTextReader(Config conf, Stempelator stemmer) {
        this.conf = conf;
        this.stemmer = stemmer;
        this.position = 0;
        this.haveMoreSentences = true;
    }

    public String readTextFile(String fileName, Charset inputCharset) throws Error {
        this.haveMoreSentences = true;
        this.position = 0;
        this.atBeginningOfFile = true;
        this.fileContents = "";
        try {
            int count;
            int BUFFER_SIZE = 16384;
            char[] buffer = new char[BUFFER_SIZE];
            System.setProperty("file.encoding", inputCharset.name());
            FileInputStream fis = new FileInputStream(fileName);
            InputStreamReader r = new InputStreamReader((InputStream)fis, inputCharset);
            while ((count = ((java.io.Reader)r).read(buffer, 0, BUFFER_SIZE)) >= 0) {
                this.fileContents = String.valueOf(this.fileContents) + new String(buffer, 0, count);
            }
            fis.close();
            ((java.io.Reader)r).close();
        }
        catch (IOException ioe) {
            throw new Error("Error reading text file: " + ioe.getMessage());
        }
        return this.fileContents;
    }

    private Collection<String> splitInterpretation(String interpretation) {
        HashSet<String> retVal = new HashSet<String>();
        String[] parts = interpretation.split("[|]");
        int i = 0;
        while (i < parts.length) {
            String[] colons = parts[i].split("[:]");
            this.iterateAndAdd(colons, 0, retVal, "");
            ++i;
        }
        return retVal;
    }

    private void iterateAndAdd(String[] table, int index, Collection<String> retVal, String soFar) {
        if (index >= table.length) {
            retVal.add(soFar.substring(1));
        } else {
            String[] dots = table[index].split("[.]");
            int j = 0;
            while (j < dots.length) {
                this.iterateAndAdd(table, index + 1, retVal, String.valueOf(soFar) + ":" + dots[j]);
                ++j;
            }
        }
    }

    private LinkedList<Interpretation> ogonkify(String orth) {
        List<String> candidates = this.generateCandidates(orth);
        LinkedList<Interpretation> new_interpretations = new LinkedList<Interpretation>();
        for (String candidate : candidates) {
            String[] interps = this.stemmer.stemAndForm(candidate.toLowerCase(new Locale("pl", "PL")));
            if (interps == null) continue;
            int j = 0;
            while (j < interps.length - 1) {
                if (interps[j] != null && interps[j + 1] != null) {
                    ++this.ogonkified;
                    ++this.ogonkified_total;
                    String[] stringArray = interps[j + 1].split("\\+");
                    int n = stringArray.length;
                    int n2 = 0;
                    while (n2 < n) {
                        String interp_tags = stringArray[n2];
                        String[] stringArray2 = this.conf.tagset.cToFtagArray(interp_tags);
                        int n3 = stringArray2.length;
                        int n4 = 0;
                        while (n4 < n3) {
                            String ftags = stringArray2[n4];
                            new_interpretations.add(new Interpretation(String.valueOf(ftags) + interps[j], false, this.conf));
                            ++n4;
                        }
                        ++n2;
                    }
                }
                j += 2;
            }
        }
        if (candidates.size() > 1) {
            --this.ogonkified;
            --this.ogonkified_total;
        }
        return new_interpretations;
    }

    public String getNextToken(int pos) {
        Character ch;
        int startfrom = pos;
        StringBuilder sb = new StringBuilder();
        while (pos < this.fileContents.length() && Character.isLetter((ch = Character.valueOf(this.fileContents.charAt(pos))).charValue()) && startfrom + 5 > pos) {
            sb.append(ch);
            ++pos;
        }
        return sb.toString();
    }

    @Override
    public Entity loadToken() {
        String orth = "";
        boolean inWord = false;
        while (this.position < this.fileContents.length()) {
            if (this.inEndOfSentence) {
                this.inEndOfSentence = false;
                return null;
            }
            Character ch = Character.valueOf(this.fileContents.charAt(this.position));
            if (Character.isWhitespace(ch.charValue())) {
                this.inNoSpace = false;
            } else if (this.inNoSpace) {
                this.inNoSpace = false;
                return new NoSpace(this.conf);
            }
            ++this.position;
            if (Character.isLetterOrDigit(ch.charValue())) {
                if (!inWord) {
                    inWord = true;
                }
                orth = String.valueOf(orth) + ch;
                continue;
            }
            if (this.isPunctuation(ch.charValue()) && !this.isSentenceBreak(ch.charValue())) {
                if (inWord) {
                    this.inNoSpace = true;
                    --this.position;
                    break;
                }
                Interpretation[] punct_interp = new Interpretation[]{new Interpretation(String.valueOf(this.conf.tagset.cToFtag("interp")) + ch.toString(), true, this.conf)};
                this.inNoSpace = true;
                return new Segment(null, ch.toString(), punct_interp, this.conf);
            }
            if (this.isSentenceBreak(ch.charValue())) {
                String nextTok;
                this.inNoSpace = true;
                if (inWord && orth.length() > 0) {
                    --this.position;
                    break;
                }
                boolean nextSentence = false;
                if (this.lastWord != null) {
                    nextSentence = true;
                    if (ch.charValue() == '.' && this.conf.acronymsAfter.contains(this.lastWord.toLowerCase())) {
                        nextSentence = false;
                    }
                }
                if (nextSentence) {
                    if (this.fileContents.length() > this.position + 1) {
                        char cr = this.fileContents.charAt(this.position);
                        if (this.isSentenceBreak(cr)) {
                            nextSentence = false;
                        }
                    } else {
                        nextSentence = false;
                    }
                }
                if (nextSentence && (nextTok = this.getNextToken(this.position)) != null && ch.charValue() == '.' && this.conf.acronymsBefore.contains(nextTok.toLowerCase())) {
                    nextSentence = false;
                }
                if (nextSentence) {
                    this.lastWord = null;
                    this.inEndOfSentence = true;
                } else {
                    this.inEndOfSentence = false;
                }
                Interpretation[] punct_interp = new Interpretation[]{new Interpretation(String.valueOf(this.conf.tagset.cToFtag("interp")) + ch.toString(), true, this.conf)};
                return new Segment(null, ch.toString(), punct_interp, this.conf);
            }
            if (inWord) break;
        }
        if (this.position > this.fileContents.length() - 1 && orth.length() == 0) {
            this.haveMoreSentences = false;
            this.inEndOfSentence = false;
            this.lastWord = null;
            this.inNoSpace = false;
            inWord = false;
            return null;
        }
        this.lastWord = orth;
        String[] interps = this.stemmer.stemAndForm(orth);
        if (interps == null) {
            String upCased = orth.substring(1);
            upCased = String.valueOf(Character.toUpperCase(orth.charAt(0))) + upCased.toLowerCase(new Locale("pl", "PL"));
            interps = this.stemmer.stemAndForm(upCased);
        }
        LinkedList<Interpretation> interpretations = new LinkedList<Interpretation>();
        if (interps != null) {
            int j = 0;
            while (j < interps.length - 1) {
                if (interps[j] != null && interps[j + 1] != null) {
                    String[] stringArray = interps[j + 1].split("\\+");
                    int n = stringArray.length;
                    int n2 = 0;
                    while (n2 < n) {
                        String interp_tags = stringArray[n2];
                        String[] stringArray2 = this.conf.tagset.cToFtagArray(interp_tags);
                        int n3 = stringArray2.length;
                        int n4 = 0;
                        while (n4 < n3) {
                            String ftags = stringArray2[n4];
                            interpretations.add(new Interpretation(String.valueOf(ftags) + interps[j], false, this.conf));
                            ++n4;
                        }
                        ++n2;
                    }
                }
                j += 2;
            }
        }
        if (this.conf.ogonkifyStrategy == 'M' && interpretations.isEmpty() || this.conf.ogonkifyStrategy == 'A') {
            interpretations.addAll(this.ogonkify(orth));
        }
        if (interpretations.isEmpty()) {
            interpretations.add(new Interpretation(String.valueOf(this.conf.tagset.cToFtag("ign")) + orth, false, this.conf));
        }
        return new Segment(null, orth, interpretations.toArray(new Interpretation[0]), this.conf);
    }

    public List<String> generateCandidates(String orth) {
        LinkedList<String> candidates = new LinkedList<String>();
        candidates.add("");
        int i = 0;
        if (orth.length() >= this.conf.ogonkifyMinLength && orth.length() <= this.conf.ogonkifyMaxLength) {
            while (i < orth.length()) {
                String nowAdding = orth.substring(i, i + 1);
                LinkedList<String> newcandidates = new LinkedList<String>();
                for (String candidate : candidates) {
                    newcandidates.add(String.valueOf(candidate) + nowAdding);
                    String[] substitutions = this.conf.ogonkifySubstitutions.get(nowAdding);
                    if (substitutions == null) continue;
                    String[] stringArray = substitutions;
                    int n = substitutions.length;
                    int n2 = 0;
                    while (n2 < n) {
                        String s = stringArray[n2];
                        newcandidates.add(String.valueOf(candidate) + s);
                        ++n2;
                    }
                }
                candidates = newcandidates;
                ++i;
            }
        }
        return candidates;
    }

    public void setFileContents(String fileContents) {
        this.fileContents = fileContents;
        this.haveMoreSentences = true;
        this.position = 0;
    }

    @Override
    public boolean nextSentence(PrintStream out) {
        if (this.atBeginningOfFile) {
            out.print("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<cesAna xmlns:xlink=\"http://www.w3.org/1999/xlink\" type=\"pre_morph\" version=\"IPI-1.2\">\n<chunkList>\n");
            this.atBeginningOfFile = false;
        }
        if (!this.haveMoreSentences) {
            out.println("</chunkList>\n</cesAna>\n");
        }
        return this.haveMoreSentences;
    }

    @Override
    public void close() {
    }
}

