/*
 * Decompiled with CFR 0.152.
 */
package termopl;

import java.io.File;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.ListIterator;
import java.util.Set;
import javax.swing.JOptionPane;
import termopl.Bigram;
import termopl.CorpusReader;
import termopl.FileDescr;
import termopl.Form;
import termopl.MatchedFragment;
import termopl.MatchedToken;
import termopl.MultiWordToken;
import termopl.Pair;
import termopl.Preferences;
import termopl.Range;
import termopl.SentenceParser;
import termopl.SentenceRef;
import termopl.SentenceRefEx;
import termopl.Tagset;
import termopl.Template;
import termopl.Term;
import termopl.TermEx;
import termopl.TermMatcher;
import termopl.TermoPL;
import termopl.TermoPLDocument;
import termopl.Token;
import termopl.UDToken;

public class ExtractorEngine
extends Thread {
    // Tag and relation inventories. The names suggest Universal Dependencies
    // part-of-speech tags and dependency relations; how each list is consulted
    // is defined by methods outside this part of the file — TODO confirm.
    public static final String[] HEAD_POS = new String[]{"NOUN", "PROPN"};
    public static final String[] NON_HEAD_POS = new String[]{"ADJ", "ADP", "ADV", "DET", "NUM", "SCONJ"};
    public static final String[] OBLIGATORY_REL = new String[]{"case", "case:poss", "ccomp", "compound", "compound:prt", "det", "expl:pv", "fixed", "flat", "iobj", "obj", "amod:flat", "nmod:arg", "nmod:flat", "obl:agent", "obl:arg", "xcomp"};
    public static final String[] FACULTATIVE_REL = new String[]{"acl", "advmod", "advmod:emph", "amod", "appos", "nmod", "nmod:poss", "nummod", "nummod:gov", "obl"};
    public static final String[] HEAD_PHRASE_REL = new String[]{"amod", "amod:flat", "nummod"};
    // run() initialises maxTermLength to this same value.
    public static final int MAX_TERM_LEN = 6;
    // Document that owns this engine: source of preferences, templates, word
    // lists and progress/cancel callbacks.
    private TermoPLDocument doc;
    private Tagset tagset;
    private CorpusReader corpusReader;
    // Non-null only while runParser() is executing.
    private SentenceParser parser;
    // Created in the constructor only when the NPMI method is enabled; used to
    // sort the bigrams of a fragment in collectTerms().
    private BigramComp bigramComparator;
    // Input files; preprocessFiles() replaces each entry with the file returned
    // by preprocessFile().
    private File[] searchFiles;
    private Set<String> stopWords;
    // Term keys that collectTermCandidate() refuses to register.
    private Set<String> commonTerms;
    private Template compPreps;
    private TermMatcher matcher;
    private TermMatcher compprepMatcher;
    private LinkedList<FileDescr> analyzedFiles;
    // Matched fragments queued for collectTerms(), which consumes the list.
    private LinkedList<MatchedFragment> maxTerms;
    // Term key -> term; the values become the result array at the end of run().
    private HashMap<String, Term> termMap;
    private HashMap<String, Term> waitingMap;
    // Allocated only when the NPMI method is enabled. calcNPMI() replaces every
    // bigram value with its NPMI score; before that the values are presumably
    // occurrence counts filled in while reading the corpus — TODO confirm.
    private HashMap<String, Integer> unigrams;
    private HashMap<Bigram, Float> bigrams;
    // Result of the last successful run (null after a cancelled/failed run).
    private Term[] terms;
    // ntok and nbigrams are the denominators used by calcNPMI().
    private int ntok;
    private int nbigrams;
    private int nsent;
    private int fileID;
    // Determiner handling mode: taken from doc.acceptDET() in run() and possibly
    // overridden in preprocessFiles().
    private int acceptDET;
    // Counters incremented by preprocessFile() while checkDetRatio is set.
    private int nouns;
    private int dets;
    private int maxTermLength;
    private boolean checkDetRatio;
    // Run state flags tested throughout the processing loops.
    private boolean cancelled;
    private boolean error;
    // Set to false by the constructor.
    private boolean allowDiscontinuities;

    /**
     * Creates an extraction worker bound to the given document.
     *
     * <p>Only configuration is captured here (tagset, stop words, compound
     * prepositions, common terms and, when the NPMI method is enabled, a bigram
     * comparator). All per-run state is initialised by {@link #run()}.
     *
     * @param doc         document supplying preferences and word lists
     * @param searchFiles files to process; the array is stored as-is, not copied
     */
    public ExtractorEngine(TermoPLDocument doc, File[] searchFiles) {
        this.doc = doc;
        this.searchFiles = searchFiles;
        this.tagset = doc.getPreferences().getTagset();
        this.corpusReader = new CorpusReader();
        this.stopWords = doc.getStopWords();
        this.compPreps = doc.getCompoundPrepositions();
        this.commonTerms = doc.getCommonTerms();
        if (doc.getPreferences().useNPMIMethod) {
            this.bigramComparator = new BigramComp();
        } else {
            this.bigramComparator = null;
        }
        // Nothing has been parsed or collected yet.
        this.parser = null;
        this.termMap = null;
        this.allowDiscontinuities = false;
    }

    /**
     * Runs one complete extraction pass on this thread.
     *
     * <p>Sequence: reset the per-run counters and containers, pre-process the
     * input files ({@link #preprocessFiles()}), read them
     * ({@link #processFiles()}), optionally convert the bigram statistics into
     * NPMI scores ({@link #calcNPMI()}), seed the term map with the terms the
     * document already holds, and finally collect candidates from the queued
     * fragments ({@link #collectTerms()}).
     *
     * <p>If the run was cancelled or failed, every intermediate structure and
     * the result array are set to {@code null}. Otherwise {@code terms} receives
     * the values of the term map, or stays {@code null} when the map is empty.
     */
    @Override
    public void run() {
        this.acceptDET = this.doc.acceptDET();
        this.checkDetRatio = false;
        this.cancelled = false;
        this.error = false;
        this.nouns = 0;
        this.dets = 0;
        this.maxTermLength = MAX_TERM_LEN;
        this.nbigrams = 0;
        this.nsent = 0;
        this.ntok = 0;
        this.fileID = 0;
        // The result array is rebuilt from termMap at the end of a successful
        // run; nothing reads it in between, so it simply starts out empty.
        this.terms = null;
        this.matcher = new TermMatcher(this.doc.getSearchTemplate(), this.tagset);
        this.compprepMatcher = this.compPreps != null ? new TermMatcher(this.doc.getCompoundPrepositions(), this.tagset) : null;
        this.maxTerms = new LinkedList<MatchedFragment>();
        if (this.doc.getPreferences().useNPMIMethod) {
            this.unigrams = new HashMap<String, Integer>(100000);
            this.bigrams = new HashMap<Bigram, Float>(400000);
        } else {
            this.unigrams = null;
            this.bigrams = null;
        }
        this.doc.changeProgress(this.doc.getPreferences().useUD ? 2 : 1);
        this.termMap = new HashMap<String, Term>();
        this.waitingMap = new HashMap<String, Term>();

        this.preprocessFiles();
        if (!this.cancelled && !this.error) {
            this.processFiles();
        }

        if (this.error) {
            this.doc.interrupt();
            if (TermoPL.batchMode) {
                System.err.println("Incompatible data format with the selected search method.");
            } else {
                // NOTE(review): this dialog is opened from the worker thread, not
                // from the Swing event dispatch thread.
                JOptionPane.showMessageDialog(TermoPL.dialogOwner, "Incompatible file format with the selected search method.", "Error", JOptionPane.ERROR_MESSAGE);
            }
        }

        if (this.cancelled || this.error) {
            // Drop all partial results.
            this.termMap = null;
            this.waitingMap = null;
            this.maxTerms = null;
            this.unigrams = null;
            this.bigrams = null;
            this.terms = null;
            return;
        }

        if (this.doc.getPreferences().useNPMIMethod) {
            this.calcNPMI();
        }
        this.doc.changeProgress(4);

        // Seed the map with the terms the document already has, keyed the same
        // way the document keys them (simplified form or raw string).
        Term[] existing = this.doc.getTerms();
        if (existing != null) {
            boolean bf = this.doc.getPreferences().calculateBaseForms;
            for (Term t : existing) {
                String key = bf ? this.doc.calcSimplifiedForm(t) : t.str;
                this.termMap.put(key, t);
            }
        }

        this.collectTerms();

        // Release the working structures before publishing the result.
        this.maxTerms = null;
        this.unigrams = null;
        this.bigrams = null;
        if (!this.termMap.isEmpty()) {
            this.terms = this.termMap.values().toArray(new Term[0]);
        }
        this.waitingMap = null;
        System.gc();
    }

    /**
     * Drains the queue of matched fragments and registers term candidates.
     *
     * <p>Every fragment is registered as a whole (without context). Fragments
     * longer than one token are additionally decomposed into nested candidates
     * using the strategy selected in the preferences: one of the three NPMI
     * variants, left-to-right trimming, or trimming from both ends.
     *
     * <p>Progress is reported after every 1000 fragments; a garbage collection
     * is requested after every 25000 such reports. The loop stops as soon as
     * the run is flagged as cancelled.
     */
    public void collectTerms() {
        final int total = this.maxTerms.size();
        int processed = 0;
        int sinceReport = 0;
        int reportsSinceGc = 0;
        this.report(0, 0.0f);
        while (!this.maxTerms.isEmpty()) {
            MatchedFragment fragment = this.maxTerms.removeFirst();
            SentenceRef sentence = fragment.getRef();
            int docID = fragment.getDocID();
            if (this.cancelled) {
                break;
            }
            LinkedList<MatchedToken> matched = fragment.getMatchedTokens();
            int size = matched.size();
            this.collectTermCandidate(matched, docID, sentence, null, null);
            if (size > 1) {
                LinkedList<Token> source = fragment.getTokens();
                Range whole = new Range(0, size - 1);
                Preferences prefs = this.doc.getPreferences();
                if (prefs.useNPMIMethod) {
                    // One bigram per pair of adjacent tokens. The third argument is
                    // the 1-based position of the boundary: the bigram stored at
                    // array index k separates token k from token k + 1.
                    Bigram[] links = new Bigram[size - 1];
                    int filled = 0;
                    Token previous = null;
                    for (MatchedToken mt : matched) {
                        Token current = mt.token;
                        if (previous != null) {
                            links[filled] = new Bigram(previous.lemma, current.lemma, filled + 1);
                            ++filled;
                        }
                        previous = current;
                    }
                    if (links.length > 1) {
                        Arrays.sort(links, this.bigramComparator);
                    }
                    int method = prefs.NPMIMethod;
                    if (method == 1) {
                        this.collect_NPMI1(matched, source, docID, sentence, links, whole);
                    } else if (method == 2) {
                        this.collect_NPMI2(matched, source, docID, sentence, links, whole);
                    } else if (method == 3) {
                        this.collect_NPMI3(matched, source, docID, sentence, links, whole);
                    }
                } else if (prefs.trimFromLeftToRight) {
                    this.collectLR(matched, docID, sentence);
                } else {
                    this.collect(matched, source, docID, sentence, whole, new LinkedList<Range>());
                }
            }
            ++processed;
            if (++sinceReport >= 1000) {
                sinceReport = 0;
                if (++reportsSinceGc >= 25000) {
                    reportsSinceGc = 0;
                    System.gc();
                }
                this.report(this.termMap.size(), (float)processed / (float)total);
            }
        }
        this.report(this.termMap.size(), 1.0f);
    }

    /**
     * Builds the lookup key of a phrase from the lemmas of its matched tokens.
     *
     * <p>Lemmas are concatenated in list order. A single space is inserted
     * before a lemma only when the preceding token is flagged
     * {@code spaceAfter}; tokens glued to their predecessor extend the current
     * word instead of starting a new one.
     *
     * @param tokenList matched tokens of the phrase, in order
     * @return the key together with the number of space-separated words in it
     *         (0 for an empty list)
     */
    public KeyLen getKeyFromMatchedTokens(LinkedList<MatchedToken> tokenList) {
        StringBuilder key = new StringBuilder();
        int len = 0;
        Token prev = null;
        for (MatchedToken mt : tokenList) {
            Token t = mt.token;
            if (prev == null) {
                // First token always opens the first word.
                ++len;
            } else if (prev.spaceAfter) {
                key.append(' ');
                ++len;
            }
            key.append(t.lemma);
            prev = t;
        }
        return new KeyLen(key.toString(), len);
    }

    /**
     * Builds the lookup key of a phrase from the lemmas of plain tokens.
     *
     * <p>Works like {@link #getKeyFromMatchedTokens(LinkedList)}, but a space is
     * also inserted when the token's index is not the direct successor of the
     * previous token's index, i.e. when tokens were skipped between the two.
     * The index is {@code UDToken.index} for UD tokens and
     * {@code MultiWordToken.endToken()} otherwise.
     *
     * @param ph tokens of the phrase, in order; every element is expected to be
     *           a {@code UDToken} or a {@code MultiWordToken} (any other type
     *           fails the cast below)
     * @return the key together with the number of space-separated words in it
     *         (0 for an empty list)
     */
    public KeyLen getKeyFromTokens(LinkedList<? extends Token> ph) {
        StringBuilder key = new StringBuilder();
        int len = 0;
        int prevIndex = -1;
        Token prev = null;
        for (Token t : ph) {
            int index = t instanceof UDToken ? ((UDToken)t).index : ((MultiWordToken)t).endToken();
            if (prev == null) {
                // First token always opens the first word.
                ++len;
            } else if (prev.spaceAfter || index > prevIndex + 1) {
                key.append(' ');
                ++len;
            }
            key.append(t.lemma);
            prev = t;
            prevIndex = index;
        }
        return new KeyLen(key.toString(), len);
    }

    /**
     * Registers one occurrence of a term candidate in the term map.
     *
     * <p>The {@code parent} link of every matched token is cleared first. The
     * key is derived from the token lemmas (lower-cased when the preferences ask
     * for case-insensitive matching). Keys listed among the common terms are
     * ignored. Otherwise the term is created on first sight and the context,
     * the surface form and, if indexing is enabled, the sentence reference are
     * added to it.
     *
     * @param tokenList    matched tokens forming the candidate
     * @param docID        identifier of the document the occurrence comes from
     * @param sr           sentence the occurrence comes from
     * @param leftContext  lemma preceding the candidate, or {@code null}
     * @param rightContext lemma following the candidate, or {@code null}
     */
    public void collectTermCandidate(LinkedList<MatchedToken> tokenList, int docID, SentenceRef sr, String leftContext, String rightContext) {
        for (MatchedToken matched : tokenList) {
            matched.parent = null;
        }
        KeyLen keylen = this.getKeyFromMatchedTokens(tokenList);
        Preferences prefs = this.doc.getPreferences();
        if (prefs.ignoreCase) {
            keylen.key = keylen.key.toLowerCase();
        }
        String key = keylen.key;
        if (this.commonTerms != null && this.commonTerms.contains(key)) {
            return;
        }

        Term term = this.termMap.get(key);
        // An occurrence with any context is one nested inside a longer phrase.
        boolean inner = leftContext != null || rightContext != null;
        if (term == null) {
            if (prefs.makeGroups) {
                term = new TermEx(key, keylen.len);
            } else {
                term = new Term(key, keylen.len);
            }
            this.termMap.put(key, term);
        }
        term.addContext(leftContext, rightContext);
        term.addForm(Form.createFromMatchedTokens(tokenList), docID, inner, prefs.collectAllForms);
        if (prefs.makeIndex) {
            term.addSentenceRef(sr);
        }
    }

    /**
     * Trims tokens from the right end of {@code tokenList} until an acceptable
     * cut point is reached.
     *
     * <p>Each removed token is pushed to the front of {@code removedList} and
     * {@code r_out.right} is decremented. A cut is acceptable when the removed
     * token is not tagged {@code 'E'} or {@code 'I'} in its {@code ner} field
     * and the token that is now last is flagged {@code spaceAfter}. At least one
     * token always stays in {@code tokenList}.
     *
     * @param r_in        not used
     * @param r_out       range whose right bound follows the trimming
     * @param tokenList   list to trim in place
     * @param removedList receives the removed tokens in their original order
     * @return {@code true} if an acceptable cut point was found
     */
    public boolean cutRight(Range r_in, Range r_out, LinkedList<MatchedToken> tokenList, LinkedList<MatchedToken> removedList) {
        while (tokenList.size() > 1) {
            --r_out.right;
            MatchedToken removed = tokenList.removeLast();
            removedList.addFirst(removed);
            boolean entityTag = removed.token.ner == 'E' || removed.token.ner == 'I';
            if (!entityTag && tokenList.getLast().token.spaceAfter) {
                return true;
            }
        }
        return false;
    }

    /**
     * Trims tokens from the left end of {@code tokenList} until an acceptable
     * cut point is reached, keeping {@code r_out.left} in step.
     *
     * <p>Each removed token is appended to {@code removedList} and
     * {@code r_out.left} is incremented. A cut is acceptable when the removed
     * token is flagged {@code spaceAfter} and is not tagged {@code 'B'} or
     * {@code 'I'} in its {@code ner} field. At least one token always stays in
     * {@code tokenList}.
     *
     * @param r_in        not used
     * @param r_out       range whose left bound follows the trimming
     * @param tokenList   list to trim in place
     * @param removedList receives the removed tokens in their original order
     * @return {@code true} if an acceptable cut point was found
     */
    public boolean cutLeft(Range r_in, Range r_out, LinkedList<MatchedToken> tokenList, LinkedList<MatchedToken> removedList) {
        while (tokenList.size() > 1) {
            ++r_out.left;
            MatchedToken removed = tokenList.removeFirst();
            removedList.add(removed);
            boolean entityTag = removed.token.ner == 'B' || removed.token.ner == 'I';
            if (removed.token.spaceAfter && !entityTag) {
                return true;
            }
        }
        return false;
    }

    /**
     * Trims tokens from the left end of {@code tokenList} until an acceptable
     * cut point is reached.
     *
     * <p>Each removed token is appended to {@code removedList}. A cut is
     * acceptable when the removed token is flagged {@code spaceAfter} and is not
     * tagged {@code 'B'} or {@code 'I'} in its {@code ner} field. At least one
     * token always stays in {@code tokenList}.
     *
     * @param tokenList   list to trim in place
     * @param removedList receives the removed tokens in their original order
     * @return {@code true} if an acceptable cut point was found
     */
    public boolean cutLeft(LinkedList<MatchedToken> tokenList, LinkedList<MatchedToken> removedList) {
        while (tokenList.size() > 1) {
            MatchedToken removed = tokenList.removeFirst();
            removedList.add(removed);
            boolean entityTag = removed.token.ner == 'B' || removed.token.ner == 'I';
            if (removed.token.spaceAfter && !entityTag) {
                return true;
            }
        }
        return false;
    }

    /**
     * Splits the tokens inside range {@code r} at the first usable bigram
     * boundary.
     *
     * <p>Bigrams are examined in array order. A boundary {@code div} is usable
     * when it lies strictly inside the range ({@code r.left < div <= r.right}),
     * the token just before it is flagged {@code spaceAfter} and that token is
     * not tagged {@code 'B'} or {@code 'I'} in its {@code ner} field. On
     * success the tokens at positions {@code r.left .. div-1} are appended to
     * {@code phr1} and those at {@code div .. r.right} to {@code phr2}.
     *
     * @param r         inclusive range of positions in {@code tokenList}
     * @param bgs       candidate boundaries, in order of preference
     * @param tokenList tokens of the whole fragment
     * @param phr1      receives the left part
     * @param phr2      receives the right part
     * @return the chosen boundary, or 0 when no boundary is usable (both output
     *         lists are then left untouched)
     */
    public int cut(Range r, Bigram[] bgs, LinkedList<MatchedToken> tokenList, LinkedList<MatchedToken> phr1, LinkedList<MatchedToken> phr2) {
        int split = 0;
        boolean usable = false;
        for (Bigram bg : bgs) {
            split = bg.div;
            if (split <= r.left || split > r.right) {
                continue;
            }
            Token before = tokenList.get(split - 1).token;
            if (before.ner != 'B' && before.ner != 'I' && before.spaceAfter) {
                usable = true;
                break;
            }
        }
        if (!usable) {
            return 0;
        }

        int index = 0;
        for (MatchedToken mt : tokenList) {
            if (index > r.right) {
                break;
            }
            if (index >= r.left) {
                LinkedList<MatchedToken> target = index < split ? phr1 : phr2;
                target.add(mt);
            }
            ++index;
        }
        return split;
    }

    /**
     * Recursively collects the nested candidates obtained by trimming the
     * phrase from its right end and from its left end.
     *
     * <p>{@code ranges} is the set of sub-ranges already explored; it prevents a
     * sub-phrase reachable through several trim sequences from being collected
     * more than once. Each trimmed range gets its own {@code Range} object:
     * the range handed to the recursive call is stored in {@code ranges}, so
     * reusing (and mutating) it for the second trim would corrupt the stored
     * entry and make the second branch look "already visited". Membership is
     * tested on the {@code left}/{@code right} bounds directly.
     *
     * <p>{@code tokenList} is modified during the call but restored to its
     * original content before returning.
     *
     * @param tokenList matched tokens covering range {@code r}
     * @param source    tokens used to look up the left/right context lemmas
     * @param docID     identifier of the document the fragment comes from
     * @param sr        sentence the fragment comes from
     * @param r         range covered by {@code tokenList}; recorded as visited
     * @param ranges    ranges visited so far; extended by this call
     */
    public void collect(LinkedList<MatchedToken> tokenList, LinkedList<Token> source, int docID, SentenceRef sr, Range r, LinkedList<Range> ranges) {
        ranges.add(r);
        if (tokenList.size() <= 1) {
            return;
        }
        TermMatcher tm = new TermMatcher(this.doc.getSearchTemplate(), this.tagset);
        LinkedList<MatchedToken> removedList = new LinkedList<MatchedToken>();
        int s = source.size();

        // Trim from the right.
        Range rightCut = new Range(r.left, r.right);
        if (this.cutRight(r, rightCut, tokenList, removedList) && !ExtractorEngine.rangeVisited(ranges, rightCut)) {
            tm.setMatchedTokens(tokenList);
            if (tm.match()) {
                String leftContext = rightCut.left <= 0 ? null : source.get(rightCut.left - 1).lemma;
                String rightContext = rightCut.right >= s - 1 ? null : source.get(rightCut.right + 1).lemma;
                this.collectTermCandidate(tm.getMatchedFragment().getMatchedTokens(), docID, sr, leftContext, rightContext);
            }
            this.collect(tokenList, source, docID, sr, rightCut, ranges);
        }
        // Put the trimmed tokens back at the end.
        tokenList.addAll(removedList);
        removedList.clear();

        // Trim from the left, with a fresh range so that rightCut (possibly
        // stored in ranges by now) is never modified.
        Range leftCut = new Range(r.left, r.right);
        if (this.cutLeft(r, leftCut, tokenList, removedList) && !ExtractorEngine.rangeVisited(ranges, leftCut)) {
            tm.setMatchedTokens(tokenList);
            if (tm.match()) {
                String leftContext = leftCut.left <= 0 ? null : source.get(leftCut.left - 1).lemma;
                String rightContext = leftCut.right >= s - 1 ? null : source.get(leftCut.right + 1).lemma;
                this.collectTermCandidate(tm.getMatchedFragment().getMatchedTokens(), docID, sr, leftContext, rightContext);
            }
            this.collect(tokenList, source, docID, sr, leftCut, ranges);
        }
        // Put the trimmed tokens back at the front.
        tokenList.addAll(0, removedList);
    }

    /** Tells whether a range with the same bounds as {@code candidate} is already in {@code ranges}. */
    private static boolean rangeVisited(LinkedList<Range> ranges, Range candidate) {
        for (Range seen : ranges) {
            if (seen.left == candidate.left && seen.right == candidate.right) {
                return true;
            }
        }
        return false;
    }

    /**
     * Recursively collects the nested candidates obtained by trimming the
     * phrase from the left only.
     *
     * <p>After each successful trim the remaining tokens are matched against
     * the search template; a match is registered with the lemma of the last
     * removed token as its left context and no right context.
     * {@code tokenList} is restored to its original content before returning.
     *
     * @param tokenList matched tokens of the phrase
     * @param docID     identifier of the document the fragment comes from
     * @param sr        sentence the fragment comes from
     */
    public void collectLR(LinkedList<MatchedToken> tokenList, int docID, SentenceRef sr) {
        if (tokenList.size() <= 1) {
            return;
        }
        TermMatcher tm = new TermMatcher(this.doc.getSearchTemplate(), this.tagset);
        LinkedList<MatchedToken> dropped = new LinkedList<MatchedToken>();
        if (this.cutLeft(tokenList, dropped)) {
            tm.setMatchedTokens(tokenList);
            if (tm.match()) {
                LinkedList<MatchedToken> nested = tm.getMatchedFragment().getMatchedTokens();
                String leftContext = dropped.getLast().token.lemma;
                this.collectTermCandidate(nested, docID, sr, leftContext, null);
            }
            this.collectLR(tokenList, docID, sr);
        }
        // Put the trimmed tokens back at the front.
        tokenList.addAll(0, dropped);
    }

    /**
     * Recursively splits range {@code r} at the first usable bigram boundary
     * (see {@link #cut}) and collects the halves that match the search
     * template.
     *
     * <p>Both halves are processed recursively regardless of whether they
     * matched. Recursion ends when the range holds a single token or
     * {@code cut} finds no usable boundary.
     *
     * @param tokenList matched tokens of the whole fragment
     * @param source    tokens used to look up the left/right context lemmas
     * @param docID     identifier of the document the fragment comes from
     * @param sr        sentence the fragment comes from
     * @param bgs       bigrams of the fragment, in order of preference
     * @param r         inclusive range of positions to split
     */
    public void collect_NPMI1(LinkedList<MatchedToken> tokenList, LinkedList<Token> source, int docID, SentenceRef sr, Bigram[] bgs, Range r) {
        if (r.left >= r.right) {
            return;
        }
        LinkedList<MatchedToken> head = new LinkedList<MatchedToken>();
        LinkedList<MatchedToken> tail = new LinkedList<MatchedToken>();
        int div = this.cut(r, bgs, tokenList, head, tail);
        if (div <= 0) {
            return;
        }
        TermMatcher tm = new TermMatcher(this.doc.getSearchTemplate(), this.tagset);
        int lastPos = source.size() - 1;

        // Left half: positions r.left .. div-1.
        Range headRange = new Range(r.left, div - 1);
        tm.setMatchedTokens(head);
        if (tm.match()) {
            String leftContext = headRange.left <= 0 ? null : source.get(headRange.left - 1).lemma;
            String rightContext = headRange.right >= lastPos ? null : source.get(headRange.right + 1).lemma;
            this.collectTermCandidate(tm.getMatchedFragment().getMatchedTokens(), docID, sr, leftContext, rightContext);
        }
        this.collect_NPMI1(tokenList, source, docID, sr, bgs, headRange);

        // Right half: positions div .. r.right.
        Range tailRange = new Range(div, r.right);
        tm.setMatchedTokens(tail);
        if (tm.match()) {
            String leftContext = tailRange.left <= 0 ? null : source.get(tailRange.left - 1).lemma;
            String rightContext = tailRange.right >= lastPos ? null : source.get(tailRange.right + 1).lemma;
            this.collectTermCandidate(tm.getMatchedFragment().getMatchedTokens(), docID, sr, leftContext, rightContext);
        }
        this.collect_NPMI1(tokenList, source, docID, sr, bgs, tailRange);
    }

    /**
     * Recursively splits range {@code r} at bigram boundaries and collects the
     * halves that match the search template (selected by collectTerms() when
     * {@code prefs.NPMIMethod == 2}).
     *
     * <p>Boundaries lying inside the range are tried in the order of
     * {@code bgs} until one is found for which at least one half matches. Each
     * matching half is registered as a candidate, and both halves of that split
     * are processed recursively. If no boundary yields a matching half, the
     * recursion continues on the two halves of the first boundary tried.
     *
     * <p>Unlike {@link #cut}, this method does not look at the {@code ner} or
     * {@code spaceAfter} fields of the token at the boundary.
     *
     * @param tokenList matched tokens of the whole fragment; range positions index into it
     * @param source    tokens used to look up the left/right context lemmas
     * @param docID     identifier of the document the fragment comes from
     * @param sr        sentence the fragment comes from
     * @param bgs       bigrams of the fragment, in order of preference
     * @param r         inclusive range of positions to split
     */
    public void collect_NPMI2(LinkedList<MatchedToken> tokenList, LinkedList<Token> source, int docID, SentenceRef sr, Bigram[] bgs, Range r) {
        if (r.left < r.right) {
            TermMatcher tm = new TermMatcher(this.doc.getSearchTemplate(), this.tagset);
            LinkedList<MatchedToken> phr1 = null;
            LinkedList<MatchedToken> phr2 = null;
            Range rg1 = null;
            Range rg2 = null;
            // Halves of the first boundary tried; fallback when nothing matches.
            Range frg1 = null;
            Range frg2 = null;
            int pos = 0;
            byte div = 1;
            int s = source.size();
            boolean firstTry = true;
            // Keep trying boundaries while neither half has matched.
            while (phr1 == null && phr2 == null) {
                String rightContext;
                String leftContext;
                boolean found = false;
                // Advance to the next bigram whose boundary lies inside r.
                while (pos < bgs.length) {
                    div = bgs[pos].div;
                    if (div > r.left && div <= r.right) {
                        found = true;
                        ++pos;
                        break;
                    }
                    ++pos;
                }
                // No boundary left to try.
                if (!found) break;
                rg1 = new Range(r.left, div - 1);
                rg2 = new Range(div, r.right);
                if (firstTry) {
                    frg1 = rg1;
                    frg2 = rg2;
                    firstTry = false;
                }
                // Distribute the tokens of r over the two halves.
                phr1 = new LinkedList<MatchedToken>();
                phr2 = new LinkedList<MatchedToken>();
                int i = 0;
                for (MatchedToken mt : tokenList) {
                    if (i > r.right) break;
                    if (i >= r.left) {
                        if (i < div) {
                            phr1.add(mt);
                        } else {
                            phr2.add(mt);
                        }
                    }
                    ++i;
                }
                // A half that does not match the template is reset to null.
                tm.setMatchedTokens(phr1);
                phr1 = tm.match() ? tm.getMatchedFragment().getMatchedTokens() : null;
                tm.setMatchedTokens(phr2);
                phr2 = tm.match() ? tm.getMatchedFragment().getMatchedTokens() : null;
                // Neither half matched: try the next boundary.
                if (phr1 == null && phr2 == null) continue;
                // At least one half matched: register it and recurse into both
                // halves; the loop condition then ends the loop.
                if (phr1 != null) {
                    leftContext = rg1.left <= 0 ? null : source.get((int)(rg1.left - 1)).lemma;
                    rightContext = rg1.right >= s - 1 ? null : source.get((int)(rg1.right + 1)).lemma;
                    this.collectTermCandidate(phr1, docID, sr, leftContext, rightContext);
                    this.collect_NPMI2(tokenList, source, docID, sr, bgs, rg1);
                } else {
                    this.collect_NPMI2(tokenList, source, docID, sr, bgs, rg1);
                }
                if (phr2 != null) {
                    leftContext = rg2.left <= 0 ? null : source.get((int)(rg2.left - 1)).lemma;
                    rightContext = rg2.right >= s - 1 ? null : source.get((int)(rg2.right + 1)).lemma;
                    this.collectTermCandidate(phr2, docID, sr, leftContext, rightContext);
                    this.collect_NPMI2(tokenList, source, docID, sr, bgs, rg2);
                    continue;
                }
                this.collect_NPMI2(tokenList, source, docID, sr, bgs, rg2);
            }
            // Every boundary was tried and no half matched: descend along the
            // first boundary. NOTE(review): frg1/frg2 are non-null here only if
            // bgs holds a boundary inside r — true for the array built in
            // collectTerms(); confirm for other callers.
            if (phr1 == null && phr2 == null) {
                this.collect_NPMI2(tokenList, source, docID, sr, bgs, frg1);
                this.collect_NPMI2(tokenList, source, docID, sr, bgs, frg2);
            }
        }
    }

    /**
     * Recursively splits range {@code r} at bigram boundaries, preferring a
     * boundary where both halves match the search template (selected by
     * collectTerms() when {@code prefs.NPMIMethod == 3}).
     *
     * <p>Boundaries lying inside the range are tried in the order of
     * {@code bgs} until both halves match or no boundary is left. The first
     * half that matches at all is remembered as the "good" phrase together with
     * the score stored in {@code bigrams} for its boundary. The outcome is:
     * <ul>
     * <li>both halves matched at the same boundary that produced the good
     *     phrase: both halves are registered and processed recursively;</li>
     * <li>both halves matched at a later boundary: that split is used only if
     *     {@code goodNPMI * NPMIfactor / 100} exceeds the score of the later
     *     boundary; otherwise the good phrase wins;</li>
     * <li>the good phrase wins, or no boundary gave two matching halves: the
     *     good phrase is registered and the recursion continues on both halves
     *     of its split;</li>
     * <li>nothing matched at all: the recursion continues on the halves of the
     *     first boundary tried.</li>
     * </ul>
     *
     * <p>Like {@link #collect_NPMI2}, this method does not look at the
     * {@code ner} or {@code spaceAfter} fields of the token at the boundary.
     *
     * @param tokenList matched tokens of the whole fragment; range positions index into it
     * @param source    tokens used to look up the left/right context lemmas
     * @param docID     identifier of the document the fragment comes from
     * @param sr        sentence the fragment comes from
     * @param bgs       bigrams of the fragment, in order of preference; each must be a key of {@code bigrams}
     * @param r         inclusive range of positions to split
     */
    public void collect_NPMI3(LinkedList<MatchedToken> tokenList, LinkedList<Token> source, int docID, SentenceRef sr, Bigram[] bgs, Range r) {
        if (r.left < r.right) {
            TermMatcher tm = new TermMatcher(this.doc.getSearchTemplate(), this.tagset);
            // phr0: first half that matched at any boundary (the "good" phrase).
            LinkedList<MatchedToken> phr0 = null;
            LinkedList<MatchedToken> phr1 = null;
            LinkedList<MatchedToken> phr2 = null;
            Range rg1 = null;
            Range rg2 = null;
            // frg1/frg2: halves of the first boundary tried (last-resort fallback).
            Range frg1 = null;
            Range frg2 = null;
            // brg0: range of phr0; brg1/brg2: both halves of the split that produced
            // phr0. brg0 is the same object as either brg1 or brg2, which the
            // reference comparisons further down rely on.
            Range brg0 = null;
            Range brg1 = null;
            Range brg2 = null;
            int pos = 0;
            byte div = 1;
            int s = source.size();
            float goodNPMI = 0.0f;
            boolean checkGoodPhrase = false;
            boolean firstTry = true;
            // Keep trying boundaries until both halves match.
            while (phr1 == null || phr2 == null) {
                // phr0 already set when an iteration starts means it stems from an
                // earlier boundary, so it must later be weighed against the final one.
                if (!checkGoodPhrase && phr0 != null) {
                    checkGoodPhrase = true;
                }
                boolean found = false;
                // Advance to the next bigram whose boundary lies inside r.
                while (pos < bgs.length) {
                    div = bgs[pos].div;
                    if (div > r.left && div <= r.right) {
                        found = true;
                        ++pos;
                        break;
                    }
                    ++pos;
                }
                // No boundary left to try.
                if (!found) break;
                rg1 = new Range(r.left, div - 1);
                rg2 = new Range(div, r.right);
                if (firstTry) {
                    frg1 = rg1;
                    frg2 = rg2;
                    firstTry = false;
                }
                // Distribute the tokens of r over the two halves.
                phr1 = new LinkedList<MatchedToken>();
                phr2 = new LinkedList<MatchedToken>();
                int i = 0;
                for (MatchedToken mt : tokenList) {
                    if (i > r.right) break;
                    if (i >= r.left) {
                        if (i < div) {
                            phr1.add(mt);
                        } else {
                            phr2.add(mt);
                        }
                    }
                    ++i;
                }
                tm.setMatchedTokens(phr1);
                if (tm.match()) {
                    phr1 = tm.getMatchedFragment().getMatchedTokens();
                    if (phr0 == null) {
                        // bgs[pos - 1] is the bigram of the boundary just tried.
                        goodNPMI = this.bigrams.get(bgs[pos - 1]).floatValue();
                        phr0 = phr1;
                        brg0 = brg1 = rg1;
                        brg2 = rg2;
                    }
                } else {
                    if (phr0 == null) {
                        brg1 = rg1;
                        brg2 = rg2;
                    }
                    phr1 = null;
                }
                tm.setMatchedTokens(phr2);
                if (tm.match()) {
                    phr2 = tm.getMatchedFragment().getMatchedTokens();
                    if (phr0 != null) continue;
                    goodNPMI = this.bigrams.get(bgs[pos - 1]).floatValue();
                    phr0 = phr2;
                    brg0 = brg2 = rg2;
                    brg1 = rg1;
                    continue;
                }
                if (phr0 == null) {
                    brg1 = rg1;
                    brg2 = rg2;
                }
                phr2 = null;
            }
            if (phr1 != null && phr2 != null) {
                // Both halves of the last boundary matched.
                if (checkGoodPhrase) {
                    // The good phrase comes from an earlier boundary: drop it only if
                    // its scaled score exceeds the score of the final boundary.
                    float betterNPMI = this.bigrams.get(bgs[pos - 1]).floatValue();
                    if (goodNPMI * (float)this.doc.getPreferences().NPMIfactor / 100.0f > betterNPMI) {
                        phr0 = null;
                    }
                } else {
                    // The good phrase is one of the two halves of this very split.
                    phr0 = null;
                }
                if (phr0 == null) {
                    // Use the final split: register and descend into both halves.
                    String leftContext = rg1.left <= 0 ? null : source.get((int)(rg1.left - 1)).lemma;
                    String rightContext = rg1.right >= s - 1 ? null : source.get((int)(rg1.right + 1)).lemma;
                    this.collectTermCandidate(phr1, docID, sr, leftContext, rightContext);
                    this.collect_NPMI3(tokenList, source, docID, sr, bgs, rg1);
                    leftContext = rg2.left <= 0 ? null : source.get((int)(rg2.left - 1)).lemma;
                    rightContext = rg2.right >= s - 1 ? null : source.get((int)(rg2.right + 1)).lemma;
                    this.collectTermCandidate(phr2, docID, sr, leftContext, rightContext);
                    this.collect_NPMI3(tokenList, source, docID, sr, bgs, rg2);
                } else {
                    // Keep the good phrase: register it, then descend into it and into
                    // the other half of its split.
                    String leftContext = brg0.left <= 0 ? null : source.get((int)(brg0.left - 1)).lemma;
                    String rightContext = brg0.right >= s - 1 ? null : source.get((int)(brg0.right + 1)).lemma;
                    this.collectTermCandidate(phr0, docID, sr, leftContext, rightContext);
                    this.collect_NPMI3(tokenList, source, docID, sr, bgs, brg0);
                    // Reference comparison on purpose: tells which half phr0 was.
                    if (brg0 == brg2) {
                        this.collect_NPMI3(tokenList, source, docID, sr, bgs, brg1);
                    } else {
                        this.collect_NPMI3(tokenList, source, docID, sr, bgs, brg2);
                    }
                }
            } else if (phr0 != null) {
                // Boundaries ran out without a split where both halves match, but one
                // half matched somewhere: use that one.
                String leftContext = brg0.left <= 0 ? null : source.get((int)(brg0.left - 1)).lemma;
                String rightContext = brg0.right >= s - 1 ? null : source.get((int)(brg0.right + 1)).lemma;
                this.collectTermCandidate(phr0, docID, sr, leftContext, rightContext);
                this.collect_NPMI3(tokenList, source, docID, sr, bgs, brg0);
                if (brg0 == brg2) {
                    this.collect_NPMI3(tokenList, source, docID, sr, bgs, brg1);
                } else {
                    this.collect_NPMI3(tokenList, source, docID, sr, bgs, brg2);
                }
            } else {
                // Nothing matched at any boundary: descend along the first boundary.
                // NOTE(review): frg1/frg2 are non-null here only if bgs holds a
                // boundary inside r — true for the array built in collectTerms();
                // confirm for other callers.
                this.collect_NPMI3(tokenList, source, docID, sr, bgs, frg1);
                this.collect_NPMI3(tokenList, source, docID, sr, bgs, frg2);
            }
        }
    }

    /**
     * Replaces the value stored for every bigram with its normalised pointwise
     * mutual information.
     *
     * <p>With {@code p(xy) = value / nbigrams} and
     * {@code p(x) = unigrams[x] / ntok}, the stored score is
     * {@code ln(p(xy) / (p(x) * p(y))) / -ln(p(xy))}. Processing stops early
     * when the run is flagged as cancelled, leaving the remaining values
     * unconverted.
     */
    public void calcNPMI() {
        // Only values of existing keys are overwritten, so the key set can be
        // iterated while the map is being updated.
        for (Bigram pair : this.bigrams.keySet()) {
            if (this.cancelled) {
                break;
            }
            float pairProb = this.bigrams.get(pair).floatValue() / (float)this.nbigrams;
            float firstProb = (float)this.unigrams.get(pair.first).intValue() / (float)this.ntok;
            float secondProb = (float)this.unigrams.get(pair.second).intValue() / (float)this.ntok;
            double pmi = Math.log(pairProb / (firstProb * secondProb));
            double normaliser = -Math.log(pairProb);
            float npmi = (float)(pmi / normaliser);
            this.bigrams.put(pair, Float.valueOf(npmi));
        }
    }

    /**
     * Pre-processes every input file and settles how determiners are handled.
     *
     * <p>When UD processing is enabled and the document has not fixed the
     * determiner mode ({@code doc.acceptDET() == -1}), the mode is derived from
     * the preferences: {@code detHandling == 1} either requests automatic
     * detection or selects mode 1, {@code detHandling == 2} selects mode 2 and
     * any other value selects mode 0.
     *
     * <p>Each file is then passed to {@link #preprocessFile(File)} and replaced
     * in {@code searchFiles} by the file it returns. The loop stops on
     * cancellation or error. A missing file array is treated as "no files", in
     * line with {@link #processFiles()}.
     *
     * <p>With automatic detection, determiners are accepted (mode 1) when the
     * rounded-up percentage of determiners per head token exceeds
     * {@code prefs.detRatio}, and rejected (mode 0) otherwise.
     */
    public void preprocessFiles() {
        Preferences prefs = this.doc.getPreferences();
        if (prefs.useUD && this.doc.acceptDET() == -1) {
            if (prefs.detHandling == 1) {
                if (prefs.detectDeterminers) {
                    this.checkDetRatio = true;
                } else {
                    this.acceptDET = 1;
                }
            } else if (prefs.detHandling == 2) {
                this.acceptDET = 2;
            } else {
                this.acceptDET = 0;
            }
        }

        if (this.searchFiles != null) {
            for (int i = 0; i < this.searchFiles.length; ++i) {
                if (this.cancelled || this.error) {
                    break;
                }
                this.searchFiles[i] = this.preprocessFile(this.searchFiles[i]);
            }
        }

        if (!this.error && this.checkDetRatio) {
            int accept = 0;
            if (this.nouns > 0) {
                int ratio = (int)Math.ceil(100.0f * (float)this.dets / (float)this.nouns);
                if (ratio > prefs.detRatio) {
                    accept = 1;
                }
            }
            this.acceptDET = accept;
        }
    }

    /**
     * Prepares one input file for processing.
     *
     * <p>If the reader reports format {@code -1} for the file, the name of a
     * sibling {@code .conllu} file is derived (same directory, extension
     * replaced) and the parser is run on the input, unless the preferences
     * allow reusing an existing tagged file and that file is present.
     *
     * <p>While automatic determiner detection is active, the file is also
     * scanned: tokens with the UD relation {@code "det"} and tokens accepted by
     * {@code headPOS} are counted. A token that is not a {@code UDToken} sets
     * the error flag and ends the scan.
     *
     * @param file input file
     * @return {@code file} itself, or the derived {@code .conllu} file when the
     *         reader reported format {@code -1}
     */
    public File preprocessFile(File file) {
        File outFile = file;
        this.corpusReader.setCurrentFile(file);
        this.corpusReader.checkFormat();
        int format = this.corpusReader.getFormat();
        if (format == -1) {
            String dir = file.getAbsoluteFile().getParent();
            String stem = file.getName();
            int dot = stem.lastIndexOf(".");
            if (dot > 0) {
                stem = stem.substring(0, dot);
            }
            outFile = new File(dir + File.separator + stem + ".conllu");
            boolean reuse = this.doc.getPreferences().reuseTaggedFiles && outFile.exists();
            if (!reuse) {
                this.runParser(this.doc.getPreferences().language, file);
            }
        }
        if (!this.checkDetRatio) {
            return outFile;
        }

        this.reportPreprocessing(outFile.getName());
        // NOTE(review): the reader still points at `file` here, while
        // processFile() re-points it at the .conllu file before opening —
        // confirm which file this scan is meant to read.
        this.corpusReader.openFile();
        while (true) {
            Token t = this.corpusReader.getNextToken();
            if (t == null) {
                break;
            }
            if (!t.stop()) {
                if (!(t instanceof UDToken)) {
                    // Determiner statistics need UD annotation.
                    this.error = true;
                    break;
                }
                UDToken tok = (UDToken)t;
                if (tok.UDRel.equals("det")) {
                    ++this.dets;
                }
                if (ExtractorEngine.headPOS(tok)) {
                    ++this.nouns;
                }
            }
            if (this.cancelled) {
                break;
            }
        }
        this.corpusReader.closeFile();
        this.doc.changeProgress(this.doc.getPreferences().useUD ? 2 : 1);
        return outFile;
    }

    /**
     * Runs the sentence parser on one file and cancels the document's run if
     * parsing fails.
     *
     * <p>The parser is started only if it reports no error after construction.
     * An error reported by the parser, or a parser that did not finish parsing,
     * is reported through the parser and followed by {@code doc.cancel()}. The
     * {@code parser} field is non-null only for the duration of this call.
     *
     * @param language not used
     * @param file     file to parse
     */
    public void runParser(String language, File file) {
        this.parser = new SentenceParser(this.doc.getPreferences(), file.getAbsolutePath());
        this.reportTagging(file.getName());
        boolean usable = this.parser.getError() == null;
        if (usable) {
            this.parser.run();
        }
        this.doc.changeProgress(this.doc.getPreferences().useUD ? 2 : 1);
        if (this.parser.getError() != null) {
            this.parser.report();
            this.doc.cancel();
        } else if (!this.parser.isParsed()) {
            this.parser.report("Error occured while parsing.");
            this.doc.cancel();
        }
        this.parser = null;
    }

    /**
     * Analyses every file of {@code searchFiles} in order, stopping as soon as the run is
     * flagged as cancelled. The list of analysed files is reset first and stays
     * {@code null} when there are no search files.
     */
    public void processFiles() {
        this.analyzedFiles = null;
        if (this.searchFiles == null) {
            return;
        }
        this.analyzedFiles = new LinkedList<FileDescr>();
        for (File candidate : this.searchFiles) {
            if (this.cancelled) {
                return;
            }
            this.processFile(candidate);
        }
    }

    /**
     * Analyses one corpus file and records it in the list of analysed files.
     *
     * <p>When the reader reports format {@code -1}, the analysis is run on a sibling file
     * named {@code <basename>.conllu} opened with format code {@code 3} (presumably the
     * parser output produced earlier; confirm against the tagging step). That sibling file
     * is deleted afterwards unless the preferences ask to reuse tagged files, and the
     * reader is pointed back at the original file.
     *
     * @param file the corpus file to analyse
     */
    public void processFile(File file) {
        this.corpusReader.setCurrentFile(file);
        this.corpusReader.checkFormat();
        final int format = this.corpusReader.getFormat();

        File taggedFile = null;
        if (format == -1) {
            String parent = file.getAbsoluteFile().getParent();
            String baseName = file.getName();
            int dot = baseName.lastIndexOf(".");
            if (dot > 0) {
                // Strip the extension; a leading dot (hidden file) is kept as part of the name.
                baseName = baseName.substring(0, dot);
            }
            taggedFile = new File(parent + File.separator + baseName + ".conllu");
            this.corpusReader.setCurrentFile(taggedFile, 3);
        }

        // Initial progress report before any token of this file is read.
        if (this.doc.getPreferences().useUD) {
            this.report(this.termMap.size(), 0.0f);
        } else {
            this.report(this.maxTerms.size());
        }

        this.corpusReader.openFile();
        this.analyze();
        this.corpusReader.closeFile();

        if (taggedFile != null) {
            boolean keepTagged = this.doc.getPreferences().reuseTaggedFiles;
            if (!keepTagged) {
                taggedFile.delete();
            }
            this.corpusReader.setCurrentFile(file, format);
        }

        this.analyzedFiles.add(new FileDescr(file, format));
        ++this.fileID;

        // Final report for this file.
        if (this.doc.getPreferences().useUD) {
            this.report(this.corpusReader.getProcessedFileName(), this.termMap.size());
        } else {
            this.report(this.maxTerms.size());
        }
    }

    /**
     * Passes a tagging notification for the given file name on to the document.
     *
     * @param fName name of the file being tagged
     */
    public void reportTagging(String fName) {
        doc.reportTagging(fName);
    }

    /**
     * Passes a preprocessing notification for the given file name on to the document.
     *
     * @param fName name of the file being preprocessed
     */
    public void reportPreprocessing(String fName) {
        doc.reportPreprocessing(fName);
    }

    /**
     * Reports a count to the document together with the name of the file the corpus
     * reader is currently processing.
     *
     * @param count the count to report
     */
    public void report(int count) {
        TermoPLDocument target = this.doc;
        String processedFile = this.corpusReader.getProcessedFileName();
        target.report(processedFile, count);
    }

    /**
     * Reports a count for an explicitly named file to the document.
     *
     * @param fName file name to show in the report
     * @param count the count to report
     */
    public void report(String fName, int count) {
        doc.report(fName, count);
    }

    /**
     * Reports a count together with a progress value to the document.
     *
     * @param count    the count to report
     * @param progress progress value handed through unchanged
     */
    public void report(int count, float progress) {
        doc.report(count, progress);
    }

    /**
     * Reports a processed/max pair together with a value to the document.
     *
     * @param processed number of items processed so far
     * @param max       total number of items
     * @param value     value handed through unchanged
     */
    public void report(int processed, int max, float value) {
        doc.report(processed, max, value);
    }

    /**
     * Reads the currently opened corpus file token by token, groups tokens into sentences
     * and runs term search on every completed sentence.
     *
     * <p>A token whose {@code stop()} is true closes the sentence collected so far; any
     * other token is appended to it and counted in {@code ntok}. When the NPMI method is
     * enabled, unigram and bigram counts are updated for every non-stop token. Progress is
     * reported every 1000 loop iterations. The loop ends when the reader returns
     * {@code null} or the run is cancelled; a sentence still open at that point is searched
     * only if the run was not cancelled.
     */
    public void analyze() {
        Preferences prefs = this.doc.getPreferences();
        LinkedList<Token> sentence = null;
        int sinceReport = 0;
        int reportsSinceGc = 0;
        Token t;
        do {
            t = this.corpusReader.getNextToken();
            if (t != null) {
                if (t.stop()) {
                    if (sentence != null) {
                        this.dispatchSentence(sentence, prefs);
                        sentence = null;
                        this.corpusReader.initMultiWordTokens();
                    }
                } else {
                    if (prefs.useNPMIMethod) {
                        this.tallyUnigramAndBigram(t);
                    }
                    if (sentence == null) {
                        sentence = new LinkedList<Token>();
                    }
                    sentence.add(t);
                    ++this.ntok;
                }
            }
            if (++sinceReport >= 1000) {
                sinceReport = 0;
                // Every 25000th report an explicit collection is requested, as before.
                if (++reportsSinceGc >= 25000) {
                    reportsSinceGc = 0;
                    System.gc();
                }
                this.reportTermCount(prefs);
            }
        } while (t != null && !this.cancelled);
        // Flush a sentence that was not closed by a stop token before the input ended.
        if (!this.cancelled && sentence != null) {
            this.dispatchSentence(sentence, prefs);
            this.reportTermCount(prefs);
        }
    }

    /** Counts one finished sentence and runs the search variant selected by the preferences on it. */
    private void dispatchSentence(LinkedList<Token> sentence, Preferences prefs) {
        ++this.nsent;
        if (prefs.makeIndex) {
            if (prefs.useUD) {
                this.searchUD(sentence, this.fileID, this.corpusReader.getSentenceStart(), this.corpusReader.getSentenceLength());
            } else {
                this.search(sentence, this.fileID, this.corpusReader.getSentenceStart(), this.corpusReader.getSentenceLength());
            }
        } else if (prefs.useUD) {
            this.searchUD(sentence);
        } else {
            this.search(sentence);
        }
    }

    /** Increments the unigram count of the token's lemma and the count of the bigram it forms with the reader's previous token. */
    private void tallyUnigramAndBigram(Token t) {
        Integer seen = this.unigrams.get(t.lemma);
        if (seen == null) {
            this.unigrams.put(t.lemma, 1);
        } else {
            this.unigrams.put(t.lemma, seen + 1);
        }
        Token previous = this.corpusReader.getPreviousToken();
        if (previous != null) {
            Bigram bigram = new Bigram(previous.lemma, t.lemma);
            Float weight = this.bigrams.get(bigram);
            if (weight == null) {
                this.bigrams.put(bigram, Float.valueOf(1.0f));
            } else {
                this.bigrams.put(bigram, Float.valueOf(weight.floatValue() + 1.0f));
            }
            ++this.nbigrams;
        }
    }

    /** Reports the current number of collected terms (term map in UD mode, matched fragments otherwise). */
    private void reportTermCount(Preferences prefs) {
        if (prefs.useUD) {
            this.report(this.corpusReader.getProcessedFileName(), this.termMap.size());
        } else {
            this.report(this.maxTerms.size());
        }
    }

    /**
     * Tells whether the token's lemma is in the stop-word set.
     *
     * @param t the token to check
     * @return {@code true} if a stop-word set is configured and contains {@code t.lemma}
     */
    public boolean isStopWord(Token t) {
        return this.stopWords != null && this.stopWords.contains(t.lemma);
    }

    /**
     * Checks whether the phrase spans a complete multi-word token, i.e. contains a token
     * whose index equals the multi-word token's start and, later in the phrase, one whose
     * index equals its end.
     *
     * @param phrase  tokens of the phrase, in phrase order
     * @param mwtList multi-word tokens of the sentence, may be {@code null}
     * @return {@code true} if at least one multi-word token is fully spanned
     */
    public boolean containsMWT(LinkedList<UDToken> phrase, LinkedList<MultiWordToken> mwtList) {
        if (mwtList == null) {
            return false;
        }
        for (MultiWordToken candidate : mwtList) {
            boolean startSeen = false;
            for (UDToken member : phrase) {
                if (startSeen && member.index == candidate.endToken()) {
                    return true;
                }
                if (member.index == candidate.startToken()) {
                    startSeen = true;
                }
            }
        }
        return false;
    }

    /**
     * Runs the template matcher over one sentence and stores every matched fragment in
     * {@code maxTerms}. Tokens are filtered first when stop words or compound
     * prepositions are configured.
     *
     * @param tokenList tokens of the sentence
     */
    public void search(LinkedList<Token> tokenList) {
        boolean filteringConfigured = this.stopWords != null || this.compPreps != null;
        if (filteringConfigured) {
            this.filterTokens(tokenList);
        }
        this.matcher.setTokens(tokenList);
        for (boolean found = this.matcher.find(); found; found = this.matcher.find()) {
            MatchedFragment fragment = this.matcher.getMatchedFragment();
            if (this.doc.useDocID()) {
                fragment.setDocID(this.corpusReader.getDocNumber());
            }
            this.maxTerms.add(fragment);
        }
    }

    /**
     * Same as {@link #search(LinkedList)}, but additionally attaches a sentence reference
     * to every matched fragment. A plain {@code SentenceRef} is used when {@code fid} is 0,
     * a {@code SentenceRefEx} carrying the file id otherwise.
     *
     * @param tokenList tokens of the sentence
     * @param fid       id of the file the sentence comes from
     * @param start     sentence start as reported by the corpus reader
     * @param len       sentence length as reported by the corpus reader
     */
    public void search(LinkedList<Token> tokenList, int fid, long start, int len) {
        boolean filteringConfigured = this.stopWords != null || this.compPreps != null;
        if (filteringConfigured) {
            this.filterTokens(tokenList);
        }
        this.matcher.setTokens(tokenList);
        for (boolean found = this.matcher.find(); found; found = this.matcher.find()) {
            MatchedFragment fragment = this.matcher.getMatchedFragment();
            SentenceRef location;
            if (fid == 0) {
                location = new SentenceRef(start, len, 0);
            } else {
                location = new SentenceRefEx(fid, start, len, 0);
            }
            fragment.setRef(location);
            if (this.doc.useDocID()) {
                fragment.setDocID(this.corpusReader.getDocNumber());
            }
            this.maxTerms.add(fragment);
        }
    }

    /**
     * Extracts term candidates from one sentence using the dependency structure and
     * records them in the term maps. No sentence reference is attached.
     *
     * @param tokenList tokens of the sentence (expected to be {@code UDToken}s)
     */
    public void searchUD(LinkedList<Token> tokenList) {
        this.collectUDTerms(tokenList, false, 0, 0L, 0);
    }

    /**
     * Same as {@link #searchUD(LinkedList)}, but when the preferences have
     * {@code makeIndex} set every recorded term also receives a sentence reference
     * ({@code SentenceRef} when {@code fid} is 0, {@code SentenceRefEx} otherwise) pointing
     * at the index of the phrase's first token.
     *
     * @param tokenList tokens of the sentence (expected to be {@code UDToken}s)
     * @param fid       id of the file the sentence comes from
     * @param start     sentence start as reported by the corpus reader
     * @param len       sentence length as reported by the corpus reader
     */
    public void searchUD(LinkedList<Token> tokenList, int fid, long start, int len) {
        this.collectUDTerms(tokenList, true, fid, start, len);
    }

    /**
     * Shared worker behind both {@code searchUD} overloads: filters the tokens, builds the
     * dependency structure, selects candidate phrases and stores each of them.
     *
     * @param withLocation whether the caller supplied sentence coordinates; a reference is
     *                     only added when this is true and {@code makeIndex} is set
     */
    private void collectUDTerms(LinkedList<Token> tokenList, boolean withLocation, int fid, long start, int len) {
        if (this.stopWords != null || this.compPreps != null || this.doc.acceptDET() >= 0) {
            this.filterTokens(tokenList);
        }
        LinkedList<UDToken> obligatoryNodes = new LinkedList<UDToken>();
        LinkedList<UDToken> termNodes = this.getTermTokens(tokenList, obligatoryNodes);
        UDStructure struct = this.createUDStructure(tokenList, termNodes, obligatoryNodes);
        LinkedList<PhraseWithContext> accepted = this.selectTermCandidates(struct, termNodes, obligatoryNodes, tokenList);
        LinkedList<MultiWordToken> mwtList = this.corpusReader.getMultiWordTokens();
        if (accepted == null) {
            return;
        }
        Preferences prefs = this.doc.getPreferences();
        for (PhraseWithContext cntxph : accepted) {
            LinkedList<UDToken> phrase = cntxph.phrase();
            String context = cntxph.context();
            boolean continuous = this.isContinuous(phrase);
            // When the phrase spans a multi-word token, key and form are built from the
            // token list returned by replaceMWT instead of the phrase itself.
            LinkedList<Token> ph = null;
            if (this.containsMWT(phrase, mwtList)) {
                ph = CorpusReader.replaceMWT(phrase, mwtList);
            }
            KeyLen keylen = ph == null ? this.getKeyFromTokens(phrase) : this.getKeyFromTokens(ph);
            if (prefs.ignoreCase) {
                keylen.key = keylen.key.toLowerCase();
            }
            if (this.commonTerms != null && this.commonTerms.contains(keylen.key)) {
                continue;
            }
            Term term = this.lookupOrCreateTerm(keylen, continuous, prefs);
            if (withLocation && prefs.makeIndex) {
                SentenceRef ref = fid == 0 ? new SentenceRef(start, len, phrase.getFirst().index) : new SentenceRefEx(fid, start, len, phrase.getFirst().index);
                term.addSentenceRef(ref);
            }
            term.addContext(context, null);
            // A phrase that has a context is passed on as "inner".
            boolean inner = context != null;
            if (ph == null) {
                term.addForm(Form.createFromTokens(phrase), this.corpusReader.getDocNumber(), inner, prefs.collectAllForms);
            } else {
                term.addForm(Form.createFromTokens(ph), this.corpusReader.getDocNumber(), inner, prefs.collectAllForms);
            }
        }
    }

    /**
     * Finds the term for a key, creating it when necessary. With discontinuities allowed
     * every term lives in {@code termMap}. Otherwise a continuous occurrence promotes a
     * term from {@code waitingMap} to {@code termMap} (or creates it there), while a
     * discontinuous occurrence of an unknown key parks a new term in {@code waitingMap}.
     */
    private Term lookupOrCreateTerm(KeyLen keylen, boolean continuous, Preferences prefs) {
        String key = keylen.key;
        if (this.allowDiscontinuities) {
            Term term = this.termMap.get(key);
            if (term == null) {
                term = this.newTermFor(keylen, prefs);
                this.termMap.put(key, term);
            }
            return term;
        }
        if (continuous) {
            Term term = this.waitingMap.get(key);
            if (term != null) {
                this.waitingMap.remove(key);
                this.termMap.put(key, term);
                return term;
            }
            term = this.termMap.get(key);
            if (term == null) {
                term = this.newTermFor(keylen, prefs);
                this.termMap.put(key, term);
            }
            return term;
        }
        Term term = this.termMap.get(key);
        if (term == null && (term = this.waitingMap.get(key)) == null) {
            term = this.newTermFor(keylen, prefs);
            this.waitingMap.put(key, term);
        }
        return term;
    }

    /** Creates a {@code TermEx} when grouping is enabled in the preferences, a plain {@code Term} otherwise. */
    private Term newTermFor(KeyLen keylen, Preferences prefs) {
        return prefs.makeGroups ? new TermEx(keylen.key, keylen.len) : new Term(keylen.key, keylen.len);
    }

    /**
     * Selects the tokens of a sentence that may take part in a term.
     *
     * <p>Tokens with a head or non-head POS are selected, as are non-skipped PRON tokens
     * whose tag string contains {@code "reflex=yes"}. Tokens joined by {@code "-"} without
     * surrounding spaces are collected into a hyphen group; when a token followed by a
     * space is reached, all group members are selected and those whose {@code UDLink}
     * lies within the group's first..last index range are appended to
     * {@code obligatoryNodes}.
     *
     * @param tokenList       tokens of the sentence (each is cast to {@code UDToken})
     * @param obligatoryNodes output list receiving hyphen-group members linked inside their group
     * @return the selected tokens
     */
    public LinkedList<UDToken> getTermTokens(LinkedList<Token> tokenList, LinkedList<UDToken> obligatoryNodes) {
        LinkedList<UDToken> selected = new LinkedList<UDToken>();
        LinkedList<UDToken> hyphenRun = new LinkedList<UDToken>();
        UDToken previous = null;
        for (Token raw : tokenList) {
            UDToken current = (UDToken)raw;
            if (ExtractorEngine.headPOS(current) || ExtractorEngine.nonHeadPOS(current)) {
                selected.add(current);
                if (ExtractorEngine.followsGluedHyphen(previous)) {
                    hyphenRun.add(current);
                }
            } else if (!current.skip() && current.UDPos.equals("PRON") && current.UDTag.contains("reflex=yes")) {
                selected.add(current);
            } else if (current.form.equals("-")) {
                // A hyphen with no space on either side starts or extends a group.
                if (previous != null && !previous.spaceAfter && !current.spaceAfter) {
                    hyphenRun.add(previous);
                    hyphenRun.add(current);
                }
            } else if (ExtractorEngine.followsGluedHyphen(previous)) {
                hyphenRun.add(current);
            }
            if (current.spaceAfter && !hyphenRun.isEmpty()) {
                ExtractorEngine.flushHyphenGroup(hyphenRun, selected, obligatoryNodes);
            }
            previous = current;
        }
        return selected;
    }

    /** True when {@code prev} exists, is the form "-" and has no space after it. */
    private static boolean followsGluedHyphen(UDToken prev) {
        return prev != null && prev.form.equals("-") && !prev.spaceAfter;
    }

    /** Moves a finished hyphen group into the selection, marks internally linked members as obligatory and empties the group. */
    private static void flushHyphenGroup(LinkedList<UDToken> group, LinkedList<UDToken> selected, LinkedList<UDToken> obligatoryNodes) {
        int lowest = group.getFirst().index;
        int highest = group.getLast().index;
        for (UDToken member : group) {
            if (!selected.contains(member)) {
                selected.add(member);
            }
            if (member.UDLink >= lowest && member.UDLink <= highest) {
                obligatoryNodes.add(member);
            }
        }
        group.clear();
    }

    /**
     * Decides whether a token passes the determiner filter. Non-DET tokens always pass.
     * For DET tokens the {@code acceptDET} setting decides: 1 accepts every determiner,
     * 2 accepts only those whose tag string contains {@code "definite="}, and any other
     * value (including 0) rejects.
     *
     * @param n the token to check
     * @return {@code true} if the token is accepted
     */
    public boolean acceptDET(UDToken n) {
        if (!n.UDPos.equals("DET")) {
            return true;
        }
        if (this.acceptDET == 1) {
            return true;
        }
        return this.acceptDET == 2 && n.UDTag.contains("definite=");
    }

    /**
     * Builds the dependency structure used for term selection and prunes
     * {@code termNodes} so that no remaining node depends on an obligatory node that is
     * itself not a term node.
     *
     * <p>Phase 1 pairs every token {@code ni} with the token {@code nj} found at the
     * position matching {@code ni.UDLink} (presumably its syntactic head; confirm against
     * {@code UDToken}), provided {@code nj} is a term node and {@code ni} carries an
     * obligatory or facultative relation or is listed in {@code obligatoryNodes}.
     * Phase 2 repeats until stable: a term node with an obligatory related node outside
     * {@code termNodes} is removed from {@code termNodes}; a non-obligatory related node
     * outside {@code termNodes} is simply dropped from the related list.
     *
     * @param tokenList       tokens of the sentence (each is cast to {@code UDToken})
     * @param termNodes       candidate term tokens; modified in place
     * @param obligatoryNodes tokens that must be treated as obligatory regardless of relation
     * @return the populated structure
     */
    public UDStructure createUDStructure(LinkedList<Token> tokenList, LinkedList<UDToken> termNodes, LinkedList<UDToken> obligatoryNodes) {
        UDStructure struct = new UDStructure();
        for (Token ti : tokenList) {
            UDToken ni = (UDToken)ti;
            int j = 0;
            for (Token tj : tokenList) {
                UDToken nj = (UDToken)tj;
                // NOTE(review): `++j` sits behind the short-circuit `nj == ni ||`, so j is
                // not advanced on the iteration where nj is ni itself. Confirm that this is
                // the intended way to line j up with UDLink (may be a decompilation artifact).
                if (nj == ni || ni.UDLink != ++j || !termNodes.contains(nj) || !ExtractorEngine.obligatoryRel(ni) && !ExtractorEngine.facultativeRel(ni) && !obligatoryNodes.contains(ni)) continue;
                struct.add(nj, ni);
            }
        }
        // Fixed-point pruning: removing one node can invalidate nodes that depend on it.
        boolean done = false;
        while (!done) {
            LinkedList<UDToken> toBeRemoved = new LinkedList<UDToken>();
            done = true;
            for (UDToken e : termNodes) {
                LinkedList<UDToken> relatedNodes = struct.getRelatedNodes(e);
                if (relatedNodes == null) continue;
                ListIterator it = relatedNodes.listIterator();
                while (it.hasNext()) {
                    UDToken n = (UDToken)it.next();
                    if (ExtractorEngine.obligatoryRel(n) || obligatoryNodes.contains(n)) {
                        // Obligatory related node: e survives only if n is a term node too.
                        if (termNodes.contains(n)) continue;
                        toBeRemoved.add(e);
                        done = false;
                        continue;
                    }
                    // Non-obligatory related node that is not a term node: detach it from e.
                    if (termNodes.contains(n)) continue;
                    it.remove();
                }
            }
            if (done) continue;
            // Removal is deferred so termNodes is not modified while it is being iterated.
            for (UDToken e : toBeRemoved) {
                termNodes.remove(e);
            }
        }
        return struct;
    }

    /**
     * Builds candidate phrases bottom-up from the dependency structure and returns those
     * that pass the named-entity filter.
     *
     * <p>The method repeatedly sweeps {@code termNodes}, handling every node whose related
     * list is {@code null} or empty: it composes the node's phrases from the sub-phrases
     * registered for it, registers the result under the first term node whose related list
     * contains it (removing it from that list), accepts the phrases when the node has a
     * head POS, and removes the node from {@code termNodes}. Afterwards accepted phrases
     * are sorted and trimmed, and a maximal phrase that does not coincide with any
     * accepted phrase is cleared to {@code null} on the phrase that carried it.
     *
     * @param struct          dependency structure; related lists are consumed in place
     * @param termNodes       candidate term tokens; emptied by this method
     * @param obligatoryNodes tokens treated as obligatory
     * @param tokenList       tokens of the sentence (not read in this method)
     * @return the accepted phrases after {@code filterNER}
     */
    public LinkedList<PhraseWithContext> selectTermCandidates(UDStructure struct, LinkedList<UDToken> termNodes, LinkedList<UDToken> obligatoryNodes, LinkedList<Token> tokenList) {
        SubphrasesIndex phIndex = new SubphrasesIndex();
        LinkedList<PhraseWithContext> acceptedPhrases = new LinkedList<PhraseWithContext>();
        // NOTE(review): this loop only ends if every sweep eventually finds a node with no
        // related nodes left; presumably the structure is acyclic. Confirm.
        while (!termNodes.isEmpty()) {
            ListIterator it = termNodes.listIterator();
            while (it.hasNext()) {
                UDToken t = (UDToken)it.next();
                LinkedList<UDToken> list = struct.getRelatedNodes(t);
                // Only nodes whose related nodes have all been processed are handled now.
                if (list != null && !list.isEmpty()) continue;
                LinkedList<Pair<UDToken, LinkedList<PhraseWithContext>>> sub = phIndex.get(t);
                LinkedList<UDToken> ph = new LinkedList<UDToken>();
                LinkedList<UDToken> maxph = new LinkedList<UDToken>();
                LinkedList<PhraseWithContext> allPhrases = new LinkedList<PhraseWithContext>();
                // Start from the single-token phrase consisting of t itself.
                ph.add(t);
                maxph.add(t);
                allPhrases.add(new PhraseWithContext(ph, maxph));
                if (sub != null) {
                    this.adjustMaxTermLength(sub);
                    ListIterator<Pair<UDToken, LinkedList<PhraseWithContext>>> subIt = sub.listIterator();
                    allPhrases = this.composePhrases(subIt, allPhrases, maxph, obligatoryNodes);
                }
                // Hand the phrases up to the first term node that lists t as related.
                for (UDToken ut : termNodes) {
                    LinkedList<UDToken> related = struct.getRelatedNodes(ut);
                    if (related == null || !related.contains(t)) continue;
                    phIndex.add(ut, new Pair<UDToken, LinkedList<PhraseWithContext>>(t, allPhrases));
                    related.remove(t);
                    break;
                }
                for (PhraseWithContext cntxph : allPhrases) {
                    cntxph.setMaxPhrase(maxph);
                }
                if (ExtractorEngine.headPOS(t)) {
                    acceptedPhrases.addAll(allPhrases);
                }
                it.remove();
            }
        }
        // Put every accepted phrase into token order and strip what trimPhrase removes.
        for (PhraseWithContext cntxph : acceptedPhrases) {
            LinkedList<UDToken> phrase = cntxph.phrase();
            Collections.sort(phrase);
            this.trimPhrase(phrase, obligatoryNodes);
            cntxph.setPhrase(phrase);
        }
        // Each distinct maximal phrase (by list equality) is examined once.
        LinkedList<LinkedList<UDToken>> allMaxPhrases = new LinkedList<LinkedList<UDToken>>();
        for (PhraseWithContext cntxph1 : acceptedPhrases) {
            LinkedList<UDToken> maxPhrase = cntxph1.maxPhrase();
            if (maxPhrase == null || allMaxPhrases.contains(maxPhrase)) continue;
            // NOTE(review): len1 is taken before maxPhrase is trimmed, while the phrases it
            // is compared against were trimmed above. Confirm this ordering is intended.
            int len1 = maxPhrase.size();
            boolean found1 = false;
            this.trimPhrase(maxPhrase, obligatoryNodes);
            allMaxPhrases.add(maxPhrase);
            for (PhraseWithContext cntxph2 : acceptedPhrases) {
                LinkedList<UDToken> phrase = cntxph2.phrase();
                int len2 = phrase.size();
                if (len1 != len2) continue;
                ListIterator it1 = maxPhrase.listIterator();
                ListIterator it2 = phrase.listIterator();
                boolean found2 = true;
                // Element-wise identity comparison over the length of maxPhrase.
                while (it1.hasNext()) {
                    if (it1.next() == it2.next()) continue;
                    found2 = false;
                    break;
                }
                if (!found2) continue;
                found1 = true;
                break;
            }
            if (found1) continue;
            // No accepted phrase matches the maximal phrase: drop it for this phrase.
            cntxph1.setMaxPhrase(null);
        }
        return this.filterNER(acceptedPhrases);
    }

    /**
     * Sets {@code maxTermLength} for the node whose sub-phrases are given: 2 when at
     * least five of the sub-phrase heads carry a facultative relation, otherwise
     * {@link #MAX_TERM_LEN}.
     *
     * @param sub sub-phrase entries registered for one node
     */
    public void adjustMaxTermLength(LinkedList<Pair<UDToken, LinkedList<PhraseWithContext>>> sub) {
        int facultativeCount = 0;
        for (Pair<UDToken, LinkedList<PhraseWithContext>> entry : sub) {
            if (ExtractorEngine.facultativeRel((UDToken)entry.first)) {
                facultativeCount++;
            }
        }
        if (facultativeCount >= 5) {
            this.maxTermLength = 2;
        } else {
            this.maxTermLength = MAX_TERM_LEN;
        }
    }

    /**
     * Removes, in place, every phrase that cuts through a named entity. Entities are
     * read from the tokens' {@code ner} marks: {@code 'B'} opens one, {@code 'I'}
     * continues it and {@code 'E'} closes it.
     *
     * @param phrases candidate phrases; offending entries are removed from this list
     * @return the same list instance
     */
    public LinkedList<PhraseWithContext> filterNER(LinkedList<PhraseWithContext> phrases) {
        Iterator<PhraseWithContext> cursor = phrases.iterator();
        while (cursor.hasNext()) {
            if (this.breaksNamedEntity(cursor.next().phrase())) {
                cursor.remove();
            }
        }
        return phrases;
    }

    /**
     * True when the phrase opens an entity twice, continues or closes one that is not
     * open, skips token positions inside an entity, contains an unmarked token while an
     * entity is open, or ends with an entity still open.
     */
    private boolean breaksNamedEntity(LinkedList<UDToken> ph) {
        boolean open = false;
        int lastIndex = 0;
        for (UDToken tok : ph) {
            if (tok.ner == 'B') {
                if (open) {
                    return true;
                }
                open = true;
                lastIndex = tok.index;
            } else if (tok.ner == 'I') {
                if (!open || tok.index > lastIndex + 1) {
                    return true;
                }
                lastIndex = tok.index;
            } else if (tok.ner == 'E') {
                if (!open || tok.index > lastIndex + 1) {
                    return true;
                }
                open = false;
            } else if (open) {
                return true;
            }
        }
        return open;
    }

    /**
     * Recursively combines the phrases built so far with the sub-phrases of each entry
     * delivered by {@code subIt}.
     *
     * <p>For one entry, every phrase in {@code allPhrases} is joined with every
     * sub-phrase as long as the combined token count does not exceed
     * {@code maxTermLength}; tokens of each joined sub-phrase are also collected into the
     * shared {@code maxph}. When the entry's token is not obligatory and carries a
     * facultative relation, the un-extended phrase is kept as an alternative (once). The
     * result is then combined with the next entry. When the iterator is exhausted,
     * {@code maxph} is sorted and set on all phrases. If an entry has no sub-phrase list,
     * recursion stops and the current phrases are returned as they are.
     *
     * @param subIt           iterator over (token, sub-phrases) entries; advanced by this call
     * @param allPhrases      phrases composed so far
     * @param maxph           shared maximal phrase; extended and sorted in place
     * @param obligatoryNodes tokens treated as obligatory
     * @return the composed phrases
     */
    LinkedList<PhraseWithContext> composePhrases(ListIterator<Pair<UDToken, LinkedList<PhraseWithContext>>> subIt, LinkedList<PhraseWithContext> allPhrases, LinkedList<UDToken> maxph, LinkedList<UDToken> obligatoryNodes) {
        if (subIt.hasNext()) {
            Pair<UDToken, LinkedList<PhraseWithContext>> pair = subIt.next();
            LinkedList subphrases = (LinkedList)pair.second;
            if (subphrases != null) {
                LinkedList<PhraseWithContext> newList = new LinkedList<PhraseWithContext>();
                for (PhraseWithContext cntxph1 : allPhrases) {
                    LinkedList<UDToken> ph1 = cntxph1.phrase();
                    int len1 = ph1.size();
                    for (PhraseWithContext cntxph2 : subphrases) {
                        LinkedList<UDToken> ph2 = cntxph2.phrase();
                        int len2 = ph2.size();
                        if (len1 + len2 <= this.maxTermLength) {
                            // Join ph1 and ph2 into a new phrase that remembers its parts.
                            LinkedList<UDToken> ph = new LinkedList<UDToken>(ph1);
                            LinkedList<PhraseWithContext> subPhrases = cntxph1.subPhrases() == null ? new LinkedList<PhraseWithContext>() : new LinkedList<PhraseWithContext>(cntxph1.subPhrases());
                            subPhrases.add(cntxph2);
                            ph.addAll(ph2);
                            for (UDToken t : ph2) {
                                if (maxph.contains(t)) continue;
                                maxph.add(t);
                            }
                            newList.add(new PhraseWithContext(ph, maxph, subPhrases));
                        }
                        // Only an optional (facultative, non-obligatory) dependent may be left out.
                        if (obligatoryNodes.contains(pair.first) || !ExtractorEngine.facultativeRel((UDToken)pair.first)) continue;
                        // Identity check on the phrase list: keep the un-extended phrase only once.
                        boolean found = false;
                        for (PhraseWithContext cntxph : newList) {
                            if (ph1 != cntxph.phrase()) continue;
                            found = true;
                            break;
                        }
                        if (found) continue;
                        newList.add(cntxph1);
                    }
                }
                allPhrases = this.composePhrases(subIt, newList, maxph, obligatoryNodes);
            }
        } else {
            // All entries consumed: finalise the shared maximal phrase.
            Collections.sort(maxph);
            for (PhraseWithContext cntxph : allPhrases) {
                cntxph.setMaxPhrase(maxph);
            }
        }
        return allPhrases;
    }

    /**
     * Strips determiners and head-attached adpositions from a phrase, in place. Walking
     * the phrase from the front, the scan stops at the first obligatory token, or at the
     * first ADP whose head inside the phrase is not the phrase head. Every DET met before
     * that is removed, as is every ADP attached directly to the phrase head.
     *
     * @param phrase          the phrase to trim; modified in place
     * @param obligatoryNodes tokens that must not be removed
     */
    public void trimPhrase(LinkedList<UDToken> phrase, LinkedList<UDToken> obligatoryNodes) {
        Iterator<UDToken> cursor = phrase.iterator();
        while (cursor.hasNext()) {
            UDToken tok = cursor.next();
            String tag = tok.UDPos;
            if (obligatoryNodes.contains(tok)) {
                return;
            }
            if (tag.equals("DET")) {
                cursor.remove();
                continue;
            }
            if (tag.equals("ADP")) {
                if (ExtractorEngine.getHeadOf(tok, phrase) != ExtractorEngine.getHeadOf(phrase)) {
                    return;
                }
                cursor.remove();
            }
        }
    }

    /**
     * Tells whether the phrase has no gap between neighbouring tokens, i.e. no token's
     * index exceeds the preceding token's (positive) index by more than one.
     *
     * @param phrase tokens in phrase order
     * @return {@code false} as soon as a gap is found, {@code true} otherwise
     */
    public boolean isContinuous(LinkedList<UDToken> phrase) {
        int last = 0;
        for (UDToken tok : phrase) {
            boolean gap = last > 0 && tok.index > last + 1;
            if (gap) {
                return false;
            }
            last = tok.index;
        }
        return true;
    }

    /**
     * Returns the first token of the phrase whose {@code UDLink} points outside the index
     * range spanned by the phrase's first and last token.
     *
     * @param phrase a non-empty phrase
     * @return that token, or {@code null} if every token links inside the range
     */
    public static UDToken getHeadOf(LinkedList<UDToken> phrase) {
        final int lowest = phrase.getFirst().index;
        final int highest = phrase.getLast().index;
        for (UDToken candidate : phrase) {
            boolean linksOutside = candidate.UDLink < lowest || candidate.UDLink > highest;
            if (linksOutside) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Looks up, inside the phrase, the token whose index equals {@code t.UDLink}.
     *
     * @param t      the token whose link target is wanted
     * @param phrase the phrase to search
     * @return the first matching token, or {@code null} if the target is not in the phrase
     */
    public static UDToken getHeadOf(UDToken t, LinkedList<UDToken> phrase) {
        final int target = t.UDLink;
        for (UDToken candidate : phrase) {
            if (candidate.index == target) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Tells whether a token may head a phrase: it must not be flagged to be skipped and
     * must either be a VERB whose tag string contains {@code "verbform=ger"} or have a POS
     * listed in {@link #HEAD_POS}.
     *
     * @param t the token to check
     * @return {@code true} if the token qualifies
     */
    public static boolean headPOS(UDToken t) {
        if (t.skip()) {
            return false;
        }
        boolean gerund = t.UDPos.equals("VERB") && t.UDTag.contains("verbform=ger");
        return gerund || ExtractorEngine.onTheList(t.UDPos, HEAD_POS);
    }

    /**
     * Tells whether a token that is not flagged to be skipped has a POS listed in
     * {@link #NON_HEAD_POS}.
     *
     * @param t the token to check
     * @return {@code true} if the token qualifies
     */
    public static boolean nonHeadPOS(UDToken t) {
        return !t.skip() && ExtractorEngine.onTheList(t.UDPos, NON_HEAD_POS);
    }

    /**
     * Tells whether a token that is not flagged to be skipped carries a relation listed
     * in {@link #OBLIGATORY_REL}.
     *
     * @param t the token to check
     * @return {@code true} if the token qualifies
     */
    public static boolean obligatoryRel(UDToken t) {
        return !t.skip() && ExtractorEngine.onTheList(t.UDRel, OBLIGATORY_REL);
    }

    /**
     * Tells whether a token that is not flagged to be skipped carries a relation listed
     * in {@link #FACULTATIVE_REL}.
     *
     * @param t the token to check
     * @return {@code true} if the token qualifies
     */
    public static boolean facultativeRel(UDToken t) {
        return !t.skip() && ExtractorEngine.onTheList(t.UDRel, FACULTATIVE_REL);
    }

    /**
     * Tells whether the token's relation is listed in {@link #HEAD_PHRASE_REL}. Unlike
     * the sibling relation checks, the skip flag is not consulted here.
     *
     * @param t the token to check
     * @return {@code true} if the relation is on the list
     */
    public static boolean headPhraseRel(UDToken t) {
        String relation = t.UDRel;
        return ExtractorEngine.onTheList(relation, HEAD_PHRASE_REL);
    }

    /**
     * Linear membership test: reports whether some element of {@code list} equals
     * {@code s}. A {@code null} value for {@code s} is never found.
     *
     * @param s    the value to look for
     * @param list the array to scan; its elements must not be {@code null}
     * @return {@code true} if an equal element exists
     */
    public static boolean onTheList(String s, String[] list) {
        for (String entry : list) {
            if (entry.equals(s)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Marks tokens of a sentence as skip tokens according to the active filters:
     * tokens matched by the compound-preposition matcher, tokens whose lemma is a stop
     * word, and (in UD mode with {@code acceptDET >= 0}) tokens rejected by
     * {@link #acceptDET(UDToken)}.
     *
     * @param tokenList tokens of the sentence; tokens are flagged in place
     */
    public void filterTokens(LinkedList<Token> tokenList) {
        if (this.compPreps != null) {
            this.compprepMatcher.setTokens(tokenList);
            while (this.compprepMatcher.find()) {
                LinkedList<MatchedToken> matched = this.compprepMatcher.getMatchedFragment().getMatchedTokens();
                for (MatchedToken hit : matched) {
                    hit.token.markAsSkipToken();
                }
            }
        }
        if (this.stopWords != null) {
            for (Token candidate : tokenList) {
                if (this.isStopWord(candidate)) {
                    candidate.markAsSkipToken();
                }
            }
        }
        if (this.doc.getPreferences().useUD && this.acceptDET >= 0) {
            for (Token candidate : tokenList) {
                if (!this.acceptDET((UDToken)candidate)) {
                    candidate.markAsSkipToken();
                }
            }
        }
    }

    /**
     * Requests cancellation: forwards the request to a parser that is currently
     * registered in the {@code parser} field, then raises the {@code cancelled} flag that
     * the processing loops poll.
     *
     * <p>The field is read once into a local because {@code runParser} resets it to
     * {@code null} when parsing ends; re-reading it after the null check could hit that
     * reset and fail with a {@code NullPointerException} (presumably this method is
     * invoked from a different thread than the one running the parser).
     */
    public void cancel() {
        SentenceParser running = this.parser;
        if (running != null) {
            running.cancel();
        }
        this.cancelled = true;
    }

    /**
     * @return the current value of the {@code cancelled} flag
     */
    public boolean isCancelled() {
        return cancelled;
    }

    /**
     * @return the internal {@code terms} array itself (not a copy); may be {@code null}
     */
    public Term[] getTerms() {
        return terms;
    }

    /**
     * @return the internal key-to-term map itself (not a copy)
     */
    public HashMap<String, Term> getTermMap() {
        return termMap;
    }

    /**
     * @return the value of the sentence counter {@code nsent}
     */
    public int getNumberOfSentences() {
        return nsent;
    }

    /**
     * @return the value of the token counter {@code ntok}
     */
    public int getNumberOfTokens() {
        return ntok;
    }

    /**
     * @return the length of the {@code terms} array, or 0 when it is {@code null}
     */
    public int getNumberOfTerms() {
        return this.terms == null ? 0 : this.terms.length;
    }

    /**
     * @return the internal list of analysed files itself (not a copy); {@code null} when
     *         no file list has been created
     */
    public LinkedList<FileDescr> getAnalyzedFiles() {
        return analyzedFiles;
    }

    /**
     * Orders bigrams by the value stored for them in the enclosing engine's
     * {@code bigrams} map, ascending. Both bigrams must be present in the map.
     */
    private class BigramComp
    implements Comparator<Bigram> {
        private BigramComp() {
        }

        /**
         * @return -1, 0 or 1 as the stored value of {@code b1} is less than, equal to or
         *         greater than that of {@code b2}
         */
        @Override
        public int compare(Bigram b1, Bigram b2) {
            float left = ((Float)ExtractorEngine.this.bigrams.get(b1)).floatValue();
            float right = ((Float)ExtractorEngine.this.bigrams.get(b2)).floatValue();
            return left < right ? -1 : (left > right ? 1 : 0);
        }
    }

    /**
     * Plain mutable holder pairing a string key with an integer length.
     */
    private class KeyLen {
        /** The key string. */
        public String key;
        /** The length value stored alongside the key. */
        public int len;

        /**
         * Creates a holder with both fields set.
         *
         * @param key value for {@link #key}
         * @param len value for {@link #len}
         */
        public KeyLen(String key, int len) {
            this.len = len;
            this.key = key;
        }
    }

    /**
     * A phrase ({@code ph}) together with a containing token list
     * ({@code maxph}) and an optional list of sub-phrases that share the same
     * {@code maxph}.
     */
    private class PhraseWithContext {
        private LinkedList<UDToken> ph;
        private LinkedList<UDToken> maxph;
        private LinkedList<PhraseWithContext> subPhrases;

        /**
         * Creates a phrase without sub-phrases.
         *
         * @param ph    the phrase tokens
         * @param maxph the token list the context is taken from; may be {@code null}
         */
        public PhraseWithContext(LinkedList<UDToken> ph, LinkedList<UDToken> maxph) {
            this(ph, maxph, null);
        }

        /**
         * Creates a phrase with the given sub-phrases.
         *
         * @param ph         the phrase tokens
         * @param maxph      the token list the context is taken from; may be {@code null}
         * @param subPhrases the sub-phrases, or {@code null} for none
         */
        public PhraseWithContext(LinkedList<UDToken> ph, LinkedList<UDToken> maxph, LinkedList<PhraseWithContext> subPhrases) {
            this.ph = ph;
            this.maxph = maxph;
            this.subPhrases = subPhrases;
        }

        /** Replaces the phrase token list. */
        public void setPhrase(LinkedList<UDToken> ph) {
            this.ph = ph;
        }

        /** Returns the phrase token list. */
        public LinkedList<UDToken> phrase() {
            return this.ph;
        }

        /** Returns the containing token list; may be {@code null}. */
        public LinkedList<UDToken> maxPhrase() {
            return this.maxph;
        }

        /** Returns the sub-phrase list; {@code null} when none was supplied. */
        public LinkedList<PhraseWithContext> subPhrases() {
            return this.subPhrases;
        }

        /**
         * Replaces the containing token list on this phrase and, recursively,
         * on every sub-phrase.
         *
         * @param maxph the new containing token list
         */
        public void setMaxPhrase(LinkedList<UDToken> maxph) {
            this.maxph = maxph;
            if (this.subPhrases == null) {
                return;
            }
            for (PhraseWithContext child : this.subPhrases) {
                child.setMaxPhrase(maxph);
            }
        }

        /**
         * Builds a context string from the lemmas of {@code maxph} tokens that
         * are not contained in {@code ph}.
         * <p>
         * The tokens of {@code maxph} are visited in order. For every run of
         * outside tokens that is followed by a phrase token, the lemma of the
         * last token of the run is appended. For a run that reaches the end of
         * {@code maxph}, the lemma of its first token is appended. The lemmas
         * are concatenated without a separator.
         *
         * @return the concatenated lemmas, or {@code null} when {@code maxph}
         *         is {@code null} or the result is empty
         */
        public String context() {
            if (this.maxph == null) {
                return null;
            }
            StringBuilder collected = new StringBuilder();
            // Lemma of the most recent outside token in the current run; null means no run is open.
            // NOTE(review): a null lemma would look like "no run open" — presumably lemmas are non-null.
            String lastOutside = null;
            // Lemma of the first outside token in the current run.
            String firstOutside = null;
            for (UDToken tok : this.maxph) {
                if (!this.ph.contains(tok)) {
                    if (lastOutside == null) {
                        firstOutside = tok.lemma;
                    }
                    lastOutside = tok.lemma;
                } else if (lastOutside != null) {
                    // A phrase token closes the run: keep the nearest outside lemma.
                    collected.append(lastOutside);
                    firstOutside = null;
                    lastOutside = null;
                }
            }
            if (firstOutside != null) {
                collected.append(firstOutside);
            }
            return collected.length() == 0 ? null : collected.toString();
        }
    }

    /**
     * Index from a token's {@code index} value to the list of
     * (token, sub-phrase list) pairs registered for it.
     */
    private class SubphrasesIndex {
        private HashMap<Integer, LinkedList<Pair<UDToken, LinkedList<PhraseWithContext>>>> phIndex =
                new HashMap<Integer, LinkedList<Pair<UDToken, LinkedList<PhraseWithContext>>>>();

        /**
         * Appends {@code pair} to the list stored under {@code t.index},
         * creating that list on first use.
         *
         * @param t    the token whose {@code index} is the key
         * @param pair the entry to append
         */
        public void add(UDToken t, Pair<UDToken, LinkedList<PhraseWithContext>> pair) {
            LinkedList<Pair<UDToken, LinkedList<PhraseWithContext>>> bucket = this.phIndex.get(t.index);
            if (bucket != null) {
                bucket.add(pair);
                return;
            }
            bucket = new LinkedList<Pair<UDToken, LinkedList<PhraseWithContext>>>();
            bucket.add(pair);
            this.phIndex.put(t.index, bucket);
        }

        /**
         * Returns the entries registered under {@code t.index}.
         *
         * @param t the token whose {@code index} is the key
         * @return the stored list itself, or {@code null} when nothing was added
         */
        public LinkedList<Pair<UDToken, LinkedList<PhraseWithContext>>> get(UDToken t) {
            return this.phIndex.get(t.index);
        }
    }

    /**
     * Maps a token's {@code index} value to the list of tokens that were
     * added as related to it.
     */
    private class UDStructure {
        private HashMap<Integer, LinkedList<UDToken>> struct = new HashMap<Integer, LinkedList<UDToken>>();

        /**
         * Appends {@code t2} to the list stored under {@code t1.index},
         * creating that list on first use.
         *
         * @param t1 the token whose {@code index} is the key
         * @param t2 the token to record under that key
         */
        public void add(UDToken t1, UDToken t2) {
            LinkedList<UDToken> related = this.struct.get(t1.index);
            if (related != null) {
                related.add(t2);
                return;
            }
            related = new LinkedList<UDToken>();
            related.add(t2);
            this.struct.put(t1.index, related);
        }

        /**
         * Returns the tokens recorded under {@code t.index}.
         *
         * @param t the token whose {@code index} is the key
         * @return the stored list itself, or {@code null} when nothing was added
         */
        public LinkedList<UDToken> getRelatedNodes(UDToken t) {
            return this.struct.get(t.index);
        }
    }
}

