package pl.waw.ipipan.zil.core.md.detection.zero;

import ipipan.clarin.tei.api.entities.TEIMention;
import ipipan.clarin.tei.api.entities.TEIMorph;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import pl.waw.ipipan.zil.core.md.entities.Mention;
import pl.waw.ipipan.zil.core.md.entities.Sentence;
import pl.waw.ipipan.zil.core.md.entities.SyntacticGroup;
import pl.waw.ipipan.zil.core.md.entities.SyntacticWord;
import pl.waw.ipipan.zil.core.md.entities.Token;

/* loaded from: input_file:main/md-1.2-SNAPSHOT.jar:pl/waw/ipipan/zil/core/md/detection/zero/FeatureGeneration.class */
/**
 * Static helpers that describe a verb token and its sentence context as a flat
 * feature map, plus the clause segmentation those features rely on.
 *
 * <p>NOTE(review): the package name suggests the features feed a zero-subject
 * detector; nothing in this class confirms how the map is consumed.
 */
public class FeatureGeneration {
    /** Lemmas (conjunctions and punctuation) that end a clause once that clause already holds a verb. */
    private static final Set<String> CLAUSE_SPLIT_LEMMAS = new HashSet<String>(Arrays.asList(
            "i", "albo", "lub", "oraz", "bądź", "ani", "czy", "niż", "tudzież", ",", ";", "-", "–", ":"));

    /** Further lemmas treated exactly like {@link #CLAUSE_SPLIT_LEMMAS} when splitting clauses. */
    private static final Set<String> CLAUSE_SPLIT_LEMMAS2 = new HashSet<String>(Arrays.asList(
            "a", "ale", "lecz", "jednak", "jednakże", "zaś", "wszakże", "owszem", "natomiast", "tylko",
            "dlatego", "jedynie", "przecież", "tymczasem", "ponieważ", "więc", "toteż", "zatem"));

    /** Lemmas that end a clause even when no verb has been seen in it yet. */
    private static final Set<String> CLAUSE_SPLIT_LEMMAS_STRICT = new HashSet<String>(Arrays.asList("?", "!"));

    /** Opening/closing delimiter pairs; populated below but not referenced anywhere else in this class. */
    private static final Map<String, String> CLAUSE_SPLIT_LEMMAS_PAIRWISE = new HashMap<String, String>();

    /** Ctags of tokens that count as agreement candidates in {@code addFeatures}. */
    private static final Set<String> NOUN_TAGS = new HashSet<String>(Arrays.asList(
            "subst", "depr", "ppron12", "ppron3", "ger", "num", "numcol"));

    /** Ctags of candidates whose own person value is used instead of the default {@code "ter"}. */
    private static final Set<String> PRONOUN_TAGS = new HashSet<String>(Arrays.asList("ppron12", "ppron3"));

    /** Ctags recognised as verbs by the {@code isVerb} overloads. */
    private static final Set<String> VERB_TAGS = new HashSet<String>(Arrays.asList(
            "fin", "bedzie", "aglt", "praet", "winien"));

    /** Lemmas of relative pronouns ("zaimki względne"); a clause opening with one is merged into its predecessor. */
    private static final Set<String> ZAIMKI_WZGLEDNE_LEMMAS = new HashSet<String>(Arrays.asList("jaki", "który"));

    /** Largest window radius (in tokens) for which window features are produced. */
    private static final int MAX_WINDOW_RADIUS = 5;

    /** Number of leading clause tokens inspected for a relative pronoun. */
    private static final int RELATIVE_PRONOUN_LOOKAHEAD = 2;

    static {
        CLAUSE_SPLIT_LEMMAS_PAIRWISE.put("(", ")");
        CLAUSE_SPLIT_LEMMAS_PAIRWISE.put("\"", "\"");
        CLAUSE_SPLIT_LEMMAS_PAIRWISE.put("'", "'");
    }

    /**
     * Fills {@code map} with features describing {@code token} within {@code sentence}.
     *
     * <p>Written keys: the verb's own ctag/number/gender/person, {@code quasi}, the
     * neighbouring ctags, four positional flags, sentence and clause length, and the
     * {@code cand_*} agreement flags for the clause, the whole sentence and every
     * left-only, right-only and symmetric window of radius 1 to 5.
     *
     * @param map      destination of the features; existing entries with the same keys are overwritten
     * @param token    the verb token being described
     * @param sentence the sentence containing {@code token}
     * @param set      lemmas for which the {@code quasi} feature is true
     *                 (presumably the quasi-verb lexicon - confirm with the caller)
     * @throws NullPointerException if {@code token} belongs to no clause of {@code sentence},
     *                              because {@link #getClause} then returns {@code null}
     */
    public static void generateFeatures(Map<String, Object> map, Token token, Sentence sentence, Set<String> set) {
        map.put("verbCtag", token.getChosenInterpretation().getCtag());
        map.put("verbNumber", token.getChosenInterpretation().getNumber());
        map.put("verbGender", token.getChosenInterpretation().getGender());
        map.put("verbPerson", token.getChosenInterpretation().getPerson());
        map.put("quasi", set.contains(token.getChosenInterpretation().getBase()));
        map.put("nextCtag", getNeighbouringTag(sentence, token, 1));
        map.put("prevCtag", getNeighbouringTag(sentence, token, -1));
        map.put("isPrevPraet", isPrevPraet(token, sentence));
        map.put("isPrevComma", isPrevComma(token, sentence));
        map.put("isPrev2Pred", isPrev2Pred(token, sentence));
        map.put("isNextInf", isNextInf(token, sentence));

        List<Token> clause = getClause(sentence, token);
        map.put("sentLength", sentence.size());
        map.put("clauseLength", clause.size());
        addFeatures(map, clause, "clause", token);
        addFeatures(map, sentence, "sent", token);

        // Three separate loops keep the insertion order of the keys unchanged
        // for callers that pass an order-preserving map.
        for (int radius = 1; radius <= MAX_WINDOW_RADIUS; radius++) {
            addFeatures(map, getWindow(sentence, token, radius, 0), "window_" + radius + "_0", token);
        }
        for (int radius = 1; radius <= MAX_WINDOW_RADIUS; radius++) {
            addFeatures(map, getWindow(sentence, token, 0, radius), "window_0_" + radius, token);
        }
        for (int radius = 1; radius <= MAX_WINDOW_RADIUS; radius++) {
            addFeatures(map, getWindow(sentence, token, radius, radius), "window_" + radius + "_" + radius, token);
        }
    }

    /**
     * Returns the token {@code offset} positions away from the first occurrence of
     * {@code token} in {@code sentence}, or {@code null} when that position is out of range.
     *
     * <p>When {@code token} is absent, the position just past the last token is used as
     * the reference point, so negative offsets address the end of the sentence.
     */
    private static Token tokenAtOffset(Sentence sentence, Token token, int offset) {
        int position = 0;
        for (Token current : sentence) {
            if (token.equals(current)) {
                break;
            }
            position++;
        }
        int target = position + offset;
        return (target >= 0 && target < sentence.size()) ? sentence.get(target) : null;
    }

    /** Null-safe check that the chosen interpretation of {@code token} carries the given ctag. */
    private static boolean hasChosenCtag(Token token, String ctag) {
        return token != null && ctag.equals(token.getChosenInterpretation().getCtag());
    }

    /** True when the token directly after {@code token} has the ctag {@code "inf"}. */
    private static boolean isNextInf(Token token, Sentence sentence) {
        return hasChosenCtag(tokenAtOffset(sentence, token, 1), "inf");
    }

    /** True when either of the two tokens directly before {@code token} has the ctag {@code "pred"}. */
    private static boolean isPrev2Pred(Token token, Sentence sentence) {
        return hasChosenCtag(tokenAtOffset(sentence, token, -1), "pred")
                || hasChosenCtag(tokenAtOffset(sentence, token, -2), "pred");
    }

    /** True when the token directly before {@code token} has the lemma {@code ","}. */
    private static boolean isPrevComma(Token token, Sentence sentence) {
        Token previous = tokenAtOffset(sentence, token, -1);
        return previous != null && ",".equals(previous.getChosenInterpretation().getBase());
    }

    /**
     * Returns the ctag of the token {@code i} positions from {@code token}, or the
     * string {@code "None"} when that position lies outside the sentence.
     */
    private static String getNeighbouringTag(Sentence sentence, Token token, int i) {
        int index = sentence.indexOf(token) + i;
        if (index < 0 || index >= sentence.size()) {
            return "None";
        }
        return sentence.get(index).getChosenInterpretation().getCtag();
    }

    /**
     * Scans {@code tokens} for noun-like candidates and records, under keys suffixed
     * with {@code scope}, which combinations of nominative case, number agreement and
     * gender-or-person agreement ("POG" in the key names) with {@code verb} occur.
     *
     * <p>A token is a candidate when its ctag is in {@link #NOUN_TAGS} and the token
     * before it in {@code tokens} is not "jak"/"jako". The {@code cand_2_*} flags are
     * true when at least two candidates satisfy the respective condition.
     */
    private static void addFeatures(Map<String, Object> features, List<Token> tokens, String scope, Token verb) {
        boolean anyNom = false;
        boolean anyNum = false;
        boolean anyPog = false;
        boolean anyNomNum = false;
        boolean anyNumPog = false;
        boolean anyNomPog = false;
        boolean anyNomNumPog = false;
        boolean twoNom = false;
        boolean twoNomPog = false;
        boolean twoPog = false;

        Token previous = null;
        for (Token candidate : tokens) {
            boolean eligible = isNoun(candidate) && !isJakJako(previous);
            previous = candidate;
            if (!eligible) {
                continue;
            }
            boolean nom = isNom(candidate);
            boolean num = agreedNum(candidate, verb);
            boolean pog = agreedGenderOrPerson(candidate, verb);

            if (nom) {
                twoNom |= anyNom;
                anyNom = true;
            }
            if (num) {
                anyNum = true;
            }
            if (pog) {
                twoPog |= anyPog;
                anyPog = true;
            }
            if (nom && num) {
                anyNomNum = true;
                if (pog) {
                    anyNomNumPog = true;
                }
            }
            if (pog) {
                if (nom) {
                    twoNomPog |= anyNomPog;
                    anyNomPog = true;
                } else if (num) {
                    // Only set for non-nominative candidates; kept as in the original feature definition.
                    anyNumPog = true;
                }
            }
        }

        features.put("cand_2_nom_" + scope, twoNom);
        features.put("cand_2_POG_" + scope, twoPog);
        features.put("cand_2_nom+POG_" + scope, twoNomPog);
        features.put("cand_nom_" + scope, anyNom);
        features.put("cand_num_" + scope, anyNum);
        features.put("cand_POG_" + scope, anyPog);
        features.put("cand_nom+num_" + scope, anyNomNum);
        features.put("cand_nom+num+POG_" + scope, anyNomNumPog);
        features.put("cand_nom+POG_" + scope, anyNomPog);
        features.put("cand_num+POG_" + scope, anyNumPog);
    }

    /**
     * Copies the slice of {@code sentence} reaching {@code before} tokens to the left and
     * {@code after} tokens to the right of {@code token} (inclusive), clipped to the sentence bounds.
     */
    private static List<Token> getWindow(Sentence sentence, Token token, int before, int after) {
        int centre = sentence.indexOf(token);
        int from = Math.max(0, centre - before);
        int to = Math.min(sentence.size(), centre + after + 1);
        return new ArrayList<Token>(sentence.subList(from, to));
    }

    /** True when the token directly before {@code token} has the ctag {@code "praet"}. */
    private static boolean isPrevPraet(Token token, Sentence sentence) {
        return hasChosenCtag(tokenAtOffset(sentence, token, -1), "praet");
    }

    /**
     * Finds the clause of {@code sentence} that contains {@code token}.
     *
     * @return the first clause from {@link #getClauses} holding an equal token,
     *         or {@code null} when no clause does
     */
    public static List<Token> getClause(Sentence sentence, Token token) {
        for (List<Token> clause : getClauses(sentence)) {
            for (Token member : clause) {
                if (member.equals(token)) {
                    return clause;
                }
            }
        }
        return null;
    }

    /**
     * Splits {@code sentence} into clauses.
     *
     * <p>A new clause starts at a strict splitter ({@code ?}, {@code !}) or, once the
     * running clause contains a verb, at any other splitter lemma. Splitters that are
     * a non-final token of a syntactic group or syntactic word never split. The
     * splitter itself becomes the first token of the new clause. A trailing segment
     * without a verb is appended to the preceding clause. Finally every clause whose
     * first two tokens include a relative pronoun is merged into the clause before it.
     *
     * @return the clauses in sentence order; each token of the sentence appears in exactly one
     */
    public static List<List<Token>> getClauses(Sentence sentence) {
        Set<Token> nonFinalUnitTokens = new HashSet<Token>();
        for (SyntacticGroup group : sentence.getGroups()) {
            addAllButLast(group.getTokens(), nonFinalUnitTokens);
        }
        for (SyntacticWord word : sentence.getSyntacticWords()) {
            addAllButLast(word.getTokens(), nonFinalUnitTokens);
        }

        LinkedList<List<Token>> clauses = new LinkedList<List<Token>>();
        List<Token> current = new ArrayList<Token>();
        boolean verbSeen = false;
        for (Token tok : sentence) {
            String base = tok.getChosenInterpretation().getBase();
            boolean strictSplit = CLAUSE_SPLIT_LEMMAS_STRICT.contains(base);
            boolean softSplit = CLAUSE_SPLIT_LEMMAS.contains(base) || CLAUSE_SPLIT_LEMMAS2.contains(base);
            if (!nonFinalUnitTokens.contains(tok) && (strictSplit || (softSplit && verbSeen))) {
                clauses.add(current);
                current = new ArrayList<Token>();
                verbSeen = false;
            } else if (isVerb(tok)) {
                verbSeen = true;
            }
            current.add(tok);
        }

        if (!current.isEmpty()) {
            if (verbSeen || clauses.isEmpty()) {
                // With no earlier clause to attach to (a sentence without any verb or
                // strict splitter) the segment must stand alone; getLast() would throw.
                clauses.add(current);
            } else {
                clauses.getLast().addAll(current);
            }
        }

        List<Token> mergeTarget = null;
        Iterator<List<Token>> it = clauses.iterator();
        while (it.hasNext()) {
            List<Token> clause = it.next();
            if (mergeTarget != null && startsWithRelativePronoun(clause)) {
                mergeTarget.addAll(clause);
                it.remove();
            } else {
                mergeTarget = clause;
            }
        }
        return clauses;
    }

    /** Adds every token of {@code tokens} except the last one to {@code target}; an empty list adds nothing. */
    private static void addAllButLast(List<? extends Token> tokens, Set<Token> target) {
        target.addAll(tokens.subList(0, Math.max(0, tokens.size() - 1)));
    }

    /** True when one of the first two tokens of {@code clause} has a relative-pronoun lemma. */
    private static boolean startsWithRelativePronoun(List<Token> clause) {
        int inspected = 0;
        for (Token tok : clause) {
            if (inspected >= RELATIVE_PRONOUN_LOOKAHEAD) {
                break;
            }
            if (ZAIMKI_WZGLEDNE_LEMMAS.contains(tok.getChosenInterpretation().getBase())) {
                return true;
            }
            inspected++;
        }
        return false;
    }

    /** True when {@code candidate} has the same number value as {@code verb}. */
    private static boolean agreedNum(Token candidate, Token verb) {
        return verb.getNumber().equals(candidate.getNumber());
    }

    /**
     * Checks gender agreement when {@code verb} is a {@code praet} form and person
     * agreement otherwise. For person agreement, a candidate whose ctag is in
     * {@link #PRONOUN_TAGS} contributes its own person; any other candidate counts
     * as third person ({@code "ter"}).
     *
     * <p>The pronoun test compares the candidate's ctag. An earlier version looked the
     * {@code Token} object itself up in the set of tag strings, which never matched, so
     * pronouns were always treated as third person. NOTE(review): a model trained on
     * features from that version may need to be retrained.
     */
    private static boolean agreedGenderOrPerson(Token candidate, Token verb) {
        if (isPraet(verb)) {
            return verb.getGender().equals(candidate.getGender());
        }
        String candidatePerson = PRONOUN_TAGS.contains(candidate.getCtag()) ? candidate.getPerson() : "ter";
        return candidatePerson.equals(verb.getPerson());
    }

    /** True when {@code token} is non-null and its base form is "jak" or "jako". */
    private static boolean isJakJako(Token token) {
        if (token == null) {
            return false;
        }
        String base = token.getBase();
        return "jak".equals(base) || "jako".equals(base);
    }

    /** True when the ctag of {@code token} is {@code "praet"}. */
    private static boolean isPraet(Token token) {
        return "praet".equals(token.getCtag());
    }

    /** True when the case of {@code token} is {@code "nom"}. */
    private static boolean isNom(Token token) {
        return "nom".equals(token.getCase());
    }

    /** True when the ctag of {@code token} is one of {@link #NOUN_TAGS}. */
    private static boolean isNoun(Token token) {
        return NOUN_TAGS.contains(token.getCtag());
    }

    /** Returns true when the ctag of {@code token} is one of the verb tags. */
    public static boolean isVerb(Token token) {
        return VERB_TAGS.contains(token.getCtag());
    }

    /**
     * Returns true when every segment of {@code mention} is a verb token;
     * a mention without segments therefore also yields true.
     */
    public static boolean isVerb(Mention mention) {
        for (Token segment : mention.getSegments()) {
            if (!isVerb(segment)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true when every morph of {@code tEIMention} is a verb;
     * a mention without morphs therefore also yields true.
     */
    public static boolean isVerb(TEIMention tEIMention) {
        for (TEIMorph morph : tEIMention.getMorphs()) {
            if (!isVerb(morph)) {
                return false;
            }
        }
        return true;
    }

    /** True when the chosen interpretation of {@code tEIMorph} carries one of the verb tags. */
    private static boolean isVerb(TEIMorph tEIMorph) {
        return VERB_TAGS.contains(tEIMorph.getChosenInterpretation().getCtag());
    }
}
