package pl.waw.ipipan.zil.summ.nicolas.mention;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import pl.waw.ipipan.zil.multiservice.thrift.types.TInterpretation;
import pl.waw.ipipan.zil.multiservice.thrift.types.TMention;
import pl.waw.ipipan.zil.multiservice.thrift.types.TParagraph;
import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence;
import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
import pl.waw.ipipan.zil.summ.nicolas.Constants;
import pl.waw.ipipan.zil.summ.nicolas.features.FeatureExtractor;
import pl.waw.ipipan.zil.summ.nicolas.features.FeatureHelper;
import pl.waw.ipipan.zil.summ.nicolas.features.Interpretation;
import pl.waw.ipipan.zil.summ.nicolas.utils.ResourceUtils;
import weka.core.Attribute;
import weka.core.TestInstances;
import weka.core.Utils;

/* loaded from: input_file:pl/waw/ipipan/zil/summ/nicolas/mention/MentionFeatureExtractor.class */
/**
 * Feature extractor that describes every mention of a text with a fixed set of Weka attributes.
 *
 * <p>The constructor registers the attribute set: text-level counts, the chain length and, for
 * each of the two prefixes {@code "mention"} and {@code "chain_first_mention"}, a group of
 * positional, orthographic, morphological, paragraph/sentence and frequent-base attributes, plus
 * the nominal {@code "score"} class attribute. {@link #calculateFeatures(TText)} then fills one
 * attribute-to-value map per mention.
 */
public class MentionFeatureExtractor extends FeatureExtractor {
    private static final String SCORE_ATTRIBUTE_NAME = "score";
    private static final String OTHER_VALUE = "other";
    private static final String NULL_VALUE = "null";

    /** Prefix of the attributes describing the mention itself. */
    private static final String MENTION_PREFIX = "mention";
    /** Prefix of the attributes describing the first mention of the mention's chain. */
    private static final String CHAIN_FIRST_MENTION_PREFIX = "chain_first_mention";

    /** Bases loaded via {@link ResourceUtils#loadFrequentBases()}; each yields one binary attribute per prefix. */
    private final List<String> frequentBases = ResourceUtils.loadFrequentBases();

    /**
     * Registers all attributes and then calls {@code fillSortedAttributes} with the score
     * attribute name.
     *
     * @throws IOException declared by this constructor; the visible source does not show which
     *     call raises it (presumably loading the frequent bases resource)
     */
    public MentionFeatureExtractor() throws IOException {
        addNumericAttributeNormalized("chain_length");

        // NOTE(review): calculateFeatures also writes "text_char_count", which is not registered
        // here. Left unregistered on purpose so the attribute set stays unchanged - confirm intent.
        addNumericAttribute("text_token_count");
        addNumericAttribute("text_sent_count");
        addNumericAttribute("text_par_count");
        addNumericAttribute("text_mention_count");
        addNumericAttribute("text_cluster_count");

        for (String prefix : Lists.newArrayList(MENTION_PREFIX, CHAIN_FIRST_MENTION_PREFIX)) {
            // position of the mention
            addNumericAttributeNormalized(prefix + "_index");
            addNumericAttributeNormalized(prefix + "_index_in_sent");
            addNumericAttributeNormalized(prefix + "_index_in_par");
            addNumericAttributeNormalized(prefix + "_index_in_chain");

            // surface form
            addBinaryAttribute(prefix + "_capitalized");
            addBinaryAttribute(prefix + "_all_caps");
            addNumericAttributeNormalized(prefix + "_char_count");
            addNumericAttributeNormalized(prefix + "_token_count");
            addBinaryAttribute(prefix + "_is_zero");
            addBinaryAttribute(prefix + "_is_named");
            addBinaryAttribute(prefix + "_is_pronoun");

            // morphology of the head token's chosen interpretation
            addNominalAttribute(prefix + "_ctag", Constants.POS_TAGS);
            addNominalAttribute(prefix + "_person",
                    Lists.newArrayList(OTHER_VALUE, NULL_VALUE, "pri", "sec", "ter"));
            addNominalAttribute(prefix + "_case",
                    Lists.newArrayList(OTHER_VALUE, NULL_VALUE, "nom", "acc", "dat", "gen", "loc", "inst", "voc"));
            addNominalAttribute(prefix + "_number",
                    Lists.newArrayList(OTHER_VALUE, NULL_VALUE, "sg", "pl"));
            addNominalAttribute(prefix + "_gender",
                    Lists.newArrayList(OTHER_VALUE, NULL_VALUE, "f", "m1", "m2", "m3", "n"));

            // nesting
            addBinaryAttribute(prefix + "_is_nested");
            addBinaryAttribute(prefix + "_is_nesting");

            // enclosing paragraph and sentence
            addNumericAttributeNormalized(prefix + "_par_idx");
            addNumericAttributeNormalized(prefix + "_par_token_count");
            addNumericAttributeNormalized(prefix + "_par_sent_count");
            addNumericAttributeNormalized(prefix + "_sent_token_count");
            addNumericAttributeNormalized(prefix + "_sent_mention_count");
            addNumericAttributeNormalized(prefix + "_sent_idx");
            addNumericAttributeNormalized(prefix + "_sent_idx_in_par");
            addBinaryAttribute(prefix + "_sent_ends_with_dot");
            addBinaryAttribute(prefix + "_sent_ends_with_questionmark");

            // one binary attribute per frequent base
            for (String base : this.frequentBases) {
                addBinaryAttribute(prefix + "_" + encodeBase(base));
            }
        }

        addNominalAttribute(SCORE_ATTRIBUTE_NAME, Lists.newArrayList("bad", "good"));
        fillSortedAttributes(SCORE_ATTRIBUTE_NAME);
    }

    /**
     * Builds the attribute-name suffix for a frequent base.
     *
     * @param str the base form
     * @return {@code "base_equal_"} followed by the base, in which every match of the regular
     *     expression {@code TestInstances.DEFAULT_SEPARATORS} is replaced by {@code "_"} and every
     *     double quote by {@code "Q"}
     */
    private String encodeBase(String str) {
        String sanitized = str
                .replaceAll(TestInstances.DEFAULT_SEPARATORS, "_")
                .replaceAll("\"", "Q");
        return "base_equal_" + sanitized;
    }

    /**
     * Computes the attribute values of every mention returned by a {@link FeatureHelper} built
     * for the given text.
     *
     * <p>Each mention's map starts with a missing {@code "score"} value and receives the
     * per-mention attributes (for the mention and for the first mention of its chain), the chain
     * length and the text-level counts. Finally {@code addNormalizedAttributeValues} is applied to
     * the whole result.
     *
     * @param tText the text to extract features from
     * @return a map from each mention to its attribute values
     */
    public Map<TMention, Map<Attribute, Double>> calculateFeatures(TText tText) {
        Map<TMention, Map<Attribute, Double>> mentionToFeatures = new HashMap<>();
        FeatureHelper featureHelper = new FeatureHelper(tText);
        List<TMention> mentions = featureHelper.getMentions();
        addScoreFeature(mentionToFeatures, mentions);

        // Text-level statistics are the same for every mention, so compute them once.
        List<TParagraph> paragraphs = tText.getParagraphs();
        List<TSentence> sentences = paragraphs.stream()
                .flatMap(paragraph -> paragraph.getSentences().stream())
                .collect(Collectors.toList());
        int tokenCount = sentences.stream()
                .mapToInt(sentence -> sentence.getTokens().size())
                .sum();
        double charCount = sentences.stream()
                .flatMap(sentence -> sentence.getTokens().stream())
                .mapToDouble(token -> token.getOrth().length())
                .sum();
        int mentionCount = mentions.size();
        int clusterCount = featureHelper.getClusters().size();

        for (TMention mention : mentions) {
            Map<Attribute, Double> features = mentionToFeatures.get(mention);

            addMentionAttributes(featureHelper, mention, features, MENTION_PREFIX);
            addMentionAttributes(featureHelper, featureHelper.getFirstChainMention(mention), features,
                    CHAIN_FIRST_MENTION_PREFIX);

            putNumeric(features, "chain_length", featureHelper.getChainLength(mention));
            putNumeric(features, "text_char_count", charCount);
            putNumeric(features, "text_token_count", tokenCount);
            putNumeric(features, "text_sent_count", sentences.size());
            putNumeric(features, "text_par_count", paragraphs.size());
            putNumeric(features, "text_mention_count", mentionCount);
            putNumeric(features, "text_cluster_count", clusterCount);
        }

        addNormalizedAttributeValues(mentionToFeatures);
        return mentionToFeatures;
    }

    /**
     * Stores the attributes named {@code str + "_..."} that describe a single mention.
     *
     * @param featureHelper helper giving access to the structure of the analysed text
     * @param tMention the mention to describe
     * @param map the attribute-to-value map to fill
     * @param str the attribute name prefix
     */
    private void addMentionAttributes(FeatureHelper featureHelper, TMention tMention, Map<Attribute, Double> map, String str) {
        TInterpretation headInterpretation =
                FeatureHelper.getChosenInterpretation(featureHelper.getMentionHeadToken(tMention));
        assert !headInterpretation.getBase().isEmpty() : "mention head token has an empty base";

        // position of the mention
        putNumeric(map, str + "_index", featureHelper.getMentionIndex(tMention));
        putNumeric(map, str + "_index_in_sent", featureHelper.getMentionIndexInSent(tMention));
        putNumeric(map, str + "_index_in_par", featureHelper.getMentionIndexInPar(tMention));
        putNumeric(map, str + "_index_in_chain", featureHelper.getMentionIndexInChain(tMention));

        // mention type
        putNumeric(map, str + "_token_count", tMention.getChildIdsSize());
        putNumeric(map, str + "_is_zero", toBinary(tMention.isZeroSubject()));
        putNumeric(map, str + "_is_pronoun", toBinary(headInterpretation.getCtag().matches("ppron.*")));
        putNumeric(map, str + "_is_named", toBinary(featureHelper.isMentionNamedEntity(tMention)));

        // morphology of the head token
        Interpretation morphology = new Interpretation(headInterpretation);
        addNominalAttributeValue(morphology.getCtag(), map, str + "_ctag");
        addNominalAttributeValue(morphology.getPerson(), map, str + "_person");
        addNominalAttributeValue(morphology.getNumber(), map, str + "_number");
        addNominalAttributeValue(morphology.getGender(), map, str + "_gender");
        addNominalAttributeValue(morphology.getCase(), map, str + "_case");

        // nesting
        putNumeric(map, str + "_is_nested", toBinary(featureHelper.isNested(tMention)));
        putNumeric(map, str + "_is_nesting", toBinary(featureHelper.isNesting(tMention)));

        // surface form
        String orth = featureHelper.getMentionOrth(tMention);
        // NOTE(review): substring(0, 1) throws StringIndexOutOfBoundsException for an empty orth.
        String firstChar = orth.substring(0, 1);
        putNumeric(map, str + "_capitalized", toBinary(firstChar.toUpperCase().equals(firstChar)));
        putNumeric(map, str + "_all_caps", toBinary(orth.toUpperCase().equals(orth)));
        putNumeric(map, str + "_char_count", orth.length());

        // enclosing paragraph
        TParagraph paragraph = featureHelper.getMentionParagraph(tMention);
        double paragraphTokenCount = paragraph.getSentences().stream()
                .mapToDouble(sentence -> sentence.getTokens().size())
                .sum();
        putNumeric(map, str + "_par_idx", featureHelper.getParIndex(paragraph));
        putNumeric(map, str + "_par_token_count", paragraphTokenCount);
        putNumeric(map, str + "_par_sent_count", paragraph.getSentences().size());

        // enclosing sentence
        TSentence sentence = featureHelper.getMentionSentence(tMention);
        putNumeric(map, str + "_sent_token_count", sentence.getTokensSize());
        putNumeric(map, str + "_sent_mention_count", sentence.getMentions().size());
        putNumeric(map, str + "_sent_idx", featureHelper.getSentIndex(sentence));
        putNumeric(map, str + "_sent_idx_in_par", featureHelper.getSentIndexInPar(sentence));
        String lastTokenOrth = featureHelper.getSentenceLastTokenOrth(sentence);
        putNumeric(map, str + "_sent_ends_with_dot", toBinary(".".equals(lastTokenOrth)));
        putNumeric(map, str + "_sent_ends_with_questionmark", toBinary("?".equals(lastTokenOrth)));

        // frequent bases: one flag per base, set when it equals the mention's base
        String mentionBase = featureHelper.getMentionBase(tMention);
        for (String frequentBase : this.frequentBases) {
            putNumeric(map, str + "_" + encodeBase(frequentBase), toBinary(mentionBase.equals(frequentBase)));
        }
    }

    /**
     * Stores {@code value} under the attribute looked up by {@code attributeName}.
     *
     * @param values the attribute-to-value map to update
     * @param attributeName name passed to {@code getAttributeByName}
     * @param value the value to store
     */
    private void putNumeric(Map<Attribute, Double> values, String attributeName, double value) {
        values.put(getAttributeByName(attributeName), Double.valueOf(value));
    }

    /**
     * Stores the index of a nominal value under the attribute named {@code str2}.
     *
     * <p>When the attribute does not declare the value, a warning is logged and the index of
     * {@code "other"} is stored instead.
     *
     * @param str the nominal value
     * @param map the attribute-to-value map to update
     * @param str2 the attribute name
     */
    private void addNominalAttributeValue(String str, Map<Attribute, Double> map, String str2) {
        Attribute attribute = getAttributeByName(str2);
        int valueIndex = attribute.indexOfValue(str);
        if (valueIndex == -1) {
            LOG.warn("{}: '{}' value not found for attribute '{}'", getClass().getSimpleName(), str, str2);
            valueIndex = attribute.indexOfValue(OTHER_VALUE);
        }
        map.put(attribute, Double.valueOf(valueIndex));
    }

    /**
     * Creates a fresh value map for every mention, holding only the score attribute set to Weka's
     * missing value, and registers it in {@code map}.
     *
     * @param map the result map to fill
     * @param list the mentions to create entries for
     */
    private void addScoreFeature(Map<TMention, Map<Attribute, Double>> map, List<TMention> list) {
        Attribute scoreAttribute = getAttributeByName(SCORE_ATTRIBUTE_NAME);
        for (TMention mention : list) {
            Map<Attribute, Double> features = new HashMap<>();
            features.put(scoreAttribute, Double.valueOf(Utils.missingValue()));
            map.put(mention, features);
        }
    }
}
