package pl.waw.ipipan.zil.core.scoreference.readers;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import pl.waw.ipipan.zil.core.scoreference.basic.AnnotationPair;
import pl.waw.ipipan.zil.core.scoreference.basic.SingleTextAnnotation;
import pl.waw.ipipan.zil.core.scoreference.basic.SingleTextAnnotationImpl;
import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEICoreference;
import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEICorpusText;
import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEIMention;
import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEIMorph;
import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEIParagraph;
import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEISegment;
import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEISentence;
import pl.waw.ipipan.zil.nkjp.teiapi.api.exceptions.TEIException;
import pl.waw.ipipan.zil.nkjp.teiapi.api.io.TEI_IO;

/**
 * {@link Reader} implementation that loads pairs of coreference annotations
 * ("gold" and "sys") from two parallel directory trees in the NKJP TEI layout.
 *
 * <p>Every mention is identified by a string built from the character spans of
 * its morphs (see {@link #getMentionId}), so two mentions with exactly the same
 * borders share one identifier.
 */
public class Tei implements Reader {
    private static final Logger logger = Logger.getLogger(Tei.class);

    /** File name of the coreference layer, optionally gzipped. */
    private static final Pattern COREFERENCE_FILE = Pattern.compile("ann_coreference\\.xml(\\.gz)?");

    /** Only coreference relations of this type are turned into mention groups. */
    private static final String IDENTITY_TYPE = "ident";

    /**
     * Walks the gold directory tree and, for every text directory that contains a
     * coreference layer file and has a counterpart in the sys tree, adds the pair of
     * annotations to {@code annotationPair}. Entries are visited in sorted order.
     *
     * @param file           root of the gold annotation tree
     * @param file2          root of the sys annotation tree, mirroring the gold one
     * @param annotationPair collector receiving each gold/sys annotation pair
     * @param z              when {@code true}, every mention is reduced to its head morphs
     *                       before its identifier is computed
     * @param z2             passed through unchanged as the third argument of
     *                       {@code AnnotationPair.addPair}
     */
    @Override // pl.waw.ipipan.zil.core.scoreference.readers.Reader
    public void loadAnnotationsFromDirs(File file, File file2, AnnotationPair annotationPair, boolean z, boolean z2) {
        File[] goldEntries = file.listFiles();
        if (goldEntries == null) {
            // listFiles() yields null when the path is not a directory or cannot be read.
            logger.warn("Cannot list gold directory: " + file);
            return;
        }
        Arrays.sort(goldEntries);
        for (File goldEntry : goldEntries) {
            File sysEntry = new File(file2, goldEntry.getName());
            boolean isCoreferenceFile = COREFERENCE_FILE.matcher(goldEntry.getName()).matches();
            if (!sysEntry.exists()) {
                if (isCoreferenceFile) {
                    logger.warn("No sys anno found for gold anno: " + goldEntry);
                }
                continue;
            }
            if (goldEntry.isDirectory()) {
                loadAnnotationsFromDirs(goldEntry, sysEntry, annotationPair, z, z2);
            } else if (isCoreferenceFile) {
                // The TEI reader works on the directory holding the layer files, not on the file itself.
                loadAnnotationsFrom2Files(goldEntry.getParentFile(), sysEntry.getParentFile(), annotationPair, z, z2);
            }
        }
    }

    /**
     * Reads the gold and sys annotation of a single text and registers them as a pair.
     * A {@link TEIException} is logged (with its stack trace) and the text is skipped.
     *
     * @param goldTextDir    directory of the gold version of the text
     * @param sysTextDir     directory of the sys version of the text
     * @param annotationPair collector receiving the pair
     * @param headsOnly      reduce mentions to their head morphs
     * @param pairOption     forwarded as the third argument of {@code addPair}
     */
    private static void loadAnnotationsFrom2Files(File goldTextDir, File sysTextDir, AnnotationPair annotationPair,
            boolean headsOnly, boolean pairOption) {
        try {
            SingleTextAnnotation gold = getAnnotation(goldTextDir, headsOnly);
            SingleTextAnnotation sys = getAnnotation(sysTextDir, headsOnly);
            annotationPair.addPair(gold, sys, pairOption);
        } catch (TEIException e) {
            logger.error("Failed to load annotations from " + goldTextDir + " and " + sysTextDir + ": "
                    + e.getLocalizedMessage(), e);
        }
    }

    /**
     * Builds the annotation of one text: mention groups for every "ident" coreference,
     * singletons for all remaining mentions, and the set of zero-subject mention ids.
     *
     * @param textDir   directory read through {@code TEI_IO.readFromNKJPDirectory}
     * @param headsOnly when {@code true}, each mention's morph list is replaced in place
     *                  by its head morphs before any identifier is computed
     * @return the annotation, named after {@code textDir.getPath()}
     * @throws TEIException if the text cannot be read
     */
    private static SingleTextAnnotation getAnnotation(File textDir, boolean headsOnly) throws TEIException {
        TEICorpusText text = TEI_IO.getInstance().readFromNKJPDirectory(textDir);
        SingleTextAnnotationImpl annotation = new SingleTextAnnotationImpl(textDir.getPath());
        Map<String, Integer> parOffsets = getParOffsets(text);
        HashSet<String> zeroSubjectIds = new HashSet<String>();

        // Pass 1: optionally shrink mentions to their heads and count mentions with identical borders.
        int duplicates = 0;
        Set<String> seenBorders = new HashSet<String>();
        for (TEISentence sentence : text.getAllSentences()) {
            for (TEIMention mention : sentence.getAllMentions()) {
                if (headsOnly) {
                    // Snapshot the heads first in case the head list is backed by the morph list being cleared.
                    List<TEIMorph> heads = new ArrayList<TEIMorph>(mention.getHeadMorphs());
                    mention.getMorphs().clear();
                    mention.getMorphs().addAll(heads);
                }
                if (!seenBorders.add(getMentionId(mention, parOffsets))) {
                    logger.debug("Duplicate mention " + mention.getId() + " in text " + textDir.getName() + ". A mention with exact same borders exists! Will be skipped for evaluation.");
                    duplicates++;
                }
            }
        }

        // Pass 2: one mention group per "ident" coreference; an id already used elsewhere is not added again.
        Set<String> assignedIds = new HashSet<String>();
        int nonSingletons = 0;
        for (TEICoreference coreference : text.getAllCoreferences()) {
            if (!IDENTITY_TYPE.equals(coreference.getType())) {
                continue;
            }
            Set<String> group = new HashSet<String>();
            for (TEIMention mention : coreference.getMentions()) {
                String mentionId = getMentionId(mention, parOffsets);
                if (assignedIds.add(mentionId)) {
                    group.add(mentionId);
                    if (mention.isZeroSubject()) {
                        zeroSubjectIds.add(mentionId);
                    }
                    nonSingletons++;
                }
            }
            if (!group.isEmpty()) {
                annotation.addMentionGroup(group.toArray(new String[0]));
            }
        }

        // Pass 3: every mention not placed in a group above becomes a singleton.
        int singletons = 0;
        for (TEISentence sentence : text.getAllSentences()) {
            for (TEIMention mention : sentence.getAllMentions()) {
                String mentionId = getMentionId(mention, parOffsets);
                if (assignedIds.add(mentionId)) {
                    annotation.addSingletons(mentionId);
                    singletons++;
                    if (mention.isZeroSubject()) {
                        zeroSubjectIds.add(mentionId);
                    }
                }
            }
        }

        annotation.setZeroSubjectIds(zeroSubjectIds);
        logger.debug(nonSingletons + " non-singletons, " + singletons + " singletons, " + duplicates + " skipped duplicates in text " + textDir.getName());
        return annotation;
    }

    /**
     * Computes, for every paragraph, the running offset at which it starts within the whole text.
     * Each morph advances the offset by the length of its segment, plus one unless
     * {@code hasNps()} is true (presumably "no preceding space" - confirm against the TEI API).
     *
     * @param text the text whose paragraphs are measured
     * @return map from paragraph id to the offset accumulated before that paragraph
     */
    private static Map<String, Integer> getParOffsets(TEICorpusText text) {
        Map<String, Integer> offsets = new HashMap<String, Integer>();
        int offset = 0;
        for (TEIParagraph paragraph : text.getParagraphs()) {
            offsets.put(paragraph.getId(), Integer.valueOf(offset));
            for (TEIMorph morph : paragraph.getMorphs()) {
                offset += morph.getCorrespSegment().getLength();
                if (!morph.hasNps()) {
                    offset++;
                }
            }
        }
        return offsets;
    }

    /**
     * Builds a mention identifier from the spans of its morphs: for each morph,
     * {@code <paragraph offset + segment offset>:<segment length>#} is appended.
     *
     * @param mention    mention to identify
     * @param parOffsets paragraph start offsets as produced by {@link #getParOffsets}
     * @return the concatenated span description (empty for a mention without morphs)
     * @throws IllegalStateException if a morph belongs to a paragraph missing from {@code parOffsets}
     */
    private static String getMentionId(TEIMention mention, Map<String, Integer> parOffsets) {
        StringBuilder id = new StringBuilder();
        for (TEIMorph morph : mention.getMorphs()) {
            TEISegment segment = morph.getCorrespSegment();
            String paragraphId = segment.getParagraph().getId();
            Integer parOffset = parOffsets.get(paragraphId);
            if (parOffset == null) {
                throw new IllegalStateException("No offset known for paragraph " + paragraphId
                        + " referenced by mention " + mention.getId());
            }
            id.append(segment.getOffset() + parOffset.intValue())
                    .append(':')
                    .append(segment.getLength())
                    .append('#');
        }
        return id.toString();
    }
}
