/*
 * Decompiled with CFR 0.152.
 */
package utils;

import corpusapi.ContinueMode;
import corpusapi.Corpus;
import corpusapi.CorpusFactory;
import corpusapi.SenseSegmentGroup;
import corpusapi.tei.TEICorpus;
import corpusapi.tei.TEICorpusText;
import corpusapi.tei.TEISegment;
import corpusapi.tei.TEISenseEntry;
import corpusapi.tei.TEISenseInventory;
import corpusapi.tei.TEISenseSegmentGroup;
import evaluation.AnnotationStats;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import utils.Counter;
import utils.FileManager;

/**
 * Static helpers for loading TEI corpora, sampling/splitting them on disk and
 * collecting simple statistics (sense annotation counts, text/segment counts).
 *
 * <p>Progress and errors are reported on standard output / standard error.
 * Several methods terminate the JVM with {@code System.exit(1)} when an I/O
 * problem occurs; this is noted on each affected method.
 */
public class CorpusManager {
    /** Configuration file of the corpus part used for the final evaluation. */
    private static final String FINAL_EVALUATION_CONFIG = "data/corpora/wypluwkaPart2.xml";

    /** Configuration file of the corpus part used during development. */
    private static final String DEVELOPMENT_CONFIG = "data/corpora/wypluwkaPart1.xml";

    /**
     * Loads and opens the corpus described by the given configuration file.
     *
     * @param configFilePath path of the corpus configuration file
     * @return the opened corpus
     * @throws Exception whatever the corpus factory or {@code open()} throws
     */
    public static TEICorpus getCorpusFromConfigFile(String configFilePath) throws Exception {
        System.out.println("Loading corpus from: " + configFilePath);
        CorpusFactory factory = CorpusFactory.getInstance();
        TEICorpus corpus = (TEICorpus)factory.getCorpus(configFilePath, true);
        corpus.open();
        System.out.println("Corpus with " + corpus.getCorpusTextIds().size() + " texts loaded.");
        return corpus;
    }

    /**
     * Loads the corpus configured in {@code data/corpora/wypluwkaPart2.xml}.
     *
     * @return the opened corpus
     * @throws Exception if loading fails, see {@link #getCorpusFromConfigFile(String)}
     */
    public static TEICorpus getWypluwkaForFinalEvaluation() throws Exception {
        return CorpusManager.getCorpusFromConfigFile(FINAL_EVALUATION_CONFIG);
    }

    /**
     * Loads the corpus configured in {@code data/corpora/wypluwkaPart1.xml}.
     *
     * @return the opened corpus
     * @throws Exception if loading fails, see {@link #getCorpusFromConfigFile(String)}
     */
    public static TEICorpus getWypluwkaForDevelopment() throws Exception {
        return CorpusManager.getCorpusFromConfigFile(DEVELOPMENT_CONFIG);
    }

    /**
     * Copies a pseudo-random sample of at most {@code textCount} texts of the
     * corpus into {@code targetPath}.
     *
     * <p>The random generator uses a fixed seed, so the same corpus always
     * yields the same sample. If the corpus holds fewer than {@code textCount}
     * texts, all of them are copied. On an {@link IOException} during copying
     * the JVM is terminated with exit status 1.
     *
     * @param c corpus to sample from
     * @param textCount maximum number of texts in the sample; must not be negative
     * @param targetPath path prefix under which the texts are copied
     * @throws IllegalArgumentException if {@code textCount} is negative
     */
    public static void getSampleFromCorpus(TEICorpus c, int textCount, String targetPath) {
        if (textCount < 0) {
            throw new IllegalArgumentException("textCount must not be negative: " + textCount);
        }
        Random r = new Random(1L);
        // Work on a copy so the list handed out by the corpus is never modified
        // (splitCorpus takes the same precaution).
        List<String> cids = new ArrayList<String>(c.getCorpusTextIds());
        CorpusManager.trimRandomly(cids, textCount, r);
        // Report the real sample size; it is smaller than textCount for small corpora.
        System.out.println("Randomly selected a sample of " + cids.size() + " texts.");
        System.out.println("Copying selected texts into " + targetPath);
        CorpusManager.copyTexts(c, cids, targetPath);
        System.out.println("Done");
    }

    /**
     * Splits the corpus into two disjoint parts and copies them into two
     * target locations.
     *
     * <p>The first part receives {@code (int)(textCount * proportion)} texts
     * (capped at the corpus size), chosen pseudo-randomly with a fixed seed;
     * the second part receives every text whose id is not in the first part.
     * On an {@link IOException} during copying the JVM is terminated with exit
     * status 1.
     *
     * @param c corpus to split
     * @param proportion fraction of the texts that goes into the first part;
     *        must not be negative, values above 1 behave like 1
     * @param targetPath1 path prefix for the first part
     * @param targetPath2 path prefix for the second part
     * @throws IllegalArgumentException if {@code proportion} is negative
     */
    public static void splitCorpus(TEICorpus c, float proportion, String targetPath1, String targetPath2) {
        if (proportion < 0.0f) {
            throw new IllegalArgumentException("proportion must not be negative: " + proportion);
        }
        Random r = new Random(1L);
        List<String> originalTextIds = new ArrayList<String>(c.getCorpusTextIds());
        List<String> cids1 = new ArrayList<String>(originalTextIds);
        List<String> cids2 = new ArrayList<String>(originalTextIds);
        // Cap at the corpus size so the reported second-part size cannot go negative.
        int size1 = Math.min(originalTextIds.size(), (int)((float)originalTextIds.size() * proportion));
        int size2 = originalTextIds.size() - size1;
        CorpusManager.trimRandomly(cids1, size1, r);
        cids2.removeAll(cids1);
        System.out.println("All texts  : " + originalTextIds.size() + "\t in original corpus");
        System.out.println("First part : " + size1 + " texts \t being copied into: " + targetPath1);
        System.out.println("Second part: " + size2 + " texts \t being copied into: " + targetPath2);
        CorpusManager.copyTexts(c, cids1, targetPath1);
        CorpusManager.copyTexts(c, cids2, targetPath2);
        System.out.println("Done");
    }

    /**
     * Counts, per lexeme, how often each sense id is annotated in the corpus.
     *
     * <p>For every sense segment group the sense id is the part of
     * {@code getSenseId()} after the first {@code '#'}, and the lexeme is the
     * part of that sense id before the first {@code '.'}. Groups whose lexeme
     * equals {@code "null"} (ignoring case) are not counted. A text that
     * raises an exception while being processed is reported and skipped;
     * counts gathered from it before the failure are kept. Afterwards the
     * orthographic form and part of speech of every dictionary entry are
     * stored in the result.
     *
     * @param corpus corpus whose annotations are counted
     * @param dict sense inventory the statistics refer to
     * @return the collected statistics
     */
    public static AnnotationStats getSenseStatisticsForCorpus(Corpus corpus, TEISenseInventory dict) {
        AnnotationStats result = new AnnotationStats(dict);
        for (String corpusTextId : corpus.getCorpusTextIds()) {
            TEICorpusText corpusText = (TEICorpusText)corpus.getCorpusText(corpusTextId);
            try {
                TEISenseSegmentGroup currentSsg = (TEISenseSegmentGroup)corpusText.getFirstSegmentGroup(SenseSegmentGroup.class);
                while (currentSsg != null) {
                    String senseId = currentSsg.getSenseId().split("#")[1];
                    String lexeme = senseId.split("\\.")[0];
                    if (!lexeme.equalsIgnoreCase("null")) {
                        Counter c = (Counter)result.get(lexeme);
                        if (c == null) {
                            c = new Counter();
                            result.put(lexeme, c);
                        }
                        c.increase(senseId);
                    }
                    currentSsg = currentSsg.getNext(ContinueMode.ALWAYS_CONTINUE);
                }
            }
            catch (Exception e) {
                e.printStackTrace();
                // getMessage() may be null (e.g. for a NullPointerException); fall back
                // to toString() so reporting the skip cannot itself fail.
                String reason = e.getMessage() != null ? e.getMessage() : e.toString();
                System.out.println(String.valueOf(corpusText.getPath()) + " Skipping text because of: " + reason.replaceAll("\\n", " "));
            }
            finally {
                // Runs on success, on a skipped text and when an error propagates.
                corpusText.closeCorpusText();
            }
        }
        for (String senseId : dict.getSenseEntries().keySet()) {
            TEISenseEntry entry = (TEISenseEntry)dict.getSenseEntries().get(senseId);
            result.senseIdToOrth.put(senseId, entry.getOrthForm());
            result.senseIdToPos.put(senseId, entry.getPOS());
        }
        return result;
    }

    /**
     * Prints the number of texts and the total number of segments of the corpus.
     *
     * <p>If reading a text raises an exception, the stack trace is printed and
     * the JVM is terminated with exit status 1.
     *
     * @param corpus corpus to inspect
     */
    public static void printCorpusStats(TEICorpus corpus) {
        int textCount = 0;
        int segmentCount = 0;
        for (String corpusTextId : corpus.getCorpusTextIds()) {
            TEICorpusText corpusText = null;
            try {
                corpusText = corpus.getCorpusText(corpusTextId);
                ++textCount;
                TEISegment seg = corpusText.getFirstSegment();
                while (seg != null) {
                    ++segmentCount;
                    seg = seg.getNext();
                }
            }
            catch (Exception e) {
                e.printStackTrace();
                System.exit(1);
            }
            finally {
                // corpusText is still null when getCorpusText itself failed; closing
                // unconditionally would hide the real error behind a NullPointerException.
                if (corpusText != null) {
                    corpusText.closeCorpusText();
                }
            }
        }
        System.out.println("Texts: " + textCount);
        System.out.println("Segments: " + segmentCount);
    }

    /** Removes pseudo-randomly chosen elements until at most {@code targetSize} remain. */
    private static void trimRandomly(List<String> ids, int targetSize, Random r) {
        while (ids.size() > targetSize) {
            ids.remove(r.nextInt(ids.size()));
        }
    }

    /** Copies the directory of each listed text below {@code targetPath}; exits the JVM on I/O failure. */
    private static void copyTexts(TEICorpus c, List<String> textIds, String targetPath) {
        for (String tid : textIds) {
            TEICorpusText t = c.getCorpusText(tid);
            String from = t.getPath();
            // Drop every "../" from the source path before appending it to the target prefix.
            String to = String.valueOf(targetPath) + from.replaceAll("\\.\\./", "");
            try {
                FileManager.copyDirectory(new File(from), new File(to));
            }
            catch (IOException e) {
                System.out.println("FROM " + from);
                System.out.println("TO " + to);
                e.printStackTrace();
                System.exit(1);
            }
        }
    }
}

