package pl.waw.ipipan.zil.core.scoreference.utils;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.TreeSet;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.commons.io.FileUtils;
import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
import pl.waw.ipipan.zil.nkjp.teiapi.api.io.IOUtils;

/* loaded from: input_file:main/scoreference-1.0-SNAPSHOT.jar:pl/waw/ipipan/zil/core/scoreference/utils/Splitter.class */
/**
 * Command-line tool that splits a corpus into a training part and a test part.
 *
 * <p>Texts found under the corpus directory are grouped by the text type declared in
 * their {@code header.xml} (the {@code target} attribute of the first {@code catRef}
 * element). Each group is shuffled with a fixed seed and divided according to the
 * requested percentage; the selected directories are copied into {@code train} and
 * {@code test} subdirectories of the target directory.
 *
 * <p>Usage: {@code Splitter corpusDir targetDir percent}
 */
public class Splitter {
    private static final Logger logger = Logger.getLogger(Splitter.class);

    /** Fixed seed for the shuffle, so that the pseudo-random sequence is the same on every run. */
    private static final long SHUFFLE_SEED = 1L;

    /**
     * Entry point. Expects exactly three arguments: the corpus directory, the target
     * directory and the percentage (0-100) of texts of every type that goes to the test part.
     *
     * <p>Invalid arguments are reported through the logger and the method returns without
     * copying anything. Texts whose header cannot be read are logged and skipped.
     *
     * @param strArr command-line arguments: {@code corpusDir targetDir percent}
     */
    public static void main(String[] strArr) {
        if (strArr.length != 3) {
            logger.error("Wrong number of arguments! Try: " + Splitter.class.getSimpleName() + " corpusDir targetDir percent");
            return;
        }
        File corpusDir = new File(strArr[0]);
        File targetDir = new File(strArr[1]);

        int percent;
        try {
            percent = Integer.parseInt(strArr[2]);
        } catch (NumberFormatException e) {
            logger.error("Percent must be an integer between 0 and 100, got: " + strArr[2]);
            return;
        }
        // A value above 100 would make the train count negative and break subList() below.
        if (percent < 0 || percent > 100) {
            logger.error("Percent must be an integer between 0 and 100, got: " + percent);
            return;
        }

        HashMap<String, TreeSet<File>> textsByType = groupByTextType(corpusDir);

        File trainDir = new File(targetDir, "train");
        File testDir = new File(targetDir, "test");
        if (!ensureDirectory(trainDir) || !ensureDirectory(testDir)) {
            return;
        }

        // One generator is shared by all text types; the HashMap key order is kept as in the
        // original implementation so that a given corpus keeps producing the same split.
        Random random = new Random(SHUFFLE_SEED);
        for (String textType : textsByType.keySet()) {
            TreeSet<File> texts = textsByType.get(textType);
            int size = texts.size();
            // Every text type contributes at least one text to the test part, even when the
            // percentage rounds down to zero. Long arithmetic avoids int overflow.
            int testCount = Math.max(1, (int) (((long) size * percent) / 100));
            int trainCount = size - testCount;

            List<File> shuffled = new ArrayList<File>(texts);
            Collections.shuffle(shuffled, random);
            List<File> trainTexts = shuffled.subList(0, trainCount);
            List<File> testTexts = shuffled.subList(trainCount, shuffled.size());

            logger.info("Text type:" + textType);
            logger.info("\t" + size + " both");
            logger.info("\t" + trainCount + " train");
            logger.info("\t" + testCount + " test");

            copyAll(trainTexts, trainDir);
            copyAll(testTexts, testDir);
        }
    }

    /**
     * Groups the text directories of a corpus by their text type.
     *
     * @param corpusDir root directory of the corpus, passed to {@code IOUtils.getNKJPDirs}
     *                  (presumably it yields the individual text directories; confirm against
     *                  the TEI API)
     * @return map from text type to the sorted set of text directories of that type;
     *         texts whose header could not be processed are logged and left out
     */
    private static HashMap<String, TreeSet<File>> groupByTextType(File corpusDir) {
        HashMap<String, TreeSet<File>> textsByType = new HashMap<String, TreeSet<File>>();
        for (File textDir : IOUtils.getNKJPDirs(corpusDir)) {
            try {
                String textType = getTextType(textDir);
                TreeSet<File> texts = textsByType.get(textType);
                if (texts == null) {
                    texts = new TreeSet<File>();
                    textsByType.put(textType, texts);
                }
                texts.add(textDir);
            } catch (IOException | ParserConfigurationException | SAXException e) {
                logger.error("Error processing text:" + textDir + ":" + e, e);
            }
        }
        return textsByType;
    }

    /**
     * Makes sure that the given directory exists, creating it (with parents) if needed.
     *
     * @param dir directory to check or create
     * @return {@code true} if the directory exists afterwards; {@code false} (after logging
     *         an error) if it could not be created
     */
    private static boolean ensureDirectory(File dir) {
        if (dir.isDirectory() || dir.mkdirs()) {
            return true;
        }
        logger.error("Cannot create directory: " + dir);
        return false;
    }

    /**
     * Copies every given directory into the destination directory. A failed copy is logged
     * and does not stop the remaining copies.
     *
     * @param dirs        directories to copy
     * @param destination directory that receives the copies
     */
    private static void copyAll(List<File> dirs, File destination) {
        for (File dir : dirs) {
            try {
                FileUtils.copyDirectoryToDirectory(dir, destination);
            } catch (IOException e) {
                logger.error("Error copying " + dir + " to " + destination, e);
            }
        }
    }

    /**
     * Reads the text type of a text from its {@code header.xml}: the content of the
     * {@code target} attribute of the first {@code catRef} element.
     *
     * @param file directory of the text, containing {@code header.xml}
     * @return value of the {@code target} attribute
     * @throws IOException if the header cannot be read, or if it has no {@code catRef}
     *                     element or that element has no {@code target} attribute
     * @throws ParserConfigurationException if the XML parser cannot be configured
     * @throws SAXException if the header is not well-formed XML
     */
    private static String getTextType(File file) throws ParserConfigurationException, SAXException, IOException {
        File header = new File(file, "header.xml");
        org.w3c.dom.NodeList catRefs = loadDocument(header).getElementsByTagName("catRef");
        if (catRefs.getLength() == 0) {
            // Reported as IOException so that the caller skips this text instead of crashing.
            throw new IOException("No catRef element in " + header);
        }
        org.w3c.dom.Node target = catRefs.item(0).getAttributes().getNamedItem("target");
        if (target == null) {
            throw new IOException("No target attribute on catRef element in " + header);
        }
        return target.getTextContent();
    }

    /**
     * Parses an XML file into a DOM document without loading external DTDs, and
     * normalizes the resulting tree.
     *
     * @param file XML file to parse
     * @return the parsed and normalized document
     * @throws ParserConfigurationException if the parser cannot be created or does not
     *                                      support the requested feature
     * @throws SAXException if the file is not well-formed XML
     * @throws IOException if the file cannot be read
     */
    public static Document loadDocument(File file) throws ParserConfigurationException, SAXException, IOException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // Parser-specific (Xerces) feature: do not fetch the DTD referenced by the document.
        factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        Document document = factory.newDocumentBuilder().parse(file);
        document.normalize();
        return document;
    }
}
