/*
 * Decompiled with CFR 0.152.
 */
package wsdde.corpus;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Random;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.Vector;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import wsdde.Counter;
import wsdde.corpus.ContextInfo;
import wsdde.corpus.KWIC;
import wsdde.corpus.POSInfo;
import wsdde.general.Utils;
import wsdde.generator.FeatureGenerator;

/**
 * A corpus of {@link KWIC} contexts together with its sense inventory and the feature
 * bookkeeping used to export the corpus as a Weka data set.
 *
 * <p>The corpus itself is the vector of contexts. Next to it the class keeps an aggregate
 * feature {@link Counter}, the column index assigned to every feature, and the
 * {@link FeatureGenerator} registered for every feature name.
 */
public class WSDCorpus extends Vector<KWIC> {
    /** Name passed to the constructor; {@link #loadTXT(String)} passes the file name. */
    public String corpusName;
    /** Sense labels; not assigned by the constructor, so {@code null} until one of the setters/loaders runs. */
    public FastVector senses;
    /** Corpus-level feature counter, filled by {@link #computeMainCounter()} or {@link #setMainCounter(Counter)}. */
    public Counter counter;
    /** Feature name to the generator registered for it; consulted by {@link #toWeka()}. */
    public HashMap<String, FeatureGenerator> whichGenerator;
    /** Feature name to its column index in the rows built by {@link #toWeka()}. */
    public HashMap<String, Integer> featurePosition;
    /** Cache used by {@link #getCorpus(String)}, keyed by file name. */
    public static HashMap<String, WSDCorpus> loadedCorpora = new HashMap<String, WSDCorpus>();
    private static final long serialVersionUID = 4188059128143522859L;

    /**
     * Creates an empty corpus with a fresh counter and empty feature maps.
     *
     * @param corpusName name stored in {@link #corpusName}; may be {@code null}
     */
    public WSDCorpus(String corpusName) {
        this.corpusName = corpusName;
        this.counter = new Counter();
        this.whichGenerator = new LinkedHashMap<String, FeatureGenerator>();
        this.featurePosition = new HashMap<String, Integer>();
    }

    /**
     * Returns the corpus for {@code filename}, loading it with {@link #loadTXT(String)} on the
     * first request and serving the cached instance afterwards.
     *
     * @param filename path of the corpus file, also used as the cache key
     * @return the cached corpus instance
     */
    public static WSDCorpus getCorpus(String filename) {
        if (!loadedCorpora.containsKey(filename)) {
            loadedCorpora.put(filename, WSDCorpus.loadTXT(filename));
        }
        return loadedCorpora.get(filename);
    }

    /**
     * Resets the feature state of the corpus.
     *
     * <p>Unlike {@link Vector#clear()} this override does NOT remove the contexts: it replaces
     * the generator map, the position map and the counter with fresh ones, clears every
     * context's own counter and sets its predicted sense to the empty string.
     */
    @Override
    public void clear() {
        this.whichGenerator = new LinkedHashMap<String, FeatureGenerator>();
        this.featurePosition = new HashMap<String, Integer>();
        this.counter = new Counter();
        for (KWIC kwic : this) {
            kwic.counter.clear();
            kwic.predictedSense = "";
        }
    }

    /**
     * Runs every generator on this corpus, in the order given.
     *
     * @param fgs generators to apply
     */
    public void generateFeatures(Vector<FeatureGenerator> fgs) {
        for (FeatureGenerator fg : fgs) {
            fg.generate(this);
        }
    }

    /**
     * Adds the counter of every context to the corpus counter and then assigns a column
     * index to each key of the corpus counter.
     */
    public void computeMainCounter() {
        for (KWIC kwic : this) {
            this.counter.addCounter(kwic.counter, true);
        }
        this.indexFeaturePositions();
    }

    /**
     * Replaces the corpus counter and rebuilds the column index from its keys.
     *
     * @param c counter to use from now on (stored by reference)
     */
    public void setMainCounter(Counter c) {
        this.counter = c;
        this.featurePosition.clear();
        this.indexFeaturePositions();
    }

    /** Numbers the keys of the corpus counter 0, 1, 2, ... in the counter's iteration order. */
    private void indexFeaturePositions() {
        int i = 0;
        for (String key : this.counter.keySet()) {
            this.featurePosition.put(key, i++);
        }
    }

    /**
     * Rebuilds {@link #senses} from the distinct {@code sense} values of the contexts,
     * in order of first appearance.
     */
    public void sensesFromKWICS() {
        LinkedHashSet<String> hs = new LinkedHashSet<String>();
        for (KWIC kwic : this) {
            hs.add(kwic.sense);
        }
        this.senses = new FastVector();
        for (String sense : hs) {
            this.senses.addElement(sense);
        }
    }

    /**
     * Sets the sense inventory.
     *
     * @param senses sense labels (stored by reference)
     */
    public void setSenses(FastVector senses) {
        this.senses = senses;
    }

    /**
     * Sets the feature-to-generator map.
     *
     * @param ffg map to use from now on (stored by reference)
     */
    public void setWhichGenerator(LinkedHashMap<String, FeatureGenerator> ffg) {
        this.whichGenerator = ffg;
    }

    /**
     * Renders the corpus in the text form read back by {@link #loadTXT(String)}: a
     * {@code SENSES} header line listing the senses separated by spaces, a blank line, and
     * then the string form of every context.
     *
     * <p>A corpus whose senses were never set is rendered with an empty sense list instead
     * of failing.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder("SENSES ");
        if (this.senses != null) {
            for (int i = 0; i < this.senses.size(); ++i) {
                sb.append(this.senses.elementAt(i).toString()).append(' ');
            }
        }
        sb.append("\n\n");
        for (KWIC kwic : this) {
            sb.append(kwic.toString());
        }
        return sb.toString();
    }

    /**
     * Converts the corpus into a Weka data set.
     *
     * <p>One attribute is created per key of the corpus counter: a nominal {@code 0}/{@code 1}
     * attribute when the feature's generator reports {@code isBinary()}, a numeric attribute
     * otherwise. The last attribute is the class, built from {@link #senses}.
     *
     * <p>Each context becomes one row. Features absent from the context stay at 0; a binary
     * feature present in the context is set to 1; any other feature gets
     * {@code countInContext * ln(corpusSize / (corpusCounterValue + 1))}. This looks like a
     * TF-IDF style weight, but what the corpus counter value means depends on
     * {@code Counter.addCounter}, which is not visible here. Features of a context that have
     * no column index are skipped. A sense that is not in {@link #senses} is stored as a
     * missing class value.
     *
     * @return the data set, one instance per context
     * @throws IllegalStateException if a feature has no generator registered in
     *         {@link #whichGenerator}
     */
    public Instances toWeka() {
        FastVector headers = new FastVector();
        for (String attribute : this.counter.keySet()) {
            if (this.generatorFor(attribute).isBinary()) {
                FastVector attVals = new FastVector();
                attVals.addElement("0");
                attVals.addElement("1");
                headers.addElement(new Attribute(attribute, attVals));
            } else {
                headers.addElement(new Attribute(attribute));
            }
        }
        headers.addElement(new Attribute("ZNACZENIE", this.senses));
        Instances data = new Instances("nazwa", headers, 0);
        // Both sizes are constant while the rows are built.
        int featureCount = this.counter.size();
        int setSize = this.size();
        for (KWIC kwic : this) {
            // One cell per feature plus the trailing class cell.
            double[] row = new double[featureCount + 1];
            for (String key : kwic.counter.keySet()) {
                Integer position = this.featurePosition.get(key);
                if (position == null) {
                    continue;
                }
                int column = position.intValue();
                if (this.generatorFor(key).isBinary()) {
                    row[column] = 1.0;
                    continue;
                }
                int inKWIC = (Integer)kwic.counter.get(key);
                int hitRows = (Integer)this.counter.get(key);
                row[column] = (double)inKWIC * Math.log((double)setSize / ((double)hitRows + 1.0));
            }
            int senseNumber = this.senses.indexOf(kwic.sense);
            row[row.length - 1] = senseNumber < 0 ? Instance.missingValue() : (double)senseNumber;
            data.add(new Instance(1.0, row));
        }
        return data;
    }

    /**
     * Looks up the generator registered for a feature.
     *
     * @throws IllegalStateException if none is registered, naming the feature
     */
    private FeatureGenerator generatorFor(String feature) {
        FeatureGenerator generator = this.whichGenerator.get(feature);
        if (generator == null) {
            throw new IllegalStateException("No feature generator registered for feature: " + feature);
        }
        return generator;
    }

    /**
     * Not implemented: always returns {@code null}.
     *
     * @param corpora ignored
     * @return {@code null}
     */
    public static WSDCorpus merge(Collection<WSDCorpus> corpora) {
        return null;
    }

    /**
     * Randomly splits the corpus into {@code nparts} parts whose sizes differ by at most one;
     * the earlier parts receive the extra contexts.
     *
     * @param nparts number of parts
     * @return the parts, as produced by {@link #exactlySplit(int[])}
     */
    public Vector<WSDCorpus> splitInto(int nparts) {
        int toSplit = this.size();
        int[] howToSplit = new int[nparts];
        // Deal the contexts out round-robin to obtain the part sizes.
        for (int i = 0; i < toSplit; ++i) {
            howToSplit[i % nparts]++;
        }
        return this.exactlySplit(howToSplit);
    }

    /**
     * Draws contexts at random, without replacement, into new corpora of the requested sizes.
     *
     * <p>{@link #senses} is first rebuilt with {@link #sensesFromKWICS()}, and every part
     * receives its own copy of it. The contexts themselves are shared with this corpus, not
     * copied, and this corpus keeps all of its contexts. Sizes that are zero or negative
     * yield empty parts.
     *
     * @param howToSplit size of each part, in order
     * @return one corpus per entry of {@code howToSplit}
     * @throws IllegalArgumentException if more contexts are requested than the corpus holds
     */
    public Vector<WSDCorpus> exactlySplit(int[] howToSplit) {
        this.sensesFromKWICS();
        long requested = 0L;
        for (int examples : howToSplit) {
            if (examples > 0) {
                requested += examples;
            }
        }
        if (requested > this.size()) {
            throw new IllegalArgumentException("Cannot draw " + requested
                    + " contexts from a corpus of " + this.size());
        }
        Random r = new Random();
        // Scratch pool to draw from, so this corpus itself is left untouched.
        Vector<KWIC> pool = new Vector<KWIC>(this);
        Vector<WSDCorpus> vs = new Vector<WSDCorpus>(howToSplit.length);
        for (int examples : howToSplit) {
            WSDCorpus v = new WSDCorpus(null);
            v.senses = (FastVector)this.senses.copy();
            for (int i = 0; i < examples; ++i) {
                v.add(pool.remove(r.nextInt(pool.size())));
            }
            vs.add(v);
        }
        return vs;
    }

    /**
     * Appends all contexts of another corpus to this one.
     *
     * @param wsdcorpus corpus whose contexts are added
     */
    public void addCorpus(WSDCorpus wsdcorpus) {
        this.addAll(wsdcorpus);
    }

    /**
     * Writes the string form of a corpus to a file via {@code Utils.saveInFile}.
     *
     * @param corpus corpus to save
     * @param filename destination path
     */
    public static void saveTXT(WSDCorpus corpus, String filename) {
        Utils.saveInFile(corpus.toString(), filename);
    }

    /**
     * Loads a corpus from a UTF-8 text file.
     *
     * <p>The first line is the sense header: its first token is skipped and the remaining
     * space-separated tokens become {@link #senses}. Every following non-blank line is
     * dispatched on its first token (case-insensitively):
     * <ul>
     *   <li>{@code context <id>} starts a new context;</li>
     *   <li>{@code sense <s>} / {@code predicted_sense <s>} set the corresponding field of the
     *       current context unless the value is {@code null};</li>
     *   <li>{@code tok <num> <word> <lemma> <pos>} hands the token to {@code KWIC.wstaw},
     *       with a flag telling whether {@code num} is 0;</li>
     *   <li>{@code end_of_context} adds the current context to the corpus.</li>
     * </ul>
     * Lines starting with any other token are ignored.
     *
     * <p>On any failure (unreadable or empty file, malformed line, a line that needs a
     * context before any {@code context} line was seen) the stack trace is printed and the
     * JVM exits with status 1.
     *
     * @param filename path of the corpus file; also used as the corpus name
     * @return the loaded corpus
     */
    public static WSDCorpus loadTXT(String filename) {
        WSDCorpus w = new WSDCorpus(filename);
        try {
            BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(filename), "UTF-8"));
            try {
                String strLine = br.readLine();
                if (strLine == null) {
                    throw new IllegalStateException("Corpus file is empty: " + filename);
                }
                StringTokenizer sensesString = new StringTokenizer(strLine, " ");
                // Skip the leading header keyword.
                sensesString.nextToken();
                w.senses = new FastVector();
                while (sensesString.hasMoreTokens()) {
                    w.senses.addElement(sensesString.nextToken().trim());
                }
                KWIC k = null;
                Vector<String> typeOfInfo = new Vector<String>(Arrays.asList("words", "lemmas", "posinfos"));
                while ((strLine = br.readLine()) != null) {
                    StringTokenizer st = new StringTokenizer(strLine.trim(), " ");
                    if (!st.hasMoreTokens()) {
                        continue;
                    }
                    String keyword = st.nextToken();
                    if (keyword.equalsIgnoreCase("context")) {
                        k = new KWIC(Integer.parseInt(st.nextToken()));
                    } else if (keyword.equalsIgnoreCase("sense")) {
                        String sense = st.nextToken().trim();
                        if (!sense.equalsIgnoreCase("null")) {
                            WSDCorpus.requireContext(k, keyword).sense = sense;
                        }
                    } else if (keyword.equalsIgnoreCase("predicted_sense")) {
                        String sense = st.nextToken().trim();
                        if (!sense.equalsIgnoreCase("null")) {
                            WSDCorpus.requireContext(k, keyword).predictedSense = sense;
                        }
                    } else if (keyword.equalsIgnoreCase("end_of_context")) {
                        // Never let a null context slip into the corpus.
                        w.add(WSDCorpus.requireContext(k, keyword));
                    } else if (keyword.equalsIgnoreCase("tok")) {
                        int num = Integer.parseInt(st.nextToken());
                        // Same order as typeOfInfo: word, lemma, part-of-speech info.
                        Vector<ContextInfo> info = new Vector<ContextInfo>(3);
                        info.add(new ContextInfo(st.nextToken().trim()));
                        info.add(new ContextInfo(st.nextToken().trim()));
                        info.add(new POSInfo(st.nextToken().trim()));
                        WSDCorpus.requireContext(k, keyword).wstaw(typeOfInfo, info, num == 0);
                    }
                }
            }
            finally {
                // Release the file handle on the failure path as well.
                br.close();
            }
        }
        catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
        return w;
    }

    /**
     * Returns the context currently being read.
     *
     * @throws IllegalStateException if no {@code context} line has been seen yet
     */
    private static KWIC requireContext(KWIC current, String keyword) {
        if (current == null) {
            throw new IllegalStateException("'" + keyword + "' line found before any 'context' line");
        }
        return current;
    }

    /**
     * Ad-hoc driver: loads {@code powod.wsdc}, splits it into two random halves and saves
     * them as {@code tmin0.wsdcorp} and {@code tmax1.wsdcorp}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Vector<WSDCorpus> ws = WSDCorpus.loadTXT("powod.wsdc").splitInto(2);
        WSDCorpus.saveTXT(ws.get(0), "tmin0.wsdcorp");
        WSDCorpus.saveTXT(ws.get(1), "tmax1.wsdcorp");
    }
}

