/*
 * Copyright (C) 2009 by Instytut Podstaw Informatyki Polskiej
 * Akademii Nauk (IPI PAN; Institute of Computer Science, Polish
 * Academy of Sciences; cf. www.ipipan.waw.pl).  All rights reserved.
 *
 * This file is part of WSDDE.
 *
 * WSDDE is free software: it may be distributed and/or modified under
 * the terms of the GNU General Public License version 3 as published
 * by the Free Software Foundation and appearing in the file doc/gpl.txt
 * included in the packaging of this file.
 *
 * A commercial license is available from IPI PAN (contact
 * Michal.Ciesiolka@ipipan.waw.pl or ipi@ipipan.waw.pl for more
 * information).  Licensees holding a valid commercial license from IPI
 * PAN may use this file in accordance with that license.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING
 * THE WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

package wsdde;

import java.lang.reflect.Method;
import java.nio.charset.Charset;


import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.StringTokenizer;
import java.util.Vector;

import wsdde.corpus.Pseudowords;
import wsdde.corpus.WSDCorpus;
import wsdde.corpus.knowledge.KnowledgeSource;
import wsdde.corpus.knowledge.SimpleTokenizerManager;
import wsdde.corpus.knowledge.SimpleTokenizerParser;
import wsdde.corpus.knowledge.TAKIPIManager;
import wsdde.general.*;

import org.apache.commons.*;
import org.apache.commons.cli.*;;

/**
 * Main class: the entry point of the WSDDE command line.
 * <p>
 * The first command-line argument selects one main command
 * (<code>-pws</code>, <code>-enr</code>, <code>-spl</code>, <code>-xsd</code>,
 * <code>-gmd</code>, <code>-coe</code> or <code>-clu</code>); the remaining
 * arguments are parsed against the option set registered for that command
 * in {@link #hm}.
 * <p>
 * The process exits with status 0 on success and with status 1 when the
 * command line cannot be parsed or when the <code>gmd</code>/<code>coe</code>
 * commands report an error.
 * 
 * @author "Rafal Mlodzki"
 *
 */
public class WSDDE {
		
	/**
	 * Option sets of the individual commands, keyed by the command name
	 * (without the leading dash). The insertion order is the order in which
	 * {@link #usages()} prints the commands. Filled in by {@link #main(String[])}.
	 */
	public static LinkedHashMap<String,Options> hm = new LinkedHashMap<String, Options>(); 

	/** Top-level options: the required group of main commands. Assigned by {@link #main(String[])}. */
	public static Options opt;

	/** Exit status used when the command finished without a reported error. */
	private static final int EXIT_OK = 0;

	/** Exit status used when parsing failed or a command reported an error. */
	private static final int EXIT_FAILURE = 1;

	
//TESTING ONLY	
	
	/** @return a sample argument vector for the <code>coe</code> command (testing only) */
	public static String [] coe() {
		return new String [] {"-coe","-i","desc.xml","-db"};
	}
	
	/** @return a sample argument vector for the <code>gmd</code> command (testing only) */
	public static String [] gmd() {
		return new String [] {"-gmd","-i","meta2.xml","-o","desc2.xml"};
	}
	
	/** @return a sample argument vector for the <code>xsd</code> command (testing only) */
	public static String [] xsd() {
		return new String [] {"-xsd","-o",".xsd"};
	}
	
//TESTING ONLY END	
	
	/**
	 * Parses the command line, runs the selected command and terminates the JVM.
	 * <p>
	 * <code>args[0]</code> is parsed on its own against the main command group;
	 * <code>args[1..]</code> are parsed against the options of the selected command.
	 * With no arguments the usage of every command is printed.
	 * 
	 * @param args the command-line arguments
	 */
	public static void main(String args[]) {

		// Uncomment one of the following lines to run with canned arguments:
		// args = gmd();
		// args = xsd();
		// args = coe();

		opt = buildMainOptions();

		// The registration order is the order of the usage listing.
		hm.put("pws", buildPwsOptions());
		hm.put("enr", buildEnrOptions());
		hm.put("spl", buildSplOptions());
		hm.put("xsd", buildXsdOptions());
		hm.put("gmd", buildGmdOptions());
		hm.put("coe", buildCoeOptions());
		hm.put("clu", buildCluOptions());

		if (args.length < 1) {
			usages();
			System.exit(EXIT_OK);
		}

		// Parse the main command (first argument only).
		BasicParser parser = new BasicParser();
		CommandLine cl = null;
		try {
			cl = parser.parse(opt, new String[] { args[0] });
		} catch (Exception e) {
			System.out.println( "Parsing failed.  Reason: " + e.getMessage() + "\n");
			usages();
			System.exit(EXIT_FAILURE);
		}

		String command = cl.getOptions()[0].getOpt();
		String [] comargs = new String[args.length-1];
		System.arraycopy(args, 1, comargs, 0, comargs.length);

		// Parse the command's own options and dispatch.
		int status = EXIT_OK;
		try {
			CommandLine clargs = parser.parse(hm.get(command), comargs);

			if ("pws".equals(command)) {
				runPws(clargs);
			} else if ("enr".equals(command)) {
				runEnr(clargs);
			} else if ("spl".equals(command)) {
				runSpl(clargs);
			} else if ("xsd".equals(command)) {
				runXsd(clargs);
			} else if ("gmd".equals(command)) {
				status = runGmd(clargs);
			} else if ("coe".equals(command)) {
				status = runCoe(clargs);
			} else if ("clu".equals(command)) {
				runClu(clargs);
			} else {
				usages();
			}
		} catch (ParseException e) {
			System.out.println( "Parsing failed.  Reason: " + e.getMessage() + "\n");
			usages();
			status = EXIT_FAILURE;
		}

		System.exit(status);
	}

	/**
	 * Prints, for every registered command, its description followed by the
	 * help generated from its option set.
	 */
	public static void usages() {
		HelpFormatter formatter = new HelpFormatter();
		for (String key : hm.keySet()) {
			System.out.println("\n"+opt.getOption(key).getDescription()+"\n"); 
			formatter.printHelp( "java -jar wsdde.jar -"+key,hm.get(key),true);
			System.out.println();	
		}
	}

//OPTION DEFINITIONS

	/** Creates an option that takes one argument. */
	private static Option valueOption(String name, String argName, boolean required, String description) {
		Option option = new Option(name, true, description);
		option.setRequired(required);
		option.setArgName(argName);
		return option;
	}

	/** Builds the top-level options: a required group holding the main commands. */
	private static Options buildMainOptions() {
		OptionGroup bins = new OptionGroup();
		bins.addOption(new Option("pws","Use the 'pws' option to generate a pseudowords corpus"));
		bins.addOption(new Option("enr","Use the 'enr' option to enrich a wsd corpus using knowledge source(s) (in the current version only with segmentation, base forms and POSs using TAKIPI or a simple tokenizer)"));
		bins.addOption(new Option("spl","Use the 'spl' option to random split an enriched corpus into smaller corpora"));
		bins.addOption(new Option("xsd","Use the 'xsd' option to show XML definitions"));
		bins.addOption(new Option("gmd","Use the 'gmd' option to generate an experiment's description from the meta-description"));
		bins.addOption(new Option("coe","Use the 'coe' option to carry out an experiment"));
		bins.addOption(new Option("clu","Use the 'clu' option to load built WSD method from the file and use it"));
		bins.setRequired(true);

		Options options = new Options();
		options.addOptionGroup(bins);
		return options;
	}

	/** Builds the options of the <code>pws</code> (pseudowords corpus) command. */
	private static Options buildPwsOptions() {
		Options os = new Options();

		Option wc = OptionBuilder.withArgName("word1=count1 [wordn=countn]").hasArgs().withValueSeparator(' ').create("wc");
		wc.setDescription("pair(s) of a word and its number of occurences");
		wc.setRequired(true);
		os.addOption(wc);

		// NOTE(review): the help text advertises "word1[-wordn].xml" while runPws
		// concatenates the words without a separator - confirm which one is intended.
		os.addOption(valueOption("o", "outputfile", false,
				"output file; if not set, default file name is used (word1[-wordn].xml) "));
		return os;
	}

	/** Builds the options of the <code>enr</code> (enrich corpus) command. */
	private static Options buildEnrOptions() {
		Options os = new Options();
		os.addOption(valueOption("i", "inputfile", true, "input file"));
		os.addOption(valueOption("o", "outputfile", false,
				"output file; if not set, default file name is used (inputfile_enr.xml)"));

		OptionGroup enrichers = new OptionGroup();
		enrichers.addOption(new Option("takipi","enrich wsd corpus with segmentation, base forms and POSs, needs TAKIPI"));
		enrichers.addOption(new Option("simple","enrich wsd corpus with the simple segmentation and stemming"));
		enrichers.setRequired(true);
		os.addOptionGroup(enrichers);
		return os;
	}

	/** Builds the options of the <code>spl</code> (split corpus) command. */
	private static Options buildSplOptions() {
		Options os = new Options();

		Option ss = OptionBuilder.withArgName("size1 [sizen]").hasArgs().withValueSeparator(' ').create("ss");
		ss.setDescription("list of the sizes (number of examples) of corporas; the sum must be smaller than the number of contexts in the corpus; only corpora in 'enriched' (noXML) could be split");
		ss.setRequired(true);
		os.addOption(ss);

		os.addOption(valueOption("i", "inputfile", true, "corpus to split"));
		os.addOption(valueOption("p", "prefix", false,
				"common prefix for splitted corpora filenames; if not set, inputfile is used"));
		return os;
	}

	/** Builds the options of the <code>xsd</code> (show XML schemas) command. */
	private static Options buildXsdOptions() {
		Options os = new Options();
		// The help text lists the prefixes that runXsd really uses.
		os.addOption(valueOption("o", "outputfile", false,
				"output file postfix, it will be prefixed with 'desc_', 'meta_' and 'meta_db_' for the schemas of an experiment's description, a meta description and a DB meta description respectively; if not set, schemas are printed on the screen."));
		return os;
	}

	/** Builds the options of the <code>gmd</code> (generate description from meta-description) command. */
	private static Options buildGmdOptions() {
		Options os = new Options();
		os.addOption(valueOption("i", "inputfile", true,
				"input file with a meta description of the experiment"));
		os.addOption(valueOption("o", "outputfile", false,
				"output file where description of the experiment should be saved, if not set, default name is used (inputfile_desc.xml)"));
		return os;
	}

	/** Builds the options of the <code>coe</code> (carry out an experiment) command. */
	private static Options buildCoeOptions() {
		Options os = new Options();
		os.addOption(valueOption("i", "inputfile", true,
				"input file is an xml description of the experiment"));
		os.addOption(valueOption("o", "outputfile", false, "output file for xml results"));

		// TODO: possibly accept the name of the database table as an argument
		os.addOption(new Option("db",false,"save to the database"));
		os.addOption(new Option("sm",false,"save built wsd methods as well; if a method is evaluated with CV, then the saved model is the new one built on ALL examples"));
		return os;
	}

	/** Builds the options of the <code>clu</code> (load and use a classifier) command. */
	private static Options buildCluOptions() {
		Options os = new Options();
		os.addOption(valueOption("m", "WSDmethod", true, "file with a WSD method to load"));
		os.addOption(valueOption("c", "corpus", true, "file with a corpus to be classified"));
		os.addOption(valueOption("o", "filename", false,
				"output file for the classified corpus; if empty, inputfile is overwritten"));
		return os;
	}

//COMMANDS

	/**
	 * Parses a numeric command-line value.
	 * 
	 * @param text the raw value
	 * @param what a short name of the value, used in the error message
	 * @return the parsed integer
	 * @throws ParseException if <code>text</code> is not an integer, so that the
	 *         problem is reported like any other command-line error
	 */
	private static int parseIntArg(String text, String what) throws ParseException {
		try {
			return Integer.parseInt(text);
		} catch (NumberFormatException e) {
			throw new ParseException("Invalid " + what + ": '" + text + "' is not an integer");
		}
	}

	/**
	 * PWS: generates a pseudowords corpus from the <code>word=count</code> pairs
	 * given with <code>-wc</code> and saves it to the <code>-o</code> file, or to
	 * a file named after the concatenated words plus ".xml".
	 * 
	 * @throws ParseException if a pair has no '=' or its count is not an integer
	 */
	private static void runPws(CommandLine clargs) throws ParseException {
		String [] pairs = clargs.getOptionValues("wc");
		String [] words = new String[pairs.length];
		int [] counts = new int[pairs.length];
		StringBuilder pseudoword = new StringBuilder();

		for (int i = 0; i < pairs.length; i++) {
			int eq = pairs[i].indexOf('=');
			if (eq < 0) {
				throw new ParseException("Invalid word=count pair: '" + pairs[i] + "' (missing '=')");
			}
			words[i] = pairs[i].substring(0, eq);
			counts[i] = parseIntArg(pairs[i].substring(eq + 1), "count for '" + words[i] + "'");
			pseudoword.append(words[i]);
		}
		pseudoword.append(".xml");

		String filename = clargs.getOptionValue("o", pseudoword.toString());
		String pwcorp = Pseudowords.getPseudowordsCorpus(words, counts);
		Utils.saveInFile(pwcorp, filename);
	}

	/**
	 * ENR: enriches the <code>-i</code> corpus with the knowledge source selected
	 * by <code>-takipi</code> or <code>-simple</code> and saves the result to the
	 * <code>-o</code> file (default: inputfile + "_enr.xml").
	 */
	private static void runEnr(CommandLine clargs) {
		String inputfile = clargs.getOptionValue("i");
		String outputfile = clargs.getOptionValue("o", inputfile+"_enr.xml");

		KnowledgeSource ks;
		if (clargs.hasOption("takipi")) {
			ks = new TAKIPIManager();
		} else { // -simple: the option group is required, so it is the only alternative
			ks = new SimpleTokenizerManager();
		}
		ks.enrichAndSave(inputfile, outputfile);
	}

	/**
	 * SPL: splits the <code>-i</code> corpus into parts of the sizes given with
	 * <code>-ss</code> and saves part <i>n</i> as prefix + "_" + n + ".wsdc",
	 * where the prefix is the <code>-p</code> value or, if absent, the input file name.
	 * 
	 * @throws ParseException if a size is not an integer
	 */
	private static void runSpl(CommandLine clargs) throws ParseException {
		String [] sizes = clargs.getOptionValues("ss");
		int [] split = new int[sizes.length];
		for (int i = 0; i < split.length; i++) {
			split[i] = parseIntArg(sizes[i], "corpus size");
		}

		String inputfile = clargs.getOptionValue("i");
		WSDCorpus toSplit = WSDCorpus.loadTXT(inputfile);
		Vector<WSDCorpus> parts = toSplit.exactlySplit(split);

		String prefix = clargs.getOptionValue("p", inputfile);
		for (int i = 0; i < parts.size(); i++) {
			WSDCorpus.saveTXT(parts.get(i), prefix+"_"+i+".wsdc");
		}
	}

	/**
	 * XSD: writes the three XML schemas to files named "desc_", "meta_" and
	 * "meta_db_" + the <code>-o</code> value, or prints them when <code>-o</code>
	 * is not given.
	 */
	private static void runXsd(CommandLine clargs) {
		if (clargs.hasOption("o")) {
			String outputfile = clargs.getOptionValue("o");
			Utils.saveInFile(XML.getDescXSD(), "desc_"+outputfile);
			Utils.saveInFile(XML.getMetaDescXSD(), "meta_"+outputfile);
			Utils.saveInFile(XML.getMetaDBDescXSD(), "meta_db_"+outputfile);
		} else {
			System.out.println("EXPERIMENT'S DESCRIPTION\n\n"+XML.getDescXSD()+"\nMETA DESCRIPTION FROM PARAMETERS\n\n"+XML.getMetaDescXSD()+"\n\nMETA DESCRIPTION FROM DB\n"+XML.getMetaDBDescXSD());
		}
	}

	/**
	 * GMD: generates an experiment description from the <code>-i</code>
	 * meta-description and saves it to the <code>-o</code> file
	 * (default: inputfile + "_desc.xml").
	 * <p>
	 * The input is tried against both meta-description schemas in turn; the
	 * errors are printed only when both attempts throw.
	 * 
	 * @return {@link #EXIT_FAILURE} when both attempts threw, otherwise {@link #EXIT_OK}
	 */
	private static int runGmd(CommandLine clargs) {
		String inputfile = clargs.getOptionValue("i");
		String outputfile = clargs.getOptionValue("o", inputfile+"_desc.xml");

		// Ugly: two flags are needed only because there are two separate XSDs
		// where there should be a single one.
		boolean paramsFailed = false;
		boolean dbFailed = false;
		String error1 = "", error2 = "";

		try {
			if (XML.validateXSD(XML.getMetaDescXSD(), inputfile)) {
				String outputXml = ExperimentDescription.getXMLFromMetaDescription(inputfile);
				Utils.saveInFile(outputXml, outputfile);
			}
		} catch (Exception e) {
			paramsFailed = true;
			error1 = e.getMessage();
		}

		try {
			if (XML.validateXSD(XML.getMetaDBDescXSD(), inputfile)) {
				String outputXml = ExperimentDescription.getNewExperimentDBDescritpionFromXMLFile(inputfile);
				Utils.saveInFile(outputXml, outputfile);
			}
		} catch (Exception e) {
			dbFailed = true;
			error2 = e.getMessage();
		}

		if (paramsFailed && dbFailed) {
			System.out.println(error1 + "\n\nOR\n\n"+ error2);
			return EXIT_FAILURE;
		}
		return EXIT_OK;
	}

	/**
	 * COE: validates the <code>-i</code> experiment description, conducts the
	 * experiment and saves the XML results to the <code>-o</code> file
	 * (default: inputfile + "_res.xml"). <code>-db</code> and <code>-sm</code>
	 * are passed on to the experiment.
	 * 
	 * @return {@link #EXIT_FAILURE} when an exception was caught (its message is
	 *         printed), otherwise {@link #EXIT_OK}
	 */
	private static int runCoe(CommandLine clargs) {
		String inputfile = clargs.getOptionValue("i");
		String outputfile = clargs.getOptionValue("o", inputfile+"_res.xml");

		try {
			if (XML.validateXSD(XML.getDescXSD(), inputfile)) {
				WSDExperiment wsde = new WSDExperiment(inputfile);
				wsde.writeToDB(clargs.hasOption("db"));
				wsde.saveModels = clargs.hasOption("sm");

				String outputXml = wsde.conduct();
				Utils.saveInFile(outputXml, outputfile);
			}
		} catch (Exception e) {
			System.out.println(e.getMessage());
			return EXIT_FAILURE;
		}
		return EXIT_OK;
	}

	/**
	 * CLU: loads the WSD method from the <code>-m</code> file and classifies the
	 * <code>-c</code> corpus with it. The corpus name is set to the
	 * <code>-o</code> value or, if absent, to the input file name.
	 */
	private static void runClu(CommandLine clargs) {
		String inputfile = clargs.getOptionValue("c");
		String outputfile = clargs.getOptionValue("o", inputfile);

		String model = clargs.getOptionValue("m");
		WSDModel wsdm = WSDModel.loadModel(model);
		WSDCorpus corp = WSDCorpus.loadTXT(inputfile);
		corp.corpusName = outputfile; //name for save
		wsdm.classify(corp, new EvaluationResult(), true);
	}

}
