/*
 * 
 *  Copyright (C) 2011 Mateusz Kopec
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 *
 */
package resources;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import utils.FileManager;

/**
 * Synonym dictionary based on the semantic similarity (MSR) pages of plwordnet.
 * <p>
 * Synonyms for a lemma are fetched over HTTP the first time the lemma is
 * requested and are then kept in an in-memory map, which can be persisted to
 * and restored from a cache file through {@link FileManager}.
 * 
 * @author Mateusz Kopec
 * 
 */
public class SemSimPlwordnet extends SynonymDictionary implements Serializable {

	private static final long serialVersionUID = 2145615861956784458L;

	/** Base address of the lookup page; the lemma is appended verbatim. */
	private static final String SERVICE_URL = "http://plwordnet.pwr.wroc.pl/wordnet/msr/";

	/** Marker of the line that opens the result table. */
	private static final String TABLE_START = "<table class=\"msr\">";

	/** Marker of the line that closes the result table. */
	private static final String TABLE_END = "</table>";

	/** Marker of a table line that carries a synonym. */
	private static final String WORD_CELL = "<td class=\"l\">";

	/**
	 * Position of the word among the fragments obtained by splitting a cell line
	 * on '&lt;' and '&gt;'. NOTE(review): depends on the exact row markup of the
	 * page - confirm if the page layout changes.
	 */
	private static final int WORD_INDEX = 8;

	/** Location of the file used to persist the lookup results. */
	private String cacheFilename = "data" + File.separator + "cache" + File.separator + "semsim_plwordnet";

	/** Lookup results gathered so far, keyed by lemma. */
	private Map<String, SynonymEntry> synonyms;

	/**
	 * Creates the dictionary with an empty map and then tries to replace it with
	 * the content of the cache file.
	 */
	public SemSimPlwordnet() {
		synonyms = new HashMap<String, SynonymEntry>();
		loadCache();
	}

	/**
	 * Saves all lookup results gathered so far in the cache file.
	 */
	public void saveCache() {
		FileManager.saveObject(synonyms, cacheFilename);
	}

	/**
	 * Loads lookup results from the cache file. When the file manager returns
	 * <code>null</code> the current map is left untouched.
	 */
	@SuppressWarnings("unchecked")
	public void loadCache() {
		Map<String, SynonymEntry> cache = (HashMap<String, SynonymEntry>) FileManager.loadObject(cacheFilename);
		if (cache != null)
			synonyms = cache;
	}

	/** Number of remote lookups started so far; shared by all instances. */
	static int c = 0;

	/**
	 * Returns the synonyms of the given lemma. The first request for a lemma
	 * triggers a remote lookup; later requests are answered from the in-memory
	 * map. The cache file is rewritten each time the lookup counter reaches a
	 * multiple of 1000.
	 * <p>
	 * If the remote lookup fails with an I/O error the stack trace is printed and
	 * the JVM is terminated with exit code 1.
	 * 
	 * @param lemma
	 *            lemma to look up; appended verbatim to the service address
	 * @param onlyMonosemous
	 *            not used by this implementation
	 * @return synonyms stored for the lemma (possibly empty)
	 * @see resources.SynonymDictionary#getSynonymsForLemma(java.lang.String, boolean)
	 */
	@Override
	public Collection<String> getSynonymsForLemma(String lemma, boolean onlyMonosemous) {
		SynonymEntry se = synonyms.get(lemma);
		if (se == null) { // not searched yet

			System.out.println(c++ + " SemSim : Searching for synonyms of " + lemma);
			if (c % 1000 == 0) {
				System.out.println("Saving cache");
				this.saveCache();
			}

			se = new SynonymEntry();
			synonyms.put(lemma, se);
			se.synonyms.addAll(fetchSynonyms(lemma));
		}
		return se.synonyms;
	}

	/**
	 * Downloads the lookup page for the lemma and extracts the listed words.
	 * Terminates the JVM when the address is malformed or the download fails.
	 * 
	 * @param lemma
	 *            lemma to look up
	 * @return words found on the page, in page order
	 */
	private List<String> fetchSynonyms(String lemma) {
		List<String> found = new ArrayList<String>();
		BufferedReader reader = null;
		try {
			URL url = new URL(SERVICE_URL + lemma);
			URLConnection connection = url.openConnection();
			// NOTE(review): the page is decoded with the platform default charset,
			// as before - confirm it matches the encoding the server sends.
			reader = new BufferedReader(new InputStreamReader(connection.getInputStream()));
			readSynonymTable(reader, found);
		} catch (MalformedURLException e) {
			e.printStackTrace();
			System.exit(1);
		} catch (IOException e) {
			e.printStackTrace();
			System.exit(1);
		} finally {
			// previously the stream was never closed, leaking one connection per lemma
			closeQuietly(reader);
		}
		return found;
	}

	/**
	 * Scans the page for the first result table and collects the word of every
	 * synonym cell inside it.
	 * 
	 * @param reader
	 *            source of the page lines
	 * @param found
	 *            list that receives the extracted words
	 * @throws IOException
	 *             when reading from the page fails
	 */
	private static void readSynonymTable(BufferedReader reader, List<String> found) throws IOException {
		String line = reader.readLine();
		while (line != null && !line.contains(TABLE_START))
			line = reader.readLine();
		if (line == null)
			return; // page has no result table

		// A page cut off before the closing tag ends the scan instead of
		// failing with a NullPointerException.
		line = reader.readLine();
		while (line != null && !line.contains(TABLE_END)) {
			if (line.contains(WORD_CELL)) {
				String[] fragments = line.split("[><]");
				// rows that do not have the expected shape are skipped
				if (fragments.length > WORD_INDEX)
					found.add(fragments[WORD_INDEX]);
			}
			line = reader.readLine();
		}
	}

	/**
	 * Closes the reader if it was opened; a failure to close is only reported.
	 * 
	 * @param reader
	 *            reader to close, may be <code>null</code>
	 */
	private static void closeQuietly(BufferedReader reader) {
		if (reader == null)
			return;
		try {
			reader.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}
