package ipipan.poliqarp.stat;

import java.io.*;
import java.util.Comparator;
import java.util.TreeMap;

class PosAttribute {
   String name;
   boolean optional;

   PosAttribute(String s) {
      if (s.startsWith("[") && s.endsWith("]")) {
         optional = true;
         name = s.substring(1, s.length()-1);
      } else {
         optional = false;
         name = s;
      }
   }
}

/**
 * A representation of a corpus tagset.  Provides information about
 * possible attributes for different parts of speech, aliases for the
 * attributes and the ordering of attribute values.
 *
 * The data is read from a configuration file made of sections.  Only
 * the <code>[POS]</code>, <code>[NAMED-ENTITY]</code> and
 * <code>[ATTR]</code> sections are interpreted; every other section
 * is skipped.  Empty lines and lines starting with <code>#</code> are
 * ignored.
 *
 * TODO: replace the constructor referring to the corpus configuration
 * file with another one getting the same information through a
 * connection to the poliqarp server.
 */
public class Tagset {
   /** Value returned when a tag carries no value for an attribute. */
   public static final String undefinedValue = "[-]";

   /** Part of speech name -> attributes declared for it, in order. */
   TreeMap<String, PosAttribute[]> posAttributes;
   /**
    * Created empty by the constructor.
    * NOTE(review): nothing in this class fills this map; presumably it
    * was meant to hold the values listed in [ATTR] -- confirm.
    */
   TreeMap<String, String[]> attrValues;
   /** Alias (or attribute name itself) -> full name. */
   TreeMap<String, String> aliases;
   /** Attribute value -> its 1-based position on its [ATTR] line. */
   TreeMap<String, Integer> valOrder;

   private static final int LOAD_IGNORE = 0;
   private static final int LOAD_POS    = 1;
   private static final int LOAD_NAMED  = 2;
   private static final int LOAD_ATTR   = 3;

   /**
    * Creates a Tagset from a corpus config file.  TODO: should be
    * replaced by a proper query to poliqarp server.
    *
    * @param filename path of the configuration file without the
    * <code>.cfg</code> extension, which is appended here
    */
   public Tagset(String filename) {
      posAttributes = new TreeMap<String, PosAttribute[]>();
      // Previously left null; an empty map is safer for any reader.
      attrValues    = new TreeMap<String, String[]>();
      aliases       = new TreeMap<String, String>();
      valOrder      = new TreeMap<String, Integer>();
      loadCfgFile(filename + ".cfg");
      System.out.println("Tagset(): " + posAttributes.size() + " pos, " +
         aliases.size() + " aliases loaded.");
   }

   /**
    * Reads the configuration file (decoded as UTF-8) and dispatches
    * every data line to the handler of the section it belongs to.
    * I/O errors are reported with a stack trace and otherwise
    * ignored; whatever was loaded before the error is kept.
    *
    * @param filename full path of the configuration file
    */
   void loadCfgFile(String filename) {
      InputStream raw = null;
      int mode = LOAD_IGNORE;

      try {
         raw = new FileInputStream(filename);
         BufferedReader in = new BufferedReader(
            new InputStreamReader(raw, "utf-8"));
         String line;
         while ((line = in.readLine()) != null) {
            line = line.trim();
            if (line.length() == 0 || line.startsWith("#"))
               continue;

            if (line.startsWith("[")) {
               // A section header switches the handler for what follows.
               mode = sectionMode(line);
               continue;
            }

            switch (mode) {
               case LOAD_POS:
                  setPosAttributes(line);
                  break;
               case LOAD_NAMED:
                  addAliases(line);
                  break;
               case LOAD_ATTR:
                  setAttrValues(line);
                  break;
               default:
                  // Line belongs to a section we do not interpret.
                  break;
            }
         }
      } catch (IOException e) {
         e.printStackTrace();
      } finally {
         // Close even when reading failed half-way through the file.
         if (raw != null) {
            try {
               raw.close();
            } catch (IOException e) {
               e.printStackTrace();
            }
         }
      }
   }

   /** Maps a section header line to the matching LOAD_* mode. */
   private static int sectionMode(String header) {
      if (header.equals("[POS]"))
         return LOAD_POS;
      if (header.equals("[NAMED-ENTITY]"))
         return LOAD_NAMED;
      if (header.equals("[ATTR]"))
         return LOAD_ATTR;
      return LOAD_IGNORE;
   }

   /**
    * Splits a configuration line into tokens.  Tokens are separated
    * by whitespace or by an equals sign with optional whitespace
    * around it.
    */
   String[] splitLine(String line) {
      return line.split("(\\s*=\\s*)|\\s+");
   }

   /**
    * Handles a <code>[POS]</code> line: the first token is the part
    * of speech, the remaining ones are its attributes in tag order.
    * Every attribute name is also registered as an alias of itself.
    */
   void setPosAttributes(String line) {
      String[] fields = splitLine(line);
      PosAttribute[] attrs = new PosAttribute[fields.length-1];
      for (int i = 1; i < fields.length; i++) {
         PosAttribute attr = new PosAttribute(fields[i]);
         attrs[i-1] = attr;
         // Register the bare name, not the raw token: an optional
         // attribute is written "[name]" but is looked up as "name".
         aliases.put(attr.name, attr.name);
      }
      posAttributes.put(fields[0], attrs);
   }

   /**
    * Handles a <code>[NAMED-ENTITY]</code> line: every token after
    * the first becomes an alias of the first token.
    */
   void addAliases(String line) {
      String[] fields = splitLine(line);
      for (int i = 1; i < fields.length; i++)
         aliases.put(fields[i], fields[0]);
   }

   /**
    * Handles an <code>[ATTR]</code> line: records, for every token
    * after the first, its 1-based position on the line.  A value
    * repeated on a later line overwrites the earlier position.
    */
   void setAttrValues(String line) {
      String[] fields = splitLine(line);
      for (int i = 1; i < fields.length; i++)
         valOrder.put(fields[i], i);
   }

   /**
    * Finds a real (full) name for a specified alias or named entity.
    *
    * @return full name or null if alias not found
    */
   public String getBaseName(String alias) {
      return aliases.get(alias);
   }

   /**
    * Returns a comparator for attribute values built from the value
    * positions collected from the <code>[ATTR]</code> section.
    */
   public Comparator<String> getAttrValueComparator() {
      return new AttrValueComparator(valOrder);
   }

   /**
    * Parses tag to find value of a specified attribute.
    *
    * @param ctag a list of tags separated by colons.  The first tag
    * has to be part of speech.
    * @param attr the full name of the attribute we are looking for;
    * must not be null
    * @return value of the attribute <code>attr</code>, or
    * {@link #undefinedValue} when the part of speech is unknown, does
    * not declare the attribute, or the tag is too short to carry it
    */
   public String getAttrValue(String ctag, String attr) {
      String[] tag = ctag.split(":");
      if (attr.equals(aliases.get("pos")))
         return tag[0];
      PosAttribute[] pa = posAttributes.get(tag[0]);
      if (pa == null) {
         System.err.println("Tagset.getAttrValue(): no attributes for <"
            + tag[0] + ">");
         // Nothing is known about this part of speech.
         return undefinedValue;
      }
      if (pa.length != tag.length-1)
         System.err.println("Tagset.getAttrValue(): no of attributes does not match: "
            + pa.length + " vs " + (tag.length-1));
      for (int i = 0; i < pa.length; i++) {
         if (pa[i].name.equals(attr)) {
            // The tag may hold fewer fields than attributes declared
            // (see the warning above); do not index past its end.
            int field = i + 1;
            return field < tag.length ? tag[field] : undefinedValue;
         }
      }
      return undefinedValue;
   }
}

