/*
 * TreeXMLParser.java
 *
 * Author: Piotr Achinger <piotr.achinger at gmail.com>
 */
package dendrarium.trees.xml;

import dendrarium.trees.*;
import dendrarium.utils.XMLUtils;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;

/**
 * Parser for files in the forest.xml format.
 *
 * The result of parsing is a {@link Forest} object together with all the
 * Node, NodeChildren etc. objects that belong to it.
 *
 * The input file is assumed to be UTF-8 encoded.
 *
 * @author Piotr Achinger <piotr.achinger at gmail.com>
 */
public class TreeXMLParser {

    /**
     * Parses an XML document read from a stream.
     *
     * @param is stream containing the XML document
     * @return object structure of the forest
     * @throws java.lang.Exception if the XML cannot be read or parsed
     */
    public Forest parse(InputStream is) throws Exception {
        Document doc = XMLUtils.parseXML(is);

        return parseForestElement(doc.getDocumentElement());
    }

    /**
     * Parses a string containing XML. The string is encoded as UTF-8 before
     * being handed to {@link #parse(InputStream)}.
     *
     * @param s XML document as text
     * @return object structure of the forest
     * @throws java.lang.Exception if the XML cannot be read or parsed
     */
    public Forest parse(String s) throws Exception {
        return this.parse(new ByteArrayInputStream(s.getBytes("UTF-8")));
    }

    /**
     * Reads a numeric attribute as an int.
     *
     * The value is parsed as a floating point number and then narrowed, so
     * values written in decimal or exponent notation are accepted.
     *
     * @return the attribute value, or 0 when the attribute is absent
     */
    private int getIntAttr(Element element, String attr) {
        if (!element.hasAttribute(attr)) {
            return 0;
        }
        return (int) Double.parseDouble(element.getAttribute(attr));
    }

    /**
     * Reads a numeric attribute as a long.
     *
     * Added by janek37 because the number of trees sometimes does not fit
     * in an int.
     *
     * @return the attribute value, or 0 when the attribute is absent
     */
    private long getLongAttr(Element element, String attr) {
        if (!element.hasAttribute(attr)) {
            return 0L;
        }
        String raw = element.getAttribute(attr);
        try {
            // Exact path: a double only has 53 bits of mantissa, so going
            // through Double would silently round large counts.
            return Long.parseLong(raw);
        } catch (NumberFormatException ignored) {
            // Not a plain integer literal (e.g. "1.5e10", surrounding
            // whitespace, or out of long range): fall back to the lenient
            // floating point parse.
            return (long) Double.parseDouble(raw);
        }
    }

    /**
     * Reads a boolean attribute.
     *
     * @param defaultValue value returned when the attribute is absent
     * @return defaultValue when absent, otherwise whether the attribute
     *         text equals "true" ignoring case
     */
    private boolean getBooleanAttr(Element element, String attr, boolean defaultValue) {
        if (!element.hasAttribute(attr)) {
            return defaultValue;
        }
        return element.getAttribute(attr).equalsIgnoreCase("true");
    }

    /**
     * Builds a Forest from the document's root element: reads the header
     * data (sentence id, grammar number, text, stats, start node, optional
     * answer), then creates all nodes, then links them with their children.
     */
    private Forest parseForestElement(Element forestElement) {

        String sentId = forestElement.getAttribute("sent_id");
        int grammarNo = forestElement.hasAttribute("grammar_no")
                ? Integer.parseInt(forestElement.getAttribute("grammar_no"))
                : 0;
        String text = XMLUtils.getElementText((Element) forestElement.getElementsByTagName("text").item(0));

        long trees = 0;
        long nodes = 0;
        long inferences = 0;
        double cputime = 0.0;

        /* Every stats attribute is optional, so a missing <stats> element is treated the same way. */
        Element stats = (Element) forestElement.getElementsByTagName("stats").item(0);
        if (stats != null) {
            trees = getLongAttr(stats, "trees");
            /* read as long: the targets are long, reading as int would clamp large counts */
            nodes = getLongAttr(stats, "nodes");
            inferences = getLongAttr(stats, "inferences");
            if (stats.hasAttribute("cputime")) {
                cputime = Double.parseDouble(stats.getAttribute("cputime"));
            }
        }

        String startnodeLabel = "";
        long startnodeFrom = 0;
        long startnodeTo = 0;

        Element startnode = (Element) forestElement.getElementsByTagName("startnode").item(0);
        if (startnode != null) {
            startnodeFrom = getIntAttr(startnode, "from");
            startnodeTo = getIntAttr(startnode, "to");
            startnodeLabel = XMLUtils.getElementText(startnode);
        }

        Forest forest = new Forest(sentId, text, trees, nodes, inferences, cputime, grammarNo,
                startnodeLabel, startnodeFrom, startnodeTo);

        /* AnswerData */
        NodeList answers = forestElement.getElementsByTagName("answer");
        if (answers.getLength() != 0) {
            forest.setAnswerData(parseAnswerElement((Element) answers.item(0)));
        }

        NodeList nodeElements = forestElement.getElementsByTagName("node");

        /* Pass 1: create every node, so that pass 2 can resolve child ids. */
        for (int i = 0; i < nodeElements.getLength(); ++i) {
            forest.addNode(parseNodeElement1((Element) nodeElements.item(i)));
        }

        /* Pass 2: attach the children alternatives to the created nodes. */
        for (int i = 0; i < nodeElements.getLength(); ++i) {
            parseNodeElement2(forest, (Element) nodeElements.item(i));
        }

        if (forest.getNodeMap().isEmpty()) {
            /* note: sentence without any parses -- create an artificial root */
            NonterminalNode fake = new NonterminalNode(0, 0, 1, 1, "category");
            fake.addAttribute("tag", "x");

            TerminalNode fakeTerminal = new TerminalNode(1, 0, 1, 1, "orth", "base");
            fakeTerminal.addAttribute("tag", "x");

            /* note: the terminal is deliberately NOT linked under the nonterminal
             * (no NodeChildren is created), so that Disambiguator works correctly
             * when the forest is empty
             */
            forest.addNode(fake);
            forest.addNode(fakeTerminal);

            forest.setEmpty(true);
        }

        forest.setRoot((NonterminalNode) forest.getNodeMap().get(0)); /* the root is assumed to have id = 0 */
        forest.postConstruct();

        return forest;
    }

    /**
     * Builds AnswerData from an answer element: its "type" and "username"
     * attributes and the text of its first "comment" descendant.
     */
    private AnswerData parseAnswerElement(Element answerElement) {
        AnswerType type = AnswerType.valueOf(answerElement.getAttribute("type"));
        String username = answerElement.getAttribute("username");
        String comment = XMLUtils.getElementText((Element) answerElement.getElementsByTagName("comment").item(0));

        return new AnswerData(username, type, comment);
    }

    /**
     * First pass over a node element: creates the TerminalNode or
     * NonterminalNode with its own data and "f" attributes, without
     * linking any children.
     */
    private Node parseNodeElement1(Element nodeElement) {
        int id = Integer.parseInt(nodeElement.getAttribute("nid"));
        int from = Integer.parseInt(nodeElement.getAttribute("from"));
        int to = Integer.parseInt(nodeElement.getAttribute("to"));
        long subtrees = Long.parseLong(nodeElement.getAttribute("subtrees"));

        NodeList terminals = nodeElement.getElementsByTagName("terminal");

        if (terminals.getLength() != 0) {
            Element terminalElement = (Element) terminals.item(0);

            String interpId = terminalElement.hasAttribute("interp_id") ? terminalElement.getAttribute("interp_id") : "";
            String tokenId = terminalElement.hasAttribute("token_id") ? terminalElement.getAttribute("token_id") : "";
            /* disamb defaults to true when absent, nps defaults to false */
            boolean disamb = getBooleanAttr(terminalElement, "disamb", true);
            boolean nps = getBooleanAttr(terminalElement, "nps", false);

            String orth = XMLUtils.getElementText((Element) terminalElement.getElementsByTagName("orth").item(0));
            String base = XMLUtils.getElementText((Element) terminalElement.getElementsByTagName("base").item(0));

            TerminalNode terminal = new TerminalNode(id, from, to, subtrees, orth, base);

            parseAttributes(terminalElement.getElementsByTagName("f"), terminal);

            terminal.setDisamb(disamb);
            terminal.setNps(nps);
            terminal.setInterp_id(interpId);
            terminal.setToken_id(tokenId);

            return terminal;
        }

        Element nonterminalElement = (Element) nodeElement.getElementsByTagName("nonterminal").item(0);

        String category = XMLUtils.getElementText((Element) nonterminalElement.getElementsByTagName("category").item(0));

        Node nonterminal = new NonterminalNode(id, from, to, subtrees, category);

        parseAttributes(nonterminalElement.getElementsByTagName("f"), nonterminal);

        return nonterminal;
    }

    /**
     * Copies every "f" element onto the node as an attribute: the key is
     * the element's "type" attribute, the value is the element's text.
     */
    private void parseAttributes(NodeList nl, Node node) {
        for (int i = 0; i < nl.getLength(); ++i) {
            Element fElement = (Element) nl.item(i);

            node.addAttribute(fElement.getAttribute("type"), XMLUtils.getElementText(fElement));
        }
    }

    /**
     * Second pass over a node element: looks the node up by its "nid" and
     * adds one NodeChildren per "children" element found under it.
     */
    private void parseNodeElement2(Forest forest, Element nodeElement) {
        int id = Integer.parseInt(nodeElement.getAttribute("nid"));
        Node node = forest.nodeById(id);

        NodeList nl = nodeElement.getElementsByTagName("children");
        if (nl.getLength() == 0) {
            /* nothing to link; return before the cast so terminals are never cast */
            return;
        }

        NonterminalNode parent = (NonterminalNode) node;

        for (int i = 0; i < nl.getLength(); ++i) {
            Element childrenElement = (Element) nl.item(i);

            parent.addChildren(parseChildrenElement(forest, childrenElement, parent, i));
        }
    }

    /**
     * Builds a NodeChildren from a "children" element and, when the element
     * is marked chosen="true", registers it as the parent's chosen children.
     *
     * @param index position of this "children" element within its node
     */
    private NodeChildren parseChildrenElement(Forest forest,
            Element childrenElement, NonterminalNode parent, int index) {
        String rule = childrenElement.getAttribute("rule");
        boolean chosen = getBooleanAttr(childrenElement, "chosen", false);

        NodeChildren nc = new NodeChildren(rule, index);

        if (chosen) {
            parent.setChosenChildren(nc);
        }

        NodeList nl = childrenElement.getElementsByTagName("child");

        for (int i = 0; i < nl.getLength(); ++i) {
            Element childElement = (Element) nl.item(i);

            int id = Integer.parseInt(childElement.getAttribute("nid"));
            Node node = forest.nodeById(id);

            boolean head = getBooleanAttr(childElement, "head", false);

            nc.addChild(node, head);
        }

        return nc;
    }
}
