/*
 * TreeXMLExporter.java
 *
 * Author: Piotr Achinger <piotr.achinger at gmail.com>
 */
package dendrarium.trees.xml;

import dendrarium.trees.*;
import com.sun.org.apache.xml.internal.serialize.OutputFormat;
import com.sun.org.apache.xml.internal.serialize.XMLSerializer;
import dendrarium.utils.Pair;
import java.io.IOException;
import java.io.StringWriter;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

/**
 * Converter of a Forest object to XML.
 *
 * The conversion is parameterized by the export format (ForestExportFormat).
 *
 * @author Piotr Achinger <piotr.achinger at gmail.com>
 */
public class TreeXMLExporter {

    /** Export format used by {@link #export(Forest)}. */
    public static final ForestExportFormat DEFAULT_FORMAT = ForestExportFormat.ONLY_CHOSEN;

    private static final Logger LOGGER = Logger.getLogger(TreeXMLExporter.class.getName());

    /**
     * Exports the forest to XML using {@link #DEFAULT_FORMAT}.
     *
     * @param forest the forest to serialize
     * @return see {@link #export(Forest, ForestExportFormat)}
     */
    public String export(Forest forest) {
        return export(forest, DEFAULT_FORMAT);
    }

    /**
     * Exports the forest to an indented XML document.
     *
     * @param forest the forest to serialize
     * @param format the export format; only ONLY_CHOSEN, FULL and NO_METADATA are handled
     * @return the XML document; an error message string if the format is not
     *         supported; an empty string if serialization failed with an
     *         IOException or SAXException (the failure is logged at SEVERE)
     */
    public String export(Forest forest, ForestExportFormat format) {
        if (format != ForestExportFormat.ONLY_CHOSEN && format != ForestExportFormat.FULL && format != ForestExportFormat.NO_METADATA) {
            return "TreeXMLExporter ERROR: Format " + format + " is not supported.\n";
        }

        boolean onlyChosen = format == ForestExportFormat.ONLY_CHOSEN;

        try {
            // Computed for every format, not only ONLY_CHOSEN: each exported
            // node carries a "chosen" attribute derived from this set.
            Set<Integer> chosenNodes = forest.chosenNodes();

            StringWriter writer = new StringWriter();
            OutputFormat of = new OutputFormat("XML", "UTF-8", true);
            of.setIndent(1);
            of.setIndenting(true);
            XMLSerializer serializer = new XMLSerializer(writer, of);
            ContentHandler hd = serializer.asContentHandler();

            hd.startDocument();

            AttributesImpl forestAtts = new AttributesImpl();
            addAttr(forestAtts, "sent_id", forest.getSent_id());
            addAttr(forestAtts, "grammar_no", "" + forest.getGrammar_no());
            hd.startElement("", "", "forest", forestAtts);

            putTextElement(hd, "text", new AttributesImpl(), forest.getText());

            boolean empty = forest.isEmpty();
            if (!empty) {
                AttributesImpl startAtts = new AttributesImpl();
                addAttr(startAtts, "from", "" + forest.getStartnodeFrom());
                addAttr(startAtts, "to", "" + forest.getStartnodeTo());
                putTextElement(hd, "startnode", startAtts, forest.getStartnodeLabel());
            }

            putStats(hd, forest, onlyChosen, chosenNodes);

            /* answer data */
            if (format != ForestExportFormat.NO_METADATA) {
                hd.startElement("", "", "answer-data", new AttributesImpl());

                putAnswerData(hd, "base-answer", forest.getAnswerData());
                putAnswerData(hd, "extra-answer", forest.getExtraAnswerData1());
                putAnswerData(hd, "extra-answer", forest.getExtraAnswerData2());

                hd.endElement("", "", "answer-data");
            }

            // An empty forest exports no nodes at all.
            if (!empty) {
                int nodeCount = forest.getNodeList().size();
                for (int i = 0; i < nodeCount; ++i) {
                    boolean chosen = chosenNodes.contains(i);
                    if (onlyChosen && !chosen) {
                        continue;
                    }
                    putNode(hd, forest.nodeById(i), chosen, onlyChosen);
                }
            }

            hd.endElement("", "", "forest");

            hd.endDocument();

            return writer.toString();
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, "Failed to export forest to XML", ex);
            return "";
        } catch (SAXException ex) {
            LOGGER.log(Level.SEVERE, "Failed to export forest to XML", ex);
            return "";
        }
    }

    /**
     * Writes the empty "stats" element. In ONLY_CHOSEN mode the statistics
     * describe the single chosen tree; otherwise they are taken from the forest.
     */
    private static void putStats(ContentHandler hd, Forest forest, boolean onlyChosen,
            Set<Integer> chosenNodes) throws SAXException {
        AttributesImpl atts = new AttributesImpl();
        if (onlyChosen) {
            addAttr(atts, "trees", "1");
            addAttr(atts, "nodes", "" + chosenNodes.size());
        } else {
            addAttr(atts, "trees", "" + forest.getTrees());
            addAttr(atts, "nodes", "" + forest.getNodes());
            addAttr(atts, "inferences", "" + forest.getInferences());
            addAttr(atts, "cputime", "" + forest.getCputime());
        }
        hd.startElement("", "", "stats", atts);
        hd.endElement("", "", "stats");
    }

    /**
     * Writes one "node" element with its terminal or nonterminal content.
     *
     * @param chosen     value of the node's "chosen" attribute
     * @param onlyChosen whether only the chosen tree is being exported
     */
    private void putNode(ContentHandler hd, Node node, boolean chosen, boolean onlyChosen)
            throws SAXException {
        AttributesImpl atts = new AttributesImpl();
        addAttr(atts, "nid", "" + node.getId());
        addAttr(atts, "from", "" + node.getFrom());
        addAttr(atts, "to", "" + node.getTo());
        addAttr(atts, "subtrees", onlyChosen ? "1" : "" + node.getSubtrees());
        addAttr(atts, "chosen", chosen ? "true" : "false");
        hd.startElement("", "", "node", atts);

        if (node instanceof TerminalNode) {
            putTerminal(hd, (TerminalNode) node);
        } else {
            putNonterminal(hd, (NonterminalNode) node, onlyChosen);
        }

        hd.endElement("", "", "node");
    }

    /** Writes the "terminal" element: orth, base and the feature list. */
    private void putTerminal(ContentHandler hd, TerminalNode t) throws SAXException {
        AttributesImpl atts = new AttributesImpl();
        addAttr(atts, "token_id", "" + t.getToken_id());
        addAttr(atts, "interp_id", "" + t.getInterp_id());
        addAttr(atts, "disamb", t.isDisamb() ? "true" : "false");
        addAttr(atts, "nps", t.isNps() ? "true" : "false");
        hd.startElement("", "", "terminal", atts);

        putTextElement(hd, "orth", new AttributesImpl(), t.getOrth());
        putTextElement(hd, "base", new AttributesImpl(), t.getBase());
        putAttributes(hd, t.getAttributes());

        hd.endElement("", "", "terminal");
    }

    /**
     * Writes the "nonterminal" element followed by its sibling "children"
     * elements. In ONLY_CHOSEN mode only the chosen children set is written.
     */
    private void putNonterminal(ContentHandler hd, NonterminalNode t, boolean onlyChosen)
            throws SAXException {
        hd.startElement("", "", "nonterminal", new AttributesImpl());
        putTextElement(hd, "category", new AttributesImpl(), t.getCategory());
        putAttributes(hd, t.getAttributes());
        // Close the element that was actually opened; "category" is already closed.
        hd.endElement("", "", "nonterminal");

        for (NodeChildren children : t.getChildren()) {
            // Identity comparison: the chosen set is one of the node's own children sets.
            boolean chosenSet = children == t.getChosenChildren();
            if (onlyChosen && !chosenSet) {
                continue;
            }

            AttributesImpl atts = new AttributesImpl();
            addAttr(atts, "rule", children.getRule());
            if (chosenSet) {
                addAttr(atts, "chosen", "true");
            }
            hd.startElement("", "", "children", atts);

            int index = 0;
            for (Node child : children.getChildren()) {
                AttributesImpl childAtts = new AttributesImpl();
                addAttr(childAtts, "nid", "" + child.getId());
                addAttr(childAtts, "from", "" + child.getFrom());
                addAttr(childAtts, "to", "" + child.getTo());
                addAttr(childAtts, "head", children.isHead(index) ? "true" : "false");
                hd.startElement("", "", "child", childAtts);
                hd.endElement("", "", "child");
                index++;
            }

            hd.endElement("", "", "children");
        }
    }

    /**
     * Writes one answer element (type and username attributes, nested
     * "comment" element). Does nothing when answerData is null.
     */
    private void putAnswerData(ContentHandler hd, String elemName,
            AnswerData answerData) throws SAXException {
        if (answerData == null) {
            return;
        }

        AttributesImpl atts = new AttributesImpl();
        addAttr(atts, "type", "" + answerData.getType());
        addAttr(atts, "username", "" + answerData.getUsername());
        hd.startElement("", "", elemName, atts);

        putTextElement(hd, "comment", new AttributesImpl(), answerData.getComment());

        hd.endElement("", "", elemName);
    }

    /**
     * Writes each (key, value) pair as an "f" element whose "type" attribute
     * is the key and whose text content is the value.
     */
    private void putAttributes(ContentHandler hd,
            List<Pair<String, String>> attributes) throws SAXException {
        for (Pair<String, String> attr : attributes) {
            AttributesImpl atts = new AttributesImpl();
            addAttr(atts, "type", attr.getF1());
            putTextElement(hd, "f", atts, attr.getF2());
        }
    }

    /** Adds a CDATA attribute without namespace to atts. */
    private static void addAttr(AttributesImpl atts, String name, String value) {
        atts.addAttribute("", "", name, "CDATA", value);
    }

    /**
     * Writes an element containing only text. A null text is written as
     * empty content instead of failing with a NullPointerException.
     */
    private static void putTextElement(ContentHandler hd, String elemName,
            AttributesImpl atts, String text) throws SAXException {
        hd.startElement("", "", elemName, atts);
        // characters() is still called for empty text, as the original code did.
        char[] chars = (text == null) ? new char[0] : text.toCharArray();
        hd.characters(chars, 0, chars.length);
        hd.endElement("", "", elemName);
    }
}
