Example usage for org.jdom2.xpath XPathFactory instance

List of usage examples for org.jdom2.xpath XPathFactory instance

Introduction

On this page you can find example usages of the instance() method of org.jdom2.xpath.XPathFactory.

Prototype

public static final XPathFactory instance() 

Source Link

Document

Obtain an instance of an XPathFactory using the default mechanisms to determine what XPathFactory implementation to use.

Usage

From source file:elh.eus.absa.CorpusReader.java

License:Open Source License

/**
 * Reads a SemEval 2014 ABSA formatted corpus and stores one opinion per aspect term.
 *
 * Sentences are numbered in document order ("s1", "s2", ...) and opinions are numbered
 * across the whole corpus ("o1", "o2", ...). Sentences without an {@code aspectTerms}
 * child contribute no opinion but still consume a sentence number.
 * JDOM and I/O failures are reported by printing the stack trace.
 *
 * @param fileName stream the XML corpus is read from
 */
private void extractOpinionsAbsaSemEval2014(InputStream fileName) {
    SAXBuilder builder = new SAXBuilder();
    XPathFactory xpathFactory = XPathFactory.instance();
    try {
        Document corpus = builder.build(fileName);
        XPathExpression<Element> sentenceQuery = xpathFactory.compile("//sentence", Filters.element());
        int sentenceCount = 0;
        int opinionCount = 0;
        for (Element sentence : sentenceQuery.evaluate(corpus)) {
            sentenceCount++;
            Element aspectTerms = sentence.getChild("aspectTerms");
            if (aspectTerms == null) {
                continue;
            }
            for (Element term : aspectTerms.getChildren()) {
                opinionCount++;
                String target = term.getAttributeValue("target");
                Integer from = Integer.parseInt(term.getAttributeValue("from"));
                Integer to = Integer.parseInt(term.getAttributeValue("to"));
                String polarity = term.getAttributeValue("polarity");

                // this format carries no category, hence the null argument
                this.addOpinion(
                        new Opinion("o" + opinionCount, target, from, to, polarity, null, "s" + sentenceCount));
            }
        }
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
}

From source file:elh.eus.absa.CorpusReader.java

License:Open Source License

/**
 * Read semeval-absa 2015 shared task formatted corpus and extract opinions.
 * /* w w  w.ja  v  a  2 s. co m*/
 * @param InputStream fileName: corpus 
 * @param boolean nullSentOps: whether null opinions should be created for sentence with no opinion
 *                              (only used for semeval-absa 2015 formatted corpora)
 * 
 */
private void extractOpinionsAbsaSemEval2015(InputStream fileName, boolean nullSentenceOps) {
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    try {
        Document doc = sax.build(fileName);
        XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element());
        List<Element> sentences = expr.evaluate(doc);
        String rId = "";
        String sId = ""; //sentence id
        Integer oId = 0; //opinion id
        for (Element sent : sentences) {
            sId = sent.getAttributeValue("id");
            rId = sId.replaceAll(":[0-9]+$", "");

            if (rId.equalsIgnoreCase(sId)) {
                rId = sId.replaceAll("#[0-9]+$", "");
            }

            //store the sentence and the corresponding review
            addRevSent(rId, sId);
            StringBuilder sb = new StringBuilder();
            String sentString = sent.getChildText("text");
            //add sentence to the reader object
            this.addSentence(sId, sentString);

            sb = sb.append(sentString);
            Element opinions = sent.getChild("Opinions");
            if (opinions != null) {
                List<Element> opinionList = opinions.getChildren();
                //there is no opinions
                if (opinionList.isEmpty()) {
                    //System.err.println("kkkkk");
                    //create sentence at list, even if it has no opinion elements
                    sId = sent.getAttributeValue("id");
                    addRevSent(rId, sId);
                    String sentStr = sent.getChildText("text");
                    //add sentence to the reader object
                    this.addSentence(sId, sentStr);
                    if (nullSentenceOps) {
                        oId++;
                        //create and add opinion to the structure
                        Opinion op = new Opinion("o" + oId, "NULL", 0, 0, "NULL", "NULL", sId);
                        this.addOpinion(op);
                    }
                }

                for (Element opElem : opinionList) {
                    oId++;
                    String trgt = opElem.getAttributeValue("target");
                    Integer offsetFrom = 0;
                    Integer offsetTo = 0;
                    try {
                        offsetFrom = Integer.parseInt(opElem.getAttributeValue("from"));
                        offsetTo = Integer.parseInt(opElem.getAttributeValue("to"));

                    } catch (NumberFormatException ne) {
                    }
                    String polarity = opElem.getAttributeValue("polarity");
                    String cat = opElem.getAttributeValue("category");

                    //create and add opinion to the structure
                    Opinion op = new Opinion("o" + oId, trgt, offsetFrom, offsetTo, polarity, cat, sId);
                    this.addOpinion(op);

                    //debugging
                    sb.append("\n\t> " + "o" + oId + " " + trgt + " " + offsetFrom + "-" + offsetTo + " "
                            + polarity + " " + cat);
                }
                //System.out.println(sb.toString());
            } else {
                //System.err.println("kkkkk");
                //create sentence at list, even if it has no opinion elements
                sId = sent.getAttributeValue("id");
                addRevSent(rId, sId);
                String sentStr = sent.getChildText("text");
                //add sentence to the reader object
                this.addSentence(sId, sentStr);
                if (nullSentenceOps) {
                    oId++;
                    //create and add opinion to the structure
                    Opinion op = new Opinion("o" + oId, "NULL", 0, 0, "NULL", "NULL", sId);
                    this.addOpinion(op);
                }
            }
        }
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
}

From source file:elh.eus.absa.CorpusReader.java

License:Open Source License

/**
 * Extracts one "global" opinion per {@code sentence} element of an XML corpus.
 *
 * Each sentence is registered as its own review (the review id equals the sentence id)
 * and its {@code text} child is stored as the sentence text. The opinion polarity is
 * read from the sentence's {@code polarity} attribute. Sentences without that attribute
 * are still registered but get no opinion, and a warning is printed to standard error.
 * The opinion counter advances for every sentence, including skipped ones.
 *
 * NOTE(review): the method name and its earlier documentation refer to a tabulated
 * (CoNLL) input, but the code parses XML - confirm which is intended.
 *
 * @param fileName stream the XML corpus is read from
 */
private void extractOpinionsTabText(InputStream fileName) {
    SAXBuilder builder = new SAXBuilder();
    XPathFactory xpathFactory = XPathFactory.instance();
    try {
        Document corpus = builder.build(fileName);
        XPathExpression<Element> sentenceQuery = xpathFactory.compile("//sentence", Filters.element());
        int opinionCount = 0;
        for (Element sentence : sentenceQuery.evaluate(corpus)) {
            String sentenceId = sentence.getAttributeValue("id");
            // a whole review is represented by a single sentence, so both ids coincide
            String reviewId = sentenceId;
            opinionCount++;

            addRevSent(reviewId, sentenceId);
            this.addSentence(sentenceId, sentence.getChildText("text"));

            String polarity = sentence.getAttributeValue("polarity");
            if (polarity != null) {
                Integer offsetFrom = 0;
                Integer offsetTo = 0;
                // no target and no span: the opinion applies to the sentence as a whole
                this.addOpinion(
                        new Opinion("o" + opinionCount, "", offsetFrom, offsetTo, polarity, "global", sentenceId));
            } else {
                System.err.println("no polarity annotation for review " + reviewId + "."
                        + " Review won't be taken into account");
            }
        }
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
}

From source file:es.ehu.si.ixa.pipe.convert.Convert.java

License:Apache License

/**
 * Prints every sentence of a SemEval 2014 ABSA corpus to standard output, wrapping each
 * aspect term in {@code <START:term> ... <END>} markers.
 *
 * Aspect term spans come from the {@code from}/{@code to} attributes of the children of
 * the sentence's {@code aspectTerms} element. Spans are marked in order of their start
 * offset. A span annotated more than once is marked once, and a span that overlaps an
 * already marked one is skipped, because the flat marker format cannot nest.
 * JDOM and I/O failures are reported by printing the stack trace.
 *
 * @param fileName corpus to read, handed to {@code SAXBuilder.build(String)}
 */
public void absaSemEvalToNER(String fileName) {
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    try {
        Document doc = sax.build(fileName);
        XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element());
        List<Element> sentences = expr.evaluate(doc);
        for (Element sent : sentences) {
            String sentString = sent.getChildText("text");
            StringBuilder sb = new StringBuilder();
            sb.append(sentString);
            Element aspectTerms = sent.getChild("aspectTerms");
            if (aspectTerms != null) {
                // start offset -> end offset, iterated in ascending start order;
                // keeping each span as a pair avoids mixing up the offsets of different terms
                java.util.SortedMap<Integer, Integer> spans = new java.util.TreeMap<Integer, Integer>();
                for (Element aspectElem : aspectTerms.getChildren()) {
                    Integer offsetFrom = Integer.parseInt(aspectElem.getAttributeValue("from"));
                    Integer offsetTo = Integer.parseInt(aspectElem.getAttributeValue("to"));
                    if (!spans.containsKey(offsetFrom)) {
                        spans.put(offsetFrom, offsetTo);
                    }
                }
                int shift = 0; // how far earlier insertions moved the remaining text
                int markedUpTo = 0; // end (in original offsets) of the last marked span
                for (java.util.Map.Entry<Integer, Integer> span : spans.entrySet()) {
                    int offsetFrom = span.getKey();
                    int offsetTo = span.getValue();
                    if (offsetFrom < markedUpTo) {
                        continue; // overlaps the previously marked span
                    }
                    String marked = "<START:term> " + sentString.substring(offsetFrom, offsetTo) + " <END>";
                    sb.replace(offsetFrom + shift, offsetTo + shift, marked);
                    shift += marked.length() - (offsetTo - offsetFrom);
                    markedUpTo = offsetTo;
                }
            }
            System.out.println(sb.toString());
        }
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
}

From source file:es.ehu.si.ixa.pipe.convert.Convert.java

License:Apache License

/**
 * Prints the sentences of a SemEval 2015 ABSA corpus that have an {@code Opinions}
 * element to standard output, wrapping each opinion target in
 * {@code <START:target> ... <END>} markers.
 *
 * Target spans come from the {@code from}/{@code to} attributes of the opinions whose
 * {@code target} attribute is present and different from "NULL". Spans are marked in
 * order of their start offset. A span shared by several opinions is marked once, and a
 * span that overlaps an already marked one is skipped. Sentences without an
 * {@code Opinions} element are not printed.
 * JDOM and I/O failures are reported by printing the stack trace.
 *
 * @param fileName corpus to read, handed to {@code SAXBuilder.build(String)}
 */
public void absaSemEvalToNER2015(String fileName) {
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    try {
        Document doc = sax.build(fileName);
        XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element());
        List<Element> sentences = expr.evaluate(doc);
        for (Element sent : sentences) {
            Element opinionsElement = sent.getChild("Opinions");
            if (opinionsElement == null) {
                continue;
            }
            String sentString = sent.getChildText("text");
            StringBuilder sb = new StringBuilder();
            sb.append(sentString);

            // start offset -> end offset, iterated in ascending start order;
            // keeping each span as a pair avoids re-pairing offsets of different opinions
            java.util.SortedMap<Integer, Integer> spans = new java.util.TreeMap<Integer, Integer>();
            for (Element aspectElem : opinionsElement.getChildren()) {
                String target = aspectElem.getAttributeValue("target");
                if (target == null || target.equals("NULL")) {
                    continue; // opinion without an explicit target: nothing to mark
                }
                Integer offsetFrom = Integer.parseInt(aspectElem.getAttributeValue("from"));
                Integer offsetTo = Integer.parseInt(aspectElem.getAttributeValue("to"));
                if (!spans.containsKey(offsetFrom)) {
                    spans.put(offsetFrom, offsetTo);
                }
            }

            int shift = 0; // how far earlier insertions moved the remaining text
            int markedUpTo = 0; // end (in original offsets) of the last marked span
            for (java.util.Map.Entry<Integer, Integer> span : spans.entrySet()) {
                int offsetFrom = span.getKey();
                int offsetTo = span.getValue();
                if (offsetFrom < markedUpTo) {
                    continue; // overlaps the previously marked span
                }
                String marked = "<START:target> " + sentString.substring(offsetFrom, offsetTo) + " <END>";
                sb.replace(offsetFrom + shift, offsetTo + shift, marked);
                shift += marked.length() - (offsetTo - offsetFrom);
                markedUpTo = offsetTo;
            }
            System.out.println(sb.toString());
        }
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
}

From source file:es.ehu.si.ixa.pipe.convert.Convert.java

License:Apache License

/**
 * Prints the sentences of a SemEval 2015 ABSA corpus that have an {@code Opinions}
 * element to standard output, wrapping each opinion target in
 * {@code <START:xxx> ... <END>} markers, where {@code xxx} is the first three characters
 * of the opinion's {@code category} attribute (fewer if the category is shorter).
 *
 * Only opinions whose {@code target} attribute is present and different from "NULL" are
 * considered. Spans are marked in order of their start offset and each span keeps the
 * category of the first opinion that introduced it, so labels stay attached to their own
 * span whatever the order of the opinions in the file. A span that overlaps an already
 * marked one is skipped. Sentences without an {@code Opinions} element are not printed.
 * JDOM and I/O failures are reported by printing the stack trace.
 *
 * @param fileName corpus to read, handed to {@code SAXBuilder.build(String)}
 */
public void absaSemEvalToMultiClassNER2015(String fileName) {
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    try {
        Document doc = sax.build(fileName);
        XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element());
        List<Element> sentences = expr.evaluate(doc);
        for (Element sent : sentences) {
            Element opinionsElement = sent.getChild("Opinions");
            if (opinionsElement == null) {
                continue;
            }
            String sentString = sent.getChildText("text");
            StringBuilder sb = new StringBuilder();
            sb.append(sentString);

            // both maps are keyed by the span's start offset
            java.util.SortedMap<Integer, Integer> spanEnds = new java.util.TreeMap<Integer, Integer>();
            java.util.Map<Integer, String> spanLabels = new java.util.HashMap<Integer, String>();
            for (Element opinion : opinionsElement.getChildren()) {
                String target = opinion.getAttributeValue("target");
                if (target == null || target.equals("NULL")) {
                    continue; // opinion without an explicit target: nothing to mark
                }
                Integer offsetFrom = Integer.parseInt(opinion.getAttributeValue("from"));
                Integer offsetTo = Integer.parseInt(opinion.getAttributeValue("to"));
                if (!spanEnds.containsKey(offsetFrom)) {
                    // String.valueOf keeps a missing category printable ("nul") instead of failing
                    String className = String.valueOf(opinion.getAttributeValue("category"));
                    spanEnds.put(offsetFrom, offsetTo);
                    spanLabels.put(offsetFrom, className.substring(0, Math.min(3, className.length())));
                }
            }

            int shift = 0; // how far earlier insertions moved the remaining text
            int markedUpTo = 0; // end (in original offsets) of the last marked span
            for (java.util.Map.Entry<Integer, Integer> span : spanEnds.entrySet()) {
                int offsetFrom = span.getKey();
                int offsetTo = span.getValue();
                if (offsetFrom < markedUpTo) {
                    continue; // overlaps the previously marked span
                }
                String marked = "<START:" + spanLabels.get(span.getKey()) + "> "
                        + sentString.substring(offsetFrom, offsetTo) + " <END>";
                sb.replace(offsetFrom + shift, offsetTo + shift, marked);
                shift += marked.length() - (offsetTo - offsetFrom);
                markedUpTo = offsetTo;
            }
            System.out.println(sb.toString());
        }
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
}

From source file:es.ehu.si.ixa.pipe.convert.Convert.java

License:Apache License

/**
 * Prints the {@code text} child of every {@code sentence} element found in the XML read
 * from {@code reader}, one per line on standard output.
 * JDOM and I/O failures are reported by printing the stack trace.
 *
 * @param reader source of the XML corpus
 */
public void absaSemEvalText(Reader reader) {
    SAXBuilder builder = new SAXBuilder();
    XPathFactory xpathFactory = XPathFactory.instance();
    try {
        Document corpus = builder.build(reader);
        XPathExpression<Element> sentenceQuery = xpathFactory.compile("//sentence", Filters.element());
        for (Element sentence : sentenceQuery.evaluate(corpus)) {
            String text = sentence.getChildText("text");
            System.out.println(text);
        }
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
}

From source file:es.ehu.si.ixa.pipe.convert.Convert.java

License:Apache License

/**
 * Tokenizes the {@code text} of every {@code sentence} element of an XML corpus and
 * collects the tokens as word forms of a new {@code KAFDocument}.
 *
 * Each word form is created from the token value and start offset plus the 1-based
 * position of its source {@code sentence} element, and its xpath is set to that
 * element's {@code id} attribute. JDOM and I/O failures are reported by printing the
 * stack trace; whatever was collected up to that point is still returned.
 *
 * @param fileName corpus to read, handed to {@code SAXBuilder.build(String)}
 * @return the string form of the resulting document
 */
public String absa15testToNAF(String fileName) {
    KAFDocument naf = new KAFDocument("en", "v1.naf");
    Segmenter sentenceSegmenter = new Segmenter();
    TokenFactory factory = new TokenFactory();
    Properties annotateProperties = setAnnotateProperties();
    SAXBuilder builder = new SAXBuilder();
    XPathFactory xpathFactory = XPathFactory.instance();
    try {
        Document corpus = builder.build(fileName);
        XPathExpression<Element> sentenceQuery = xpathFactory.compile("//sentence", Filters.element());

        int sentenceNumber = 1;
        for (Element sentenceElem : sentenceQuery.evaluate(corpus)) {
            String sentenceId = sentenceElem.getAttributeValue("id");
            BufferedReader textReader = new BufferedReader(new StringReader(sentenceElem.getChildText("text")));
            IxaPipeTokenizer<Token> tokenizer = new IxaPipeTokenizer<Token>(textReader, factory,
                    annotateProperties);
            // all segments of one corpus sentence share the same sentence number
            for (List<Token> segment : sentenceSegmenter.segment(tokenizer.tokenize())) {
                for (Token token : segment) {
                    WF wordForm = naf.newWF(token.value(), token.startOffset(), sentenceNumber);
                    wordForm.setXpath(sentenceId);
                }
            }
            sentenceNumber++;
        }
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
    return naf.toString();
}

From source file:eu.himeros.hocr.FlatXml.java

License:Open Source License

/**
 * Rewrites the (X)HTML document in {@code inFile} into a flattened XHTML document and
 * writes it, pretty printed, to {@code outFile}.
 *
 * The {@code head} and {@code body} children of the root are moved into the XHTML
 * namespace under a new {@code html} root, and a {@code title} element with the text
 * "ocr" is appended to the head. The body content is replaced by a single
 * {@code div class="ocr_page"} that holds a clone of every grandchild of each
 * {@code ocr_carea} element. The original {@code ocr_carea} wrapper and its direct
 * children are kept only as XML comments around them.
 * A missing {@code head} or {@code body} is tolerated: the corresponding step is skipped.
 *
 * @param inFile  document to read; its name also determines the page id
 * @param outFile file the result is written to
 * @throws Exception if the input cannot be parsed or the output cannot be written
 */
private void init(File inFile, File outFile) throws Exception {
    SAXBuilder builder = new SAXBuilder();
    Document doc = builder.build(inFile);
    Element root = doc.getRootElement();
    Namespace oldns = root.getNamespace();
    Element newRoot = new Element("html", "http://www.w3.org/1999/xhtml");
    Namespace xmlns = newRoot.getNamespace();

    Element head = root.getChild("head", oldns);
    if (head != null) {
        head.setNamespace(xmlns);
        for (Element child : head.getChildren()) {
            child.setNamespace(xmlns);
        }
        Element title = new Element("title", xmlns);
        title.addContent("ocr");
        head.addContent(title);
    }

    Element body = root.getChild("body", oldns);
    if (body != null) {
        body.setNamespace(xmlns);
        Element page = new Element("div", xmlns);
        page.setAttribute("class", "ocr_page");
        // page id: the file name with its first character replaced by "i" and ".html" by ".png"
        page.setAttribute("id", "i" + inFile.getName().substring(1).replace(".html", ".png"));
        XPathExpression<Element> xpath = XPathFactory.instance().compile("//*[@class='ocr_carea']",
                Filters.element(), null, Namespace.getNamespace("ns", "http://www.w3.org/1999/xhtml"));
        List<Element> careaElL = xpath.evaluate(body);
        for (Element careaEl : careaElL) {
            // the wrapper elements survive only as comments
            page.addContent(new Comment("<div class=\"" + careaEl.getAttributeValue("class") + "\" title=\""
                    + careaEl.getAttributeValue("title") + "\">"));
            for (Element pEl : careaEl.getChildren()) {
                page.addContent(new Comment("<p>"));
                for (Element lineEl : pEl.getChildren()) {
                    lineEl.removeAttribute("id");
                    lineEl.setNamespace(xmlns);
                    for (Element child : lineEl.getChildren()) {
                        child.removeAttribute("id");
                        child.removeAttribute("lang");
                        child.removeAttribute("lang", xmlns);
                        child.setNamespace(xmlns);
                    }
                    // clone: the original element is still attached to the old body content
                    page.addContent(lineEl.clone());
                }
                page.addContent(new Comment("</p>"));
            }
            page.addContent(new Comment("</div>"));
        }
        body.removeContent();
        body.addContent(page);
    }

    newRoot.addContent(root.removeContent());
    doc.detachRootElement();
    doc.setRootElement(newRoot);
    XMLOutputter xmlOutputter = new XMLOutputter(Format.getPrettyFormat());
    // NOTE(review): FileWriter encodes with the platform default charset, which may differ
    // from the encoding the outputter declares - confirm and switch to a stream if needed.
    BufferedWriter writer = new BufferedWriter(new FileWriter(outFile));
    try {
        xmlOutputter.output(doc, writer);
    } finally {
        // the writer is created here, so it is released here
        writer.close();
    }
}

From source file:eu.himeros.hocr.HocrInfoAggregator.java

License:Open Source License

/**
 * Annotates every XHTML {@code span} under {@code root} that has a non-empty {@code uc}
 * attribute.
 *
 * Each span receives an {@code occ} attribute with the value {@code occHm} holds for its
 * {@code uc}. When that value is 1, the span also receives {@code anchor} and
 * {@code anchor-id} attributes copied from the {@code uc} and {@code id} attributes of
 * the element {@code nearGtHm} holds for the same key. In addition, for spans of class
 * "CORRWORD" or "UCWORD" the {@code title} is prefixed with that element's {@code text}
 * attribute. Any exception raised while annotating a span (for instance a missing map
 * entry) abandons the remaining annotations of that span only.
 */
private void updateElements() {
    xpath = XPathFactory.instance().compile("//ns:span[@uc!='']", Filters.element(), null,
            Namespace.getNamespace("ns", "http://www.w3.org/1999/xhtml"));
    List<Element> spans = xpath.evaluate(root);
    for (Element span : spans) {
        String key = span.getAttributeValue("uc");
        span.setAttribute("occ", "" + occHm.get(key));
        try {
            if (occHm.get(key) != 1) {
                continue;
            }
            span.setAttribute("anchor", nearGtHm.get(key).getAttributeValue("uc"));
            span.setAttribute("anchor-id", nearGtHm.get(key).getAttributeValue("id"));
            String cssClass = span.getAttributeValue("class");
            if ("CORRWORD".equals(cssClass) || "UCWORD".equals(cssClass)) {
                String currentTitle = span.getAttributeValue("title");
                span.setAttribute("title",
                        nearGtHm.get(key).getAttributeValue("text") + "\u261a " + currentTitle);
            }
        } catch (Exception ex) {
            // best effort: leave this span as it is and move on to the next one
        }
    }
}