Example usage for org.jdom2.input SAXBuilder SAXBuilder

List of usage examples for the org.jdom2.input.SAXBuilder constructor SAXBuilder()

Introduction

On this page you can find usage examples for the no-argument constructor of org.jdom2.input.SAXBuilder.

Prototype

public SAXBuilder() 

Source Link

Document

Creates a new JAXP-based SAXBuilder.

Usage

From source file:esiptestbed.mudrod.ontology.pre.AggregateTriples.java

License:Apache License

/**
 * Parses the OWL file at the given path and keeps both the parsed document
 * and its root element for later use.
 *
 * @param filePathName local path of OWL file
 * @throws JDOMException propagated from {@code SAXBuilder.build(File)}
 * @throws IOException propagated from {@code SAXBuilder.build(File)}
 */
public void loadxml(String filePathName) throws JDOMException, IOException {
    // The results are stored in the document / rootNode members, which are
    // declared outside this method.
    document = new SAXBuilder().build(new File(filePathName));
    rootNode = document.getRootElement();
}

From source file:eu.himeros.cophi.ocr.proofreader.controller.pojo.HocrDocumentBufferedReader.java

License:Open Source License

/**
 * Load the resource./*www  .  j av a2s. c om*/
 * @param origin the buffered reader used to read the resource.
 * @return the DOM document created processing the original document.
 */
@Override
public Document load(BufferedReader origin) {
    try {
        SAXBuilder builder = new SAXBuilder();
        return builder.build(origin);
    } catch (IOException | JDOMException ex) {
        ex.printStackTrace(System.err);
        return null;
    }
}

From source file:eu.himeros.digitaledition.AlignedQuotationParser.java

License:Open Source License

/**
 * Parses the named input with a fresh {@code SAXBuilder} and returns the root
 * element of the resulting document.
 *
 * <p>Both the builder and the parsed document are kept in the
 * {@code builder} / {@code docIn} members, replacing any previous values.
 *
 * @param inFileName value handed unchanged to {@code SAXBuilder.build(String)}
 * @return the root element of the parsed document
 * @throws Exception whatever the builder throws while reading or parsing
 */
public Element getRoot(String inFileName) throws Exception {
    final SAXBuilder freshBuilder = new SAXBuilder();
    builder = freshBuilder;
    docIn = freshBuilder.build(inFileName);
    final Element rootElement = docIn.getRootElement();
    return rootElement;
}

From source file:eu.himeros.hocr.FlatXml.java

License:Open Source License

/**
 * Rewrites an hOCR file as a flattened XHTML document.
 *
 * <p>The input is parsed, its {@code head} and {@code body} elements are moved
 * into the XHTML namespace, a {@code title} element containing "ocr" is
 * appended to the head, and the body content is replaced by a single
 * {@code div class="ocr_page"}. Every element with {@code class="ocr_carea"}
 * and each of its child elements are represented inside that div only by XML
 * comments; the grandchild (line) elements are cloned into the div after their
 * {@code id} attribute and their children's {@code id}/{@code lang} attributes
 * have been removed. The result is pretty-printed to {@code outFile}.
 *
 * <p>A missing {@code head} or {@code body} element is tolerated: the
 * corresponding step is skipped.
 *
 * @param inFile the hOCR file to read; its name (minus the first character,
 *               with ".html" replaced by ".png") becomes the page id
 * @param outFile the file the flattened document is written to
 * @throws Exception if parsing, transforming or writing fails
 */
private void init(File inFile, File outFile) throws Exception {
    SAXBuilder builder = new SAXBuilder();
    Document doc = builder.build(inFile);
    Element root = doc.getRootElement();
    Namespace oldns = root.getNamespace();
    Element newRoot = new Element("html", "http://www.w3.org/1999/xhtml");
    Namespace xmlns = newRoot.getNamespace();

    // Guard before touching head: getChild returns null when it is absent.
    Element head = root.getChild("head", oldns);
    if (head != null) {
        head.setNamespace(xmlns);
        for (Element child : head.getChildren()) {
            child.setNamespace(xmlns);
        }
        Element title = new Element("title", xmlns);
        title.addContent("ocr");
        head.addContent(title);
    }

    // Same guard for body: everything that reads or rewrites it is skipped when absent.
    Element body = root.getChild("body", oldns);
    if (body != null) {
        body.setNamespace(xmlns);
        Element page = new Element("div", xmlns);
        page.setAttribute("class", "ocr_page");
        page.setAttribute("id", "i" + inFile.getName().substring(1).replace(".html", ".png"));
        XPathExpression<Element> xpath = XPathFactory.instance().compile("//*[@class='ocr_carea']",
                Filters.element(), null, Namespace.getNamespace("ns", "http://www.w3.org/1999/xhtml"));
        List<Element> careaElL = xpath.evaluate(body);
        for (Element careaEl : careaElL) {
            // The content-area wrapper survives only as a comment marker.
            page.addContent(new Comment("<div class=\"" + careaEl.getAttributeValue("class") + "\" title=\""
                    + careaEl.getAttributeValue("title") + "\">"));
            for (Element pEl : careaEl.getChildren()) {
                page.addContent(new Comment("<p>"));
                for (Element lineEl : pEl.getChildren()) {
                    lineEl.removeAttribute("id");
                    lineEl.setNamespace(xmlns);
                    for (Element child : lineEl.getChildren()) {
                        child.removeAttribute("id");
                        child.removeAttribute("lang");
                        child.removeAttribute("lang", xmlns);
                        child.setNamespace(xmlns);
                    }
                    // Clone so the line can be attached to the page while the
                    // original is still part of the tree being iterated.
                    page.addContent(lineEl.clone());
                }
                page.addContent(new Comment("</p>"));
            }
            page.addContent(new Comment("</div>"));
        }
        body.removeContent();
        body.addContent(page);
    }

    newRoot.addContent(root.removeContent());
    doc.detachRootElement();
    doc.setRootElement(newRoot);
    XMLOutputter xmlOutputter = new XMLOutputter(Format.getPrettyFormat());
    // try-with-resources closes the output file even if serialization fails.
    try (BufferedWriter out = new BufferedWriter(new FileWriter(outFile))) {
        xmlOutputter.output(doc, out);
    }
}

From source file:eu.himeros.hocr.HocrInfoAggregator.java

License:Open Source License

/**
 * Parses the given input and resets the per-file state: builder, document,
 * root element, root namespace, context filter manager and quotation parser.
 *
 * <p>Afterwards it tries to parse a companion file whose name is derived from
 * {@code inFileName} by dropping its last five characters and appending
 * ".ngt.xml". Any failure in that step is deliberately ignored.
 *
 * @param inFileName value handed unchanged to {@code SAXBuilder.build(String)}
 * @throws Exception if building the main document fails
 */
public void initFile(String inFileName) throws Exception {
    builder = new SAXBuilder();
    doc = builder.build(inFileName);
    root = doc.getRootElement();
    xmlns = root.getNamespace();
    l1Fm = new GreekContextFilterMananger(); //TODO: generalize
    aqp = new AlignedQuotationParser();
    try {
        final int stemLength = inFileName.length() - 5;
        final String nearGtFileName = inFileName.substring(0, stemLength) + ".ngt.xml"; //TODO : generalize
        nearGt = aqp.parse(nearGtFileName);
        makeNearGtHm();
    } catch (Exception ignored) {
        // Best-effort step: a failure while loading the companion file is
        // swallowed on purpose and initialisation still completes.
    }
}

From source file:eu.himeros.hocr.NgtMaker.java

License:Open Source License

/**
 * Processes one file: resets the parser state, parses the file, feeds every
 * XHTML {@code span} with {@code class="ocr_word"} to {@code parseOcrWord},
 * then locates the anchors and writes the resulting fragment.
 *
 * <p>The output file name is the absolute input path with its last four
 * characters replaced by "ngt.xml".
 *
 * @param file the file to process; it is first passed to {@code adjustFile}
 * @throws Exception if any of the processing steps fails
 */
public void parseDoc(File file) throws Exception {
    adjustFile(file);

    // Reset the state left over from a previous document.
    start = -1;
    end = -1;
    prevValue = -1;
    ocrAl = new ArrayList<>(1000);

    final String absolutePath = file.getAbsolutePath();
    outFileName = absolutePath.substring(0, absolutePath.length() - 4) + "ngt.xml";

    builder = new SAXBuilder();
    doc = builder.build(file);
    root = doc.getRootElement();
    xmlns = root.getNamespace();

    final Namespace xhtml = Namespace.getNamespace("ns", "http://www.w3.org/1999/xhtml");
    xpath = XPathFactory.instance().compile("//ns:span[@class='ocr_word']", Filters.element(), null, xhtml);
    List<Element> ocrWords = xpath.evaluate(root);
    for (Element ocrWord : ocrWords) {
        parseOcrWord(ocrWord);
    }

    // Two "%%%" entries are appended after the last word.
    for (int i = 0; i < 2; i++) {
        ocrAl.add("%%%");
    }
    findAnchors();
    writeFragment(start, end);
}

From source file:eu.himeros.hocr.XmlWordListExtractor.java

License:Open Source License

/**
 * Extracts a word list from an XML document and writes it one word per line.
 *
 * <p>The text value of the root element is normalised (newlines become
 * spaces, runs of spaces collapse, "- " hyphenation breaks are joined) and
 * split on spaces. From every token, all characters outside the ranges
 * U+0370 to U+03FF and U+1F00 to U+1FFF are removed; tokens that end up empty
 * are skipped.
 *
 * @param inFileName value handed unchanged to {@code SAXBuilder.build(String)}
 * @param outFileName path of the word-list file to write
 * @throws Exception if parsing the input or writing the output fails
 */
private void init(String inFileName, String outFileName) throws Exception {
    SAXBuilder builder = new SAXBuilder();
    Document doc = builder.build(inFileName);
    String text = doc.getRootElement().getValue();
    text = text.replaceAll("\n", " ");
    text = text.replaceAll(" +", " ");
    text = text.replace("- ", "");
    text = text.replace(" ", "\n");
    String[] tokens = text.split("\n");
    // try-with-resources closes the output file even when a write fails.
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(outFileName))) {
        for (String token : tokens) {
            // NOTE(review): the previous pattern ended with an extra "|[]"
            // alternative, an empty character class that java.util.regex
            // rejects as unclosed. Whatever characters it once listed are not
            // recoverable here; confirm whether more characters must be stripped.
            String word = token.replaceAll("[^\u0370-\u03FF\u1F00-\u1FFF]", "");
            if (word.isEmpty()) {
                continue;
            }
            bw.write(word);
            bw.newLine();
        }
    }
}

From source file:eu.knux.passmanager.helper.FileHelper.java

License:Apache License

/**
 * Reads a password file and groups its entries by category.
 *
 * <p>Every {@code password} element found under a {@code category} element is
 * turned into a {@code Password} (name, pass and comment child texts, flagged
 * as encrypted) and added to the category named by the parent's {@code name}
 * attribute, or to the "root" category when that attribute is absent. A named
 * category is only created once its first password is seen.
 *
 * @param f the XML file to read
 * @return the categories in insertion order; empty when the file cannot be
 *         read or parsed (the stack trace is then printed)
 */
public static LinkedHashMap<String, Category> loadPassword(File f) {
    SAXBuilder builder = new SAXBuilder();
    LinkedHashMap<String, Category> result = new LinkedHashMap<>();

    Element rootElement = null;
    try {
        rootElement = builder.build(f).getRootElement();
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
    if (rootElement == null) {
        // Nothing could be parsed: hand back the empty map.
        return result;
    }

    List<Element> categoryElements = rootElement.getChildren("category");
    result.put("root", new Category("root"));
    for (Element categoryEl : categoryElements) {
        String categoryName = categoryEl.getAttributeValue("name");
        // Unnamed categories share the "root" entry, which always exists.
        String key = (categoryName == null) ? "root" : categoryName;
        for (Element passwordEl : categoryEl.getChildren("password")) {
            if (!result.containsKey(key)) {
                result.put(key, new Category(key));
            }
            Category target = result.get(key);

            Password entry = new Password();
            entry.setName(passwordEl.getChildText("name"));
            entry.setPass(passwordEl.getChildText("pass"));
            entry.setComment(passwordEl.getChildText("comment"));
            entry.setEncrypted(true);
            target.addPassword(entry);
        }
    }
    return result;
}

From source file:eus.ixa.ixa.pipe.convert.AbsaSemEval.java

License:Apache License

/**
 * Reads an ABSA 2015 XML file and adds its sentences and opinion targets to
 * the given NAF document.
 *
 * <p>Each {@code sentence} element is tokenized; every token becomes a word
 * form plus a term (pos "O", lemma equal to the token). For each child of the
 * sentence's {@code Opinions} element whose {@code target} attribute is
 * present and not "NULL", the {@code from}/{@code to} offsets are matched
 * against the token offsets; when both ends match and the term references are
 * consistent, an entity typed with the opinion's {@code category} is created.
 * Opinions without a {@code target} attribute are treated like "NULL" targets.
 *
 * <p>Parse and I/O failures are reported via {@code printStackTrace} only.
 *
 * @param kaf the NAF document to populate
 * @param fileName value handed unchanged to {@code SAXBuilder.build(String)}
 * @param language language passed to the tokenizer
 */
private static void absa2015ToNAFNER(KAFDocument kaf, String fileName, String language) {
    // reading the ABSA xml file
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    try {
        Document doc = sax.build(fileName);
        XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element());
        List<Element> sentences = expr.evaluate(doc);

        // naf sentence counter
        int counter = 1;
        for (Element sent : sentences) {
            List<Integer> wfFromOffsets = new ArrayList<>();
            List<Integer> wfToOffsets = new ArrayList<>();
            List<WF> sentWFs = new ArrayList<>();
            List<Term> sentTerms = new ArrayList<>();
            // sentence id and original text
            String sentId = sent.getAttributeValue("id");
            String sentString = sent.getChildText("text");
            // the list contains just one list of tokens
            List<List<Token>> segmentedSentence = StringUtils.tokenizeSentence(sentString, language);
            for (List<Token> sentence : segmentedSentence) {
                for (Token token : sentence) {
                    WF wf = kaf.newWF(token.startOffset(), token.getTokenValue(), counter);
                    wf.setXpath(sentId);
                    final List<WF> wfTarget = new ArrayList<>();
                    wfTarget.add(wf);
                    wfFromOffsets.add(wf.getOffset());
                    wfToOffsets.add(wf.getOffset() + wf.getLength());
                    sentWFs.add(wf);
                    Term term = kaf.newTerm(KAFDocument.newWFSpan(wfTarget));
                    term.setPos("O");
                    term.setLemma(token.getTokenValue());
                    sentTerms.add(term);
                }
            }
            counter++;
            String[] tokenIds = new String[sentWFs.size()];
            for (int i = 0; i < sentWFs.size(); i++) {
                tokenIds[i] = sentWFs.get(i).getId();
            }
            // going through every opinion element for each sentence
            // each opinion element can contain one or more opinions
            Element opinionsElement = sent.getChild("Opinions");
            if (opinionsElement != null) {
                // iterating over every opinion in the opinions element
                List<Element> opinionList = opinionsElement.getChildren();
                for (Element opinion : opinionList) {
                    String category = opinion.getAttributeValue("category");
                    String targetString = opinion.getAttributeValue("target");
                    System.err.println("-> " + category + ", " + targetString);
                    // adding OTE; getAttributeValue yields null when the attribute
                    // is absent, which is handled like an explicit "NULL" target
                    if (targetString != null && !targetString.equalsIgnoreCase("NULL")) {
                        int fromOffset = Integer.parseInt(opinion.getAttributeValue("from"));
                        int toOffset = Integer.parseInt(opinion.getAttributeValue("to"));
                        int startIndex = -1;
                        int endIndex = -1;
                        for (int i = 0; i < wfFromOffsets.size(); i++) {
                            if (wfFromOffsets.get(i) == fromOffset) {
                                startIndex = i;
                            }
                        }
                        for (int i = 0; i < wfToOffsets.size(); i++) {
                            if (wfToOffsets.get(i) == toOffset) {
                                // span is +1 with respect to the last token of the span
                                endIndex = i + 1;
                            }
                        }
                        // TODO remove this condition to correct manually offsets
                        if (startIndex != -1 && endIndex != -1) {
                            List<String> wfIds = Arrays
                                    .asList(Arrays.copyOfRange(tokenIds, startIndex, endIndex));
                            List<String> wfTermIds = NAFUtils.getWFIdsFromTerms(sentTerms);
                            if (NAFUtils.checkTermsRefsIntegrity(wfIds, wfTermIds)) {
                                List<Term> nameTerms = kaf.getTermsFromWFs(wfIds);
                                ixa.kaflib.Span<Term> neSpan = KAFDocument.newTermSpan(nameTerms);
                                List<ixa.kaflib.Span<Term>> references = new ArrayList<ixa.kaflib.Span<Term>>();
                                references.add(neSpan);
                                Entity neEntity = kaf.newEntity(references);
                                neEntity.setType(category);
                            }
                        }
                    }
                }
            }
        } // end of sentence
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
}

From source file:eus.ixa.ixa.pipe.convert.AbsaSemEval.java

License:Apache License

/**
 * Tokenizes every {@code sentence} element of an ABSA 2015 XML file into word
 * forms of a new NAF document and returns that document as a string.
 *
 * <p>Each word form records the id of the sentence it came from and a
 * sentence number starting at 1. Parse and I/O failures are reported via
 * {@code printStackTrace}; whatever was added to the document up to that
 * point is still returned.
 *
 * @param fileName value handed unchanged to {@code SAXBuilder.build(String)}
 * @param language language passed to the tokenizer
 * @return the string form of the NAF document
 */
public static String absa2015ToWFs(String fileName, String language) {
    // NOTE(review): the document language is fixed to "en" although a
    // language parameter is available; confirm this is intended.
    KAFDocument kaf = new KAFDocument("en", "v1.naf");
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    try {
        Document doc = sax.build(fileName);
        List<Element> sentences = xFactory.compile("//sentence", Filters.element()).evaluate(doc);

        int sentenceNumber = 0;
        for (Element sent : sentences) {
            sentenceNumber++;
            String sentId = sent.getAttributeValue("id");
            String sentText = sent.getChildText("text");
            for (List<Token> segment : StringUtils.tokenizeSentence(sentText, language)) {
                for (Token token : segment) {
                    kaf.newWF(token.startOffset(), token.getTokenValue(), sentenceNumber).setXpath(sentId);
                }
            }
        }
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
    return kaf.toString();
}