Example usage for org.jdom2.input SAXBuilder SAXBuilder

Introduction

This page collects example usages of the no-argument constructor SAXBuilder() of the org.jdom2.input.SAXBuilder class.

Prototype

public SAXBuilder()

Source Link

Document

Creates a new JAXP-based SAXBuilder.

Usage

From source file:eus.ixa.ixa.pipe.convert.AbsaSemEval.java

License:Apache License

/**
 * Converts an ABSA 2015 XML file into a tab-separated polarity format.
 *
 * <p>For every opinion found under a sentence's {@code Opinions} element one
 * line {@code polarity<TAB>tokens<NEWLINE>} is produced. When the opinion has
 * no usable target (attribute missing or equal to "NULL", ignoring case) or it
 * is the only opinion of the sentence, the whole tokenized sentence is used.
 * Otherwise only a window of tokens around the target is emitted.
 *
 * @param fileName  location of the ABSA XML document handed to the SAX builder
 * @param language  language code passed to the tokenizer
 * @param windowMin number of tokens kept before the first target token
 * @param windowMax number of tokens kept after the last target token
 * @return the generated lines; if parsing fails the stack trace is printed and
 *         whatever was produced up to that point is returned
 */
public static String absa2015ToDocCatFormatForPolarity(String fileName, String language, int windowMin,
        int windowMax) {
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    // StringBuilder avoids re-copying the whole output on every appended line
    StringBuilder text = new StringBuilder();

    try {
        Document doc = sax.build(fileName);
        XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element());
        List<Element> sentences = expr.evaluate(doc);

        for (Element sent : sentences) {
            Element opinionsElement = sent.getChild("Opinions");
            String sentStringTmp = sent.getChildText("text");

            List<List<Token>> segmentedSentence = StringUtils.tokenizeSentence(sentStringTmp, language);
            List<Token> sentence = segmentedSentence.get(0);

            if (opinionsElement != null) {
                // iterating over every opinion in the opinions element
                List<Element> opinionList = opinionsElement.getChildren();

                for (Element opinion : opinionList) {
                    StringBuilder sentString = new StringBuilder();

                    String targetString = opinion.getAttributeValue("target");
                    String polarityString = opinion.getAttributeValue("polarity");

                    // getAttributeValue returns null for a missing attribute; such
                    // opinions are treated like an explicit "NULL" target instead of
                    // failing with a NullPointerException.
                    boolean noTarget = targetString == null || targetString.equalsIgnoreCase("NULL");

                    if (noTarget || opinionList.size() == 1) {
                        for (Token token : sentence) {
                            sentString.append(token.getTokenValue()).append(" ");
                        }
                    } else {
                        int posTargetMin = -1;
                        int posTargetMax = -1;
                        List<List<Token>> segmentedtarget = StringUtils.tokenizeSentence(targetString,
                                language);
                        List<Token> target = segmentedtarget.get(0);
                        String targetMin = target.get(0).getTokenValue();
                        String targetMax = target.get(target.size() - 1).getTokenValue();
                        int count = 0;
                        for (Token token : sentence) {
                            if (token.getTokenValue().equals(targetMin)) {
                                posTargetMin = count;
                            }
                            // the last target token only counts once the first one was seen
                            if (token.getTokenValue().equals(targetMax) && posTargetMin > -1) {
                                posTargetMax = count;
                                break;
                            }
                            count++;
                        }
                        // NOTE(review): if the target tokens are not located both positions
                        // stay at -1 and the clamping below selects a prefix of the sentence;
                        // this mirrors the previous behaviour and may deserve a dedicated fallback.
                        int from = Math.max(posTargetMin - windowMin, 0);
                        int to = Math.min(posTargetMax + windowMax, sentence.size() - 1);
                        for (int x = from; x <= to; x++) {
                            sentString.append(sentence.get(x).getTokenValue()).append(" ");
                        }
                    }
                    text.append(polarityString).append("\t").append(sentString).append("\n");
                }

            }
        } // end of sentence
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }

    return text.toString();
}

From source file:eus.ixa.ixa.pipe.convert.AbsaSemEval.java

License:Apache License

/**
 * Reads an ABSA 2014 XML file and populates the given NAF document with word
 * forms, terms and one entity of type "term" per aspect term whose character
 * offsets line up with token boundaries.
 *
 * @param kaf      the NAF document that receives WFs, terms and entities
 * @param fileName location of the ABSA XML document handed to the SAX builder
 * @param language language code passed to the tokenizer
 */
private static void absa2014ToNAFNER(KAFDocument kaf, String fileName, String language) {
    SAXBuilder builder = new SAXBuilder();
    XPathFactory xpathFactory = XPathFactory.instance();
    try {
        Document absaDoc = builder.build(fileName);
        XPathExpression<Element> sentenceExpr = xpathFactory.compile("//sentence", Filters.element());

        // NAF sentence number, starting at 1
        int sentNumber = 1;
        for (Element sentElem : sentenceExpr.evaluate(absaDoc)) {
            List<Integer> startOffsets = new ArrayList<>();
            List<Integer> endOffsets = new ArrayList<>();
            List<WF> wordForms = new ArrayList<>();
            List<Term> terms = new ArrayList<>();

            String sentId = sentElem.getAttributeValue("id");
            String rawText = sentElem.getChildText("text");

            // every token becomes one WF plus one single-WF term
            for (List<Token> tokens : StringUtils.tokenizeSentence(rawText, language)) {
                for (Token tok : tokens) {
                    WF wf = kaf.newWF(tok.startOffset(), tok.getTokenValue(), sentNumber);
                    wf.setXpath(sentId);
                    final List<WF> singleWf = new ArrayList<WF>();
                    singleWf.add(wf);
                    startOffsets.add(wf.getOffset());
                    endOffsets.add(wf.getOffset() + wf.getLength());
                    wordForms.add(wf);
                    Term term = kaf.newTerm(KAFDocument.newWFSpan(singleWf));
                    term.setPos("O");
                    term.setLemma(tok.getTokenValue());
                    terms.add(term);
                }
            }
            sentNumber++;

            String[] wfIdArray = new String[wordForms.size()];
            int slot = 0;
            for (WF wf : wordForms) {
                wfIdArray[slot] = wf.getId();
                slot++;
            }

            Element aspectTerms = sentElem.getChild("aspectTerms");
            if (aspectTerms == null) {
                continue;
            }

            for (Element aspect : aspectTerms.getChildren()) {
                int from = Integer.parseInt(aspect.getAttributeValue("from"));
                int to = Integer.parseInt(aspect.getAttributeValue("to"));

                // Both offset lists hold one entry per token, so a single pass finds
                // the last token starting at "from" and the last token ending at "to".
                int first = -1;
                int pastLast = -1;
                for (int i = 0; i < startOffsets.size(); i++) {
                    if (startOffsets.get(i) == from) {
                        first = i;
                    }
                    if (endOffsets.get(i) == to) {
                        // exclusive end: one past the last token of the span
                        pastLast = i + 1;
                    }
                }

                // TODO remove this condition to correct manually offsets
                if (first == -1 || pastLast == -1) {
                    continue;
                }

                List<String> spanWfIds = Arrays.asList(Arrays.copyOfRange(wfIdArray, first, pastLast));
                List<String> termWfIds = NAFUtils.getWFIdsFromTerms(terms);
                if (!NAFUtils.checkTermsRefsIntegrity(spanWfIds, termWfIds)) {
                    continue;
                }

                List<Term> spanTerms = kaf.getTermsFromWFs(spanWfIds);
                ixa.kaflib.Span<Term> termSpan = KAFDocument.newTermSpan(spanTerms);
                List<ixa.kaflib.Span<Term>> refs = new ArrayList<ixa.kaflib.Span<Term>>();
                refs.add(termSpan);
                Entity entity = kaf.newEntity(refs);
                entity.setType("term");
            }
        }
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
}

From source file:eus.ixa.ixa.pipe.convert.DSRCCorpus.java

License:Apache License

/**
 * Builds WFs and terms from a DSRC words file and adds one "TARGET" entity to
 * the NAF document for every markable annotated as a target.
 *
 * <p>Sentence numbers advance whenever a word matches the {@code endOfSentence}
 * pattern. Target spans are read from the markable's {@code span} attribute
 * ({@code word_N} or {@code word_N..word_M}); anything after the first comma
 * is discarded before the span is parsed.
 *
 * @param kaf          the NAF document that receives WFs, terms and entities
 * @param wordsDoc     location of the words XML document
 * @param markablesDoc location of the MMAX opinion-expression markables document
 * @throws JDOMException if either document cannot be parsed
 * @throws IOException   if either document cannot be read
 */
private static void DSRCToNAFNER(KAFDocument kaf, String wordsDoc, String markablesDoc)
        throws JDOMException, IOException {
    // reading the words xml file
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    Document docWords = sax.build(wordsDoc);
    XPathExpression<Element> expr = xFactory.compile("//word", Filters.element());
    List<Element> words = expr.evaluate(docWords);
    List<WF> sentWFs = new ArrayList<>();
    List<Term> sentTerms = new ArrayList<>();
    // building the NAF containing the WFs and Terms
    // naf sentence counter
    int sentCounter = 1;
    for (Element word : words) {
        String token = word.getText();
        // offsets are not available in the words file, so every WF gets offset 0
        WF wf = kaf.newWF(0, token, sentCounter);
        final List<WF> wfTarget = new ArrayList<WF>();
        wfTarget.add(wf);
        sentWFs.add(wf);
        Term term = kaf.newTerm(KAFDocument.newWFSpan(wfTarget));
        term.setPos("O");
        term.setLemma(token);
        sentTerms.add(term);
        Matcher endMatcher = endOfSentence.matcher(token);
        if (endMatcher.matches()) {
            sentCounter++;
        }
    } // end of processing words

    String[] tokenIds = new String[sentWFs.size()];
    for (int i = 0; i < sentWFs.size(); i++) {
        tokenIds[i] = sentWFs.get(i).getId();
    }
    // processing markables document in mmax opinion expression files
    Document markDoc = sax.build(markablesDoc);
    XPathExpression<Element> markExpr = xFactory.compile("//ns:markable", Filters.element(), null,
            Namespace.getNamespace("ns", "www.eml.org/NameSpaces/OpinionExpression"));
    List<Element> markables = markExpr.evaluate(markDoc);
    for (Element markable : markables) {
        // Constant-first comparison: a markable without an annotation_type attribute
        // yields null and is skipped instead of raising a NullPointerException.
        if (!"target".equalsIgnoreCase(markable.getAttributeValue("annotation_type"))) {
            continue;
        }
        String markSpan = markable.getAttributeValue("span");
        System.err.println("--> span: " + markSpan);
        String removeCommaSpan = markSpan.replaceAll(",word_.*", "");
        System.err.println("--> newSpan: " + removeCommaSpan);
        String[] spanWords = removeCommaSpan.split("\\.\\.");
        // word ids in the span are 1-based; endIndex is made exclusive here
        int startIndex = Integer.parseInt(spanWords[0].replace("word_", ""));
        int endIndex = Integer.parseInt(spanWords[spanWords.length - 1].replace("word_", "")) + 1;

        List<String> wfIds = Arrays.asList(Arrays.copyOfRange(tokenIds, startIndex - 1, endIndex - 1));
        List<String> wfTermIds = getWFIdsFromTerms(sentTerms);
        if (checkTermsRefsIntegrity(wfIds, wfTermIds)) {
            List<Term> nameTerms = kaf.getTermsFromWFs(wfIds);
            ixa.kaflib.Span<Term> neSpan = KAFDocument.newTermSpan(nameTerms);
            List<ixa.kaflib.Span<Term>> references = new ArrayList<ixa.kaflib.Span<Term>>();
            references.add(neSpan);
            Entity neEntity = kaf.newEntity(references);
            neEntity.setType("TARGET");
            System.err.println("--> target: " + neEntity.getStr());
        }
    }
}

From source file:eus.ixa.ixa.pipe.convert.TassFormat.java

License:Apache License

/**
 * Prints the tweets of a TASS General Corpus XML file to standard output as
 * {@code tweetid<TAB>polarity<TAB>tokenized content}, one tweet per line.
 *
 * @param fileName location of the TASS XML document handed to the SAX builder
 * @throws JDOMException if the document cannot be parsed
 * @throws IOException   if the document cannot be read
 */
public static void generalToTabulated(String fileName) throws JDOMException, IOException {
    StringBuilder out = new StringBuilder();
    SAXBuilder builder = new SAXBuilder();
    XPathFactory xpathFactory = XPathFactory.instance();
    Document corpus = builder.build(fileName);
    XPathExpression<Element> tweetExpr = xpathFactory.compile("//tweet", Filters.element());

    for (Element tweetElem : tweetExpr.evaluate(corpus)) {
        String id = tweetElem.getChildText("tweetid");
        String content = tweetElem.getChildText("content");

        // the tokenizer is expected to return a single sentence; if it returns
        // several, only the last one is kept
        String tokenizedContent = null;
        for (List<Token> tokens : StringUtils.tokenizeSentence(content, LANGUAGE)) {
            String[] tokenStrings = eus.ixa.ixa.pipe.ml.utils.StringUtils
                    .convertListTokenToArrayStrings(tokens);
            tokenizedContent = StringUtils.getStringFromTokens(tokenStrings);
        }

        // stays null (printed as "null") when the polarity has no value child
        String polarity = tweetElem.getChild("sentiments").getChild("polarity").getChildText("value");

        out.append(id);
        out.append("\t");
        out.append(polarity);
        out.append("\t");
        out.append(tokenizedContent);
        out.append("\n");
    }
    System.out.println(out.toString());
}

From source file:eus.ixa.ixa.pipe.convert.TassFormat.java

License:Apache License

/**
 * Writes one NAF file per tweet of a TASS General Corpus XML file. Each file is
 * named {@code <tweetid>.naf}, created in the current working directory, and
 * contains the tweet's tokens as word forms of sentence 1.
 *
 * <p>Parsing and I/O failures are not propagated; the stack trace is printed.
 *
 * @param fileName location of the TASS XML document handed to the SAX builder
 */
public static void generalToWFs(String fileName) {
    SAXBuilder builder = new SAXBuilder();
    XPathFactory xpathFactory = XPathFactory.instance();
    try {
        Document corpus = builder.build(fileName);
        XPathExpression<Element> tweetExpr = xpathFactory.compile("//tweet", Filters.element());

        for (Element tweetElem : tweetExpr.evaluate(corpus)) {
            String id = tweetElem.getChildText("tweetid");
            KAFDocument naf = new KAFDocument(LANGUAGE, "v1.naf");
            naf.createPublic().publicId = id;

            String content = tweetElem.getChildText("content");
            for (List<Token> tokens : StringUtils.tokenizeSentence(content, LANGUAGE)) {
                for (Token tok : tokens) {
                    // every token is attached to sentence number 1
                    naf.newWF(tok.startOffset(), tok.getTokenValue(), 1);
                }
            }

            // createFile fails if the target already exists
            Path target = Files.createFile(Paths.get(id + ".naf"));
            byte[] payload = naf.toString().getBytes(StandardCharsets.UTF_8);
            Files.write(target, payload);
            System.err.println(">> Wrote naf document to " + target);
        }
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
}

From source file:fciencias.edatos.practica04.Catalogo.java

/**
 * Reads the given XML file with JDOM and appends one {@code Profesor} to
 * {@code listaProfesores} for every child of the root element.
 *
 * <p>The text of each grandchild element is parsed as an integer salary and
 * stored in a 12-slot array, in document order. NOTE(review): more than twelve
 * salary elements under one professor would overflow that array.
 *
 * @param baseDeDatos path of the XML file to read
 * @throws JDOMException if the file cannot be parsed
 * @throws IOException   if the file cannot be read
 */

private void useJDOM(String baseDeDatos) throws JDOMException, IOException {

    SAXBuilder builder = new SAXBuilder();
    Document xml = builder.build(new File(baseDeDatos));
    Element raiz = xml.getRootElement();

    // each child of the root describes one professor
    for (Element nodoProfesor : raiz.getChildren()) {
        int[] salario = new int[12];
        int mes = 0;
        for (Element nodoSalario : nodoProfesor.getChildren()) {
            salario[mes] = Integer.parseInt(nodoSalario.getValue());
            mes++;
        }

        int idNumerico = Integer.parseInt(nodoProfesor.getAttributeValue("id"));
        String nombre = nodoProfesor.getAttributeValue("nombre");

        // map the "carrera" attribute onto the enum; unknown values keep NO_CARRERA
        String textoCarrera = nodoProfesor.getAttributeValue("carrera");
        Carrera carrera;
        if (textoCarrera.equals("MATEMATICAS")) {
            carrera = Carrera.MATEMATICAS;
        } else if (textoCarrera.equals("ACTUARIA")) {
            carrera = Carrera.ACTUARIA;
        } else if (textoCarrera.equals("FISICA")) {
            carrera = Carrera.FISICA;
        } else if (textoCarrera.equals("BIOLOGIA")) {
            carrera = Carrera.BIOLOGIA;
        } else if (textoCarrera.equals("CIENCIAS_DE_LA_COMPUTACION")) {
            carrera = Carrera.CIENCIAS_DE_LA_COMPUTACION;
        } else if (textoCarrera.equals("FISICA_BIOMEDICA")) {
            carrera = Carrera.FISICA_BIOMEDICA;
        } else if (textoCarrera.equals("CIENCIAS_DE_LA_TIERRA")) {
            carrera = Carrera.CIENCIAS_DE_LA_TIERRA;
        } else {
            carrera = Carrera.NO_CARRERA;
        }

        // map the "grado" attribute onto the enum; unknown values keep NO_GRADO
        String textoGrado = nodoProfesor.getAttributeValue("grado");
        Grado grado;
        if (textoGrado.equals("LICENCIADO")) {
            grado = Grado.LICENCIADO;
        } else if (textoGrado.equals("MAESTRO")) {
            grado = Grado.MAESTRO;
        } else if (textoGrado.equals("DOCTOR")) {
            grado = Grado.DOCTOR;
        } else if (textoGrado.equals("ESPECIALISTA")) {
            grado = Grado.ESPECIALISTA;
        } else {
            grado = Grado.NO_GRADO;
        }

        Profesor profesor = new Profesor(idNumerico, nombre, carrera, grado, salario);

        @SuppressWarnings("unchecked")
        T elemento = (T) profesor;

        listaProfesores.agregaFinal(elemento);
    }

}

From source file:fi.ni.bcfextractor.BCFExtractorController.java

License:Open Source License

/**
 * Opens a BCF zip archive, clears {@code data} and feeds the root element of
 * every entry whose name ends in ".bcf" to {@code listChildren}.
 *
 * <p>Each matching entry is first read fully into memory and echoed to standard
 * output, then parsed from that in-memory copy so the zip stream itself is
 * never handed to the XML parser. Errors are reported by printing the stack
 * trace; none are propagated to the caller.
 *
 * @param file the zip archive to read
 */
private void openBCFZip(File file) {
    FileInputStream inputStream = null;
    try {
        inputStream = new FileInputStream(file);
        ZipInputStream zInputStream = new ZipInputStream(inputStream);
        try {
            data.clear();
            for (ZipEntry entry = zInputStream.getNextEntry(); entry != null; entry = zInputStream
                    .getNextEntry()) {
                String name = entry.getName();
                if (name.endsWith(".bcf")) {

                    // the reader is deliberately not closed: closing it would close
                    // the zip stream and end the iteration over the remaining entries
                    BufferedReader inputReader = new BufferedReader(new InputStreamReader(zInputStream));
                    StringBuilder sb = new StringBuilder();
                    String inline;
                    while ((inline = inputReader.readLine()) != null) {
                        // readLine() strips the line terminator; put one back so that
                        // XML tokens split across lines are not fused together
                        sb.append(inline).append('\n');
                    }

                    System.out.println(sb.toString());
                    SAXBuilder builder = new SAXBuilder();

                    try {
                        Document doc = builder.build(new ByteArrayInputStream(sb.toString().getBytes()));
                        listChildren(doc.getRootElement());
                    } catch (JDOMException e) {
                        e.printStackTrace();
                    }
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                zInputStream.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } finally {
        // inputStream is still null when the file could not be opened
        if (inputStream != null) {
            try {
                inputStream.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:filter.Filter.java

/**
 * Creates a filter over the given files.
 *
 * <p>Besides storing the arguments, the constructor creates a fresh
 * {@code SAXBuilder}, a {@code Project} named after {@code nameProject} and a
 * new {@code ProgressBarForm}.
 *
 * @param files       the files this filter works on
 * @param nameProject name given to the newly created project
 * @param frame       the form stored in the {@code frame} field
 */
public Filter(Set<File> files, String nameProject, FilterSystemForm frame) {
    this.files = files;
    this.builder = new SAXBuilder();
    this.project = new Project(nameProject);
    this.progressBar = new ProgressBarForm();
    this.frame = frame;
}

From source file:fourmiz.engine.Engine.java

License:Open Source License

/**
 * Loads the game with the given name.
 *
 * <p>Resources are loaded from {@code resourcePath + name + renderSuffix}; the
 * map is then parsed from {@code resourcePath + name + mapSuffix}. Each child
 * of the map's root element is dispatched by element name: "Config" to
 * {@code loadConfig}, "Entity" to {@code loadEntity}; any other name is logged
 * as a warning and skipped.
 *
 * @param name name of the game to load
 * @throws JDOMException if the map file cannot be parsed
 * @throws IOException if the map file cannot be read
 * @throws SlickException declared by this method; the visible code does not show which call raises it
 */
public void loadLevel(String name) throws JDOMException, IOException, SlickException {
    currentGame = name;
    ressources.load(resourcePath + currentGame + renderSuffix);

    SAXBuilder builder = new SAXBuilder();
    File mapFile = new File(resourcePath + currentGame + mapSuffix);
    Element mapRoot = builder.build(mapFile).getRootElement();

    for (Element node : mapRoot.getChildren()) {
        switch (node.getName()) {
        case "Config":
            loadConfig(node.getChildren());
            break;
        case "Entity":
            loadEntity(node.getChildren());
            break;
        default:
            log.warn("loadLevel: unknown type object -> " + node.getName());
        }
    }

    this.loaded = true;
}

From source file:fr.amap.amapvox.io.tls.rsp.Rsp.java

License:Open Source License

/**
 * Parses a RiSCAN project (.rsp) file and fills this object's state from it:
 * the project name, the "pop" matrix and the list of registered scan positions
 * with their single scans.
 *
 * <p>Only scan positions whose {@code registered} element contains 1 are kept.
 * For each of their scans an absolute path is assembled from the directory of
 * the rsp file, the scan-positions folder, the position folder, the
 * single-scans folder and the scan's file name.
 *
 * @param rspFile the project file to read
 * @throws JDOMException if the file cannot be parsed; the message names the file
 * @throws IOException   if the file cannot be read; the message names the file
 */
public void read(final File rspFile) throws JDOMException, IOException {

    sxb = new SAXBuilder();
    rxpList = new ArrayList<>();

    //avoid loading of dtd file
    sxb.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

    try {
        // close the file handle as soon as the document is built
        try (FileInputStream rspStream = new FileInputStream(rspFile)) {
            document = sxb.build(rspStream);
        }
        root = document.getRootElement();
        projectName = root.getAttributeValue("name");
        Element scanPositions = root.getChild("scanpositions");
        String folderScanPositions = scanPositions.getAttributeValue("fold");
        List<Element> childrens = scanPositions.getChildren("scanposition");
        popMatrix = extractMat4D(root.getChild("pop").getChildText("matrix"));

        // directory containing the rsp file; identical for every scan
        String rspAbsolutePath = rspFile.getAbsolutePath();
        String rspFilePathOnly = rspAbsolutePath.substring(0, rspAbsolutePath.lastIndexOf(File.separator));

        // scan id, numbered across all scan positions
        int scanCount = 0;

        for (Element child : childrens) {
            Scans rxp = new Scans();

            rxp.setName(child.getAttributeValue("name"));
            rxp.setFold(child.getAttributeValue("fold"));

            Element registeredElement = child.getChild("registered");

            // positions without a "registered" element, or with a value other than 1, are skipped
            if (registeredElement == null || Integer.parseInt(registeredElement.getText()) != 1) {
                continue;
            }

            Element singlescans = child.getChild("singlescans");
            String singlescansFold = singlescans.getAttributeValue("fold");
            Map<Integer, RxpScan> scanList = new HashMap<>();

            List<Element> scans = singlescans.getChildren("scan");

            Element sop = child.getChild("sop");
            Matrix4d sopMatrix = extractMat4D(sop.getChildText("matrix"));
            rxp.setSopMatrix(sopMatrix);

            for (Element sc : scans) {

                RxpScan scan = new RxpScan();
                scan.setName(sc.getAttributeValue("name"));
                scan.setFileName(sc.getChildText("file"));

                scan.setAbsolutePath(rspFilePathOnly + File.separator + folderScanPositions
                        + File.separator + rxp.getFold() + File.separator + singlescansFold
                        + File.separator + scan.getFileName());
                scanList.put(scanCount, scan);

                // scans whose name contains ".mon" are registered as the lite scan
                if (scan.getName().contains(".mon")) {
                    rxp.setRxpLiteFile(new File(scan.getAbsolutePath()));
                    rxp.setScanLite(scan);
                } else {
                    rxp.setScanFull(scan);
                }
                scan.setFile(new File(scan.getAbsolutePath()));
                scan.setSopMatrix(sopMatrix);

                scanCount++;
            }

            rxp.setScanList(scanList);

            rxpList.add(rxp);
        }

    } catch (JDOMException ex) {
        throw new JDOMException("error parsing or reading rsp: " + rspFile.getAbsolutePath(), ex);
    } catch (IOException ex) {
        throw new IOException("error parsing or reading rsp: " + rspFile.getAbsolutePath(), ex);
    }
}