Example usage for org.jdom2.input SAXBuilder SAXBuilder

List of usage examples for org.jdom2.input SAXBuilder SAXBuilder

Introduction

On this page you can find example usages of the org.jdom2.input.SAXBuilder constructor SAXBuilder().

Prototype

public SAXBuilder() 

Source Link

Document

Creates a new JAXP-based SAXBuilder.

Usage

From source file:eus.ixa.ixa.pipe.convert.AbsaSemEval.java

License:Apache License

/**
 * Converts an ABSA 2015 XML corpus into a tab-separated document-classification
 * format for polarity, emitting one {@code polarity \t text \n} line per opinion.
 *
 * <p>The text of a line is the whole tokenized sentence when the opinion target is
 * {@code NULL} (or the attribute is absent), when the sentence holds a single
 * opinion, or when the target tokens cannot be located in the sentence. Otherwise
 * it is the window of tokens reaching {@code windowMin} tokens before the first
 * target token and {@code windowMax} tokens after the last one, clipped to the
 * sentence boundaries. Tokens are separated (and followed) by a single space.
 *
 * @param fileName  location of the ABSA XML file, handed to {@code SAXBuilder.build(String)}
 * @param language  language passed to the tokenizer
 * @param windowMin number of tokens kept before the target
 * @param windowMax number of tokens kept after the target
 * @return the converted corpus; if parsing or reading fails, the stack trace is
 *         printed and whatever was converted up to that point is returned
 */
public static String absa2015ToDocCatFormatForPolarity(String fileName, String language, int windowMin,
        int windowMax) {
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    // StringBuilder avoids re-copying the whole output for every appended token
    StringBuilder text = new StringBuilder();

    try {
        Document doc = sax.build(fileName);
        XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element());
        List<Element> sentences = expr.evaluate(doc);

        for (Element sent : sentences) {
            Element opinionsElement = sent.getChild("Opinions");
            if (opinionsElement == null) {
                // nothing to emit for this sentence, so do not bother tokenizing it
                continue;
            }
            String sentStringTmp = sent.getChildText("text");
            List<List<Token>> segmentedSentence = StringUtils.tokenizeSentence(sentStringTmp, language);
            List<Token> sentence = segmentedSentence.get(0);

            // iterating over every opinion in the opinions element
            List<Element> opinionList = opinionsElement.getChildren();
            for (Element opinion : opinionList) {
                String targetString = opinion.getAttributeValue("target");
                String polarityString = opinion.getAttributeValue("polarity");

                // default span: the whole sentence
                int from = 0;
                int to = sentence.size() - 1;

                // a missing target attribute is treated like an explicit NULL target
                boolean wholeSentence = targetString == null || targetString.equalsIgnoreCase("NULL")
                        || opinionList.size() == 1;
                if (!wholeSentence) {
                    List<List<Token>> segmentedtarget = StringUtils.tokenizeSentence(targetString, language);
                    List<Token> target = segmentedtarget.get(0);
                    String targetMin = target.get(0).getTokenValue();
                    String targetMax = target.get(target.size() - 1).getTokenValue();

                    int posTargetMin = -1;
                    int posTargetMax = -1;
                    int count = 0;
                    for (Token token : sentence) {
                        if (token.getTokenValue().equals(targetMin)) {
                            posTargetMin = count;
                        }
                        if (token.getTokenValue().equals(targetMax) && posTargetMin > -1) {
                            posTargetMax = count;
                            break;
                        }
                        count++;
                    }
                    // only narrow to a window when the target was really found;
                    // otherwise keep the whole sentence instead of an arbitrary span
                    if (posTargetMin > -1 && posTargetMax > -1) {
                        from = Math.max(posTargetMin - windowMin, 0);
                        to = Math.min(posTargetMax + windowMax, sentence.size() - 1);
                    }
                }

                text.append(polarityString).append("\t");
                for (int x = from; x <= to; x++) {
                    text.append(sentence.get(x).getTokenValue()).append(" ");
                }
                text.append("\n");
            }
        } // end of sentence
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }

    return text.toString();
}

From source file:eus.ixa.ixa.pipe.convert.AbsaSemEval.java

License:Apache License

/**
 * Reads an ABSA 2014 XML file and mirrors it into the given NAF document.
 *
 * <p>Each {@code sentence} element is tokenized; every token becomes a word form
 * (tagged with the sentence id as xpath) and a term with POS {@code "O"} whose
 * lemma is the token itself. Each child of the sentence's {@code aspectTerms}
 * element whose {@code from}/{@code to} offsets line up with word-form boundaries
 * is added as an entity of type {@code "term"}; aspect terms whose offsets do not
 * match any word form are skipped.
 *
 * <p>Parse and I/O failures are printed to stderr and end the conversion.
 *
 * @param kaf      the NAF document that receives word forms, terms and entities
 * @param fileName location of the ABSA XML file
 * @param language language passed to the tokenizer
 */
private static void absa2014ToNAFNER(KAFDocument kaf, String fileName, String language) {
    SAXBuilder builder = new SAXBuilder();
    XPathFactory xpathFactory = XPathFactory.instance();
    try {
        Document xml = builder.build(fileName);
        List<Element> sentenceElems = xpathFactory.compile("//sentence", Filters.element()).evaluate(xml);

        // NAF sentence numbering starts at 1
        int sentenceNumber = 1;
        for (Element sentenceElem : sentenceElems) {
            List<Integer> startOffsets = new ArrayList<>();
            List<Integer> endOffsets = new ArrayList<>();
            List<WF> wordForms = new ArrayList<>();
            List<Term> terms = new ArrayList<>();

            String sentenceId = sentenceElem.getAttributeValue("id");
            String rawText = sentenceElem.getChildText("text");

            for (List<Token> tokens : StringUtils.tokenizeSentence(rawText, language)) {
                for (Token tok : tokens) {
                    WF wordForm = kaf.newWF(tok.startOffset(), tok.getTokenValue(), sentenceNumber);
                    wordForm.setXpath(sentenceId);
                    List<WF> single = new ArrayList<WF>();
                    single.add(wordForm);
                    startOffsets.add(wordForm.getOffset());
                    endOffsets.add(wordForm.getOffset() + wordForm.getLength());
                    wordForms.add(wordForm);
                    Term term = kaf.newTerm(KAFDocument.newWFSpan(single));
                    term.setPos("O");
                    term.setLemma(tok.getTokenValue());
                    terms.add(term);
                }
            }
            sentenceNumber++;

            // ids of this sentence's word forms, in token order
            String[] ids = new String[wordForms.size()];
            int next = 0;
            for (WF wordForm : wordForms) {
                ids[next++] = wordForm.getId();
            }

            Element aspectTermsElem = sentenceElem.getChild("aspectTerms");
            if (aspectTermsElem == null) {
                continue;
            }

            for (Element aspectTerm : aspectTermsElem.getChildren()) {
                int fromOffset = Integer.parseInt(aspectTerm.getAttributeValue("from"));
                int toOffset = Integer.parseInt(aspectTerm.getAttributeValue("to"));

                // last word form starting at fromOffset / ending at toOffset, or -1
                int first = startOffsets.lastIndexOf(fromOffset);
                int last = endOffsets.lastIndexOf(toOffset);

                // TODO remove this condition to correct manually offsets
                if (first == -1 || last == -1) {
                    continue;
                }

                // copyOfRange's upper bound is exclusive, hence last + 1
                List<String> spanIds = Arrays.asList(Arrays.copyOfRange(ids, first, last + 1));
                List<String> termWfIds = NAFUtils.getWFIdsFromTerms(terms);
                if (!NAFUtils.checkTermsRefsIntegrity(spanIds, termWfIds)) {
                    continue;
                }

                List<Term> spanTerms = kaf.getTermsFromWFs(spanIds);
                ixa.kaflib.Span<Term> termSpan = KAFDocument.newTermSpan(spanTerms);
                List<ixa.kaflib.Span<Term>> refs = new ArrayList<ixa.kaflib.Span<Term>>();
                refs.add(termSpan);
                Entity entity = kaf.newEntity(refs);
                entity.setType("term");
            }
        } // end of sentence
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
}

From source file:eus.ixa.ixa.pipe.convert.DSRCCorpus.java

License:Apache License

/**
 * Builds NAF word forms, terms and {@code TARGET} entities from a DSRC words
 * document and its markables (opinion expression) document.
 *
 * <p>Every {@code word} element of {@code wordsDoc} becomes one word form (created
 * with offset 0) and one term with POS {@code "O"}; the sentence counter advances
 * after each token matching {@code endOfSentence}. Every {@code markable} whose
 * {@code annotation_type} is {@code target} (case-insensitive) becomes an entity
 * covering the words named by its {@code span} attribute, read as
 * {@code word_N..word_M} with 1-based word numbers; anything from the first
 * {@code ,word_} onwards is ignored. Markables without an {@code annotation_type}
 * or {@code span} attribute are skipped.
 *
 * @param kaf          the NAF document that receives word forms, terms and entities
 * @param wordsDoc     location of the words XML document
 * @param markablesDoc location of the markables XML document
 * @throws JDOMException if either document cannot be parsed
 * @throws IOException   if either document cannot be read
 */
private static void DSRCToNAFNER(KAFDocument kaf, String wordsDoc, String markablesDoc)
        throws JDOMException, IOException {
    // reading the words xml file
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    Document docWords = sax.build(wordsDoc);
    XPathExpression<Element> expr = xFactory.compile("//word", Filters.element());
    List<Element> words = expr.evaluate(docWords);
    List<WF> sentWFs = new ArrayList<>();
    List<Term> sentTerms = new ArrayList<>();

    // building the NAF containing the WFs and Terms
    int sentCounter = 1;
    for (Element word : words) {
        String token = word.getText();
        WF wf = kaf.newWF(0, token, sentCounter);
        final List<WF> wfTarget = new ArrayList<WF>();
        wfTarget.add(wf);
        sentWFs.add(wf);
        Term term = kaf.newTerm(KAFDocument.newWFSpan(wfTarget));
        term.setPos("O");
        term.setLemma(token);
        sentTerms.add(term);
        // a sentence-final token closes the current sentence
        Matcher endMatcher = endOfSentence.matcher(token);
        if (endMatcher.matches()) {
            sentCounter++;
        }
    } // end of processing words

    String[] tokenIds = new String[sentWFs.size()];
    for (int i = 0; i < sentWFs.size(); i++) {
        tokenIds[i] = sentWFs.get(i).getId();
    }

    // processing markables document in mmax opinion expression files
    Document markDoc = sax.build(markablesDoc);
    XPathExpression<Element> markExpr = xFactory.compile("//ns:markable", Filters.element(), null,
            Namespace.getNamespace("ns", "www.eml.org/NameSpaces/OpinionExpression"));
    List<Element> markables = markExpr.evaluate(markDoc);
    for (Element markable : markables) {
        // constant-first comparison: a markable without annotation_type is skipped
        // instead of raising a NullPointerException
        if (!"target".equalsIgnoreCase(markable.getAttributeValue("annotation_type"))) {
            continue;
        }
        String markSpan = markable.getAttributeValue("span");
        if (markSpan == null) {
            // no span to resolve, so no entity can be built for this markable
            continue;
        }
        System.err.println("--> span: " + markSpan);
        String removeCommaSpan = markSpan.replaceAll(",word_.*", "");
        System.err.println("--> newSpan: " + removeCommaSpan);
        String[] spanWords = removeCommaSpan.split("\\.\\.");
        int startIndex = Integer.parseInt(spanWords[0].replace("word_", ""));
        int endIndex = Integer.parseInt(spanWords[spanWords.length - 1].replace("word_", "")) + 1;

        // word numbers are 1-based while tokenIds is 0-based, hence the -1 shifts
        List<String> wfIds = Arrays.asList(Arrays.copyOfRange(tokenIds, startIndex - 1, endIndex - 1));
        List<String> wfTermIds = getWFIdsFromTerms(sentTerms);
        if (checkTermsRefsIntegrity(wfIds, wfTermIds)) {
            List<Term> nameTerms = kaf.getTermsFromWFs(wfIds);
            ixa.kaflib.Span<Term> neSpan = KAFDocument.newTermSpan(nameTerms);
            List<ixa.kaflib.Span<Term>> references = new ArrayList<ixa.kaflib.Span<Term>>();
            references.add(neSpan);
            Entity neEntity = kaf.newEntity(references);
            neEntity.setType("TARGET");
            System.err.println("--> target: " + neEntity.getStr());
        }
    }
}

From source file:eus.ixa.ixa.pipe.convert.TassFormat.java

License:Apache License

/**
 * Prints the TASS General Corpus as tab-separated lines on standard output:
 * {@code tweetid \t polarity \t tokenized content}, one line per {@code tweet}.
 *
 * <p>The polarity is read from {@code sentiments/polarity/value}; when any element
 * of that path is missing the literal {@code null} is printed in its place rather
 * than aborting the conversion. If the tokenizer splits a tweet into several
 * sentences, only the last one ends up in the output.
 *
 * @param fileName location of the TASS General Corpus XML file
 * @throws JDOMException if the file cannot be parsed
 * @throws IOException   if the file cannot be read
 */
public static void generalToTabulated(String fileName) throws JDOMException, IOException {
    StringBuilder sb = new StringBuilder();
    // reading the TASS General Corpus xml file
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();
    Document doc = sax.build(fileName);
    XPathExpression<Element> expr = xFactory.compile("//tweet", Filters.element());
    List<Element> tweets = expr.evaluate(doc);

    for (Element tweet : tweets) {
        String tokenizedTweetContent = null;
        String tweetId = tweet.getChildText("tweetid");
        String tweetContentString = tweet.getChildText("content");
        // the list contains just one list of tokens
        List<List<Token>> segmentedSentences = StringUtils.tokenizeSentence(tweetContentString, LANGUAGE);
        for (List<Token> tokenizedSentence : segmentedSentences) {
            String[] tokenizedTweetArray = eus.ixa.ixa.pipe.ml.utils.StringUtils
                    .convertListTokenToArrayStrings(tokenizedSentence);
            tokenizedTweetContent = StringUtils.getStringFromTokens(tokenizedTweetArray);
        }

        // walk sentiments/polarity/value once, tolerating missing elements
        String tweetPolarity = null;
        Element sentiments = tweet.getChild("sentiments");
        Element polarity = sentiments == null ? null : sentiments.getChild("polarity");
        if (polarity != null) {
            tweetPolarity = polarity.getChildText("value");
        }

        sb.append(tweetId).append("\t").append(tweetPolarity).append("\t").append(tokenizedTweetContent)
                .append("\n");
    }
    System.out.println(sb.toString());
}

From source file:eus.ixa.ixa.pipe.convert.TassFormat.java

License:Apache License

/**
 * Writes one NAF file per {@code tweet} of a TASS General Corpus XML file.
 *
 * <p>For each tweet a NAF document is created with the tweet id as public id and
 * one word form per token of the tweet content (all in sentence 1). The document
 * is written, UTF-8 encoded, to a newly created file {@code <tweetid>.naf}
 * resolved against the working directory. Parse and I/O failures are printed to
 * stderr and end the conversion.
 *
 * @param fileName location of the TASS General Corpus XML file
 */
public static void generalToWFs(String fileName) {
    SAXBuilder builder = new SAXBuilder();
    XPathFactory xpathFactory = XPathFactory.instance();
    try {
        Document corpus = builder.build(fileName);
        List<Element> tweetElems = xpathFactory.compile("//tweet", Filters.element()).evaluate(corpus);

        for (Element tweetElem : tweetElems) {
            String id = tweetElem.getChildText("tweetid");
            KAFDocument naf = new KAFDocument(LANGUAGE, "v1.naf");
            naf.createPublic().publicId = id;

            String content = tweetElem.getChildText("content");
            for (List<Token> tokens : StringUtils.tokenizeSentence(content, LANGUAGE)) {
                for (Token tok : tokens) {
                    naf.newWF(tok.startOffset(), tok.getTokenValue(), 1);
                }
            }

            // createFile fails if the file already exists, so nothing is overwritten
            Path target = Files.createFile(Paths.get(id + ".naf"));
            byte[] serialized = naf.toString().getBytes(StandardCharsets.UTF_8);
            Files.write(target, serialized);
            System.err.println(">> Wrote naf document to " + target);
        }
    } catch (JDOMException | IOException e) {
        e.printStackTrace();
    }
}

From source file:fciencias.edatos.practica04.Catalogo.java

/**
 * Reads the professors stored in an XML file with JDOM and appends each of them
 * to {@code listaProfesores}.
 *
 * <p>Every child of the root element is one professor: its children hold the
 * integer salaries (stored in order in a 12-slot array) and its attributes
 * {@code id}, {@code nombre}, {@code carrera} and {@code grado} describe the
 * professor. Unrecognized {@code carrera} / {@code grado} values map to
 * {@code Carrera.NO_CARRERA} / {@code Grado.NO_GRADO}.
 *
 * @param baseDeDatos path of the XML file to read
 * @throws JDOMException if JDOM cannot parse the file
 * @throws IOException if the file cannot be read
 */
private void useJDOM(String baseDeDatos) throws JDOMException, IOException {
    SAXBuilder parser = new SAXBuilder();
    Document xml = parser.build(new File(baseDeDatos));
    Element root = xml.getRootElement();

    // each child of the root is one professor
    for (Element professorNode : root.getChildren()) {
        int[] salaries = new int[12];
        int month = 0;
        for (Element salaryNode : professorNode.getChildren()) {
            salaries[month++] = Integer.parseInt(salaryNode.getValue());
        }

        int idNumber = Integer.parseInt(professorNode.getAttributeValue("id"));
        String name = professorNode.getAttributeValue("nombre");

        String careerName = professorNode.getAttributeValue("carrera");
        Carrera career;
        switch (careerName) {
        case "MATEMATICAS":
            career = Carrera.MATEMATICAS;
            break;
        case "ACTUARIA":
            career = Carrera.ACTUARIA;
            break;
        case "FISICA":
            career = Carrera.FISICA;
            break;
        case "BIOLOGIA":
            career = Carrera.BIOLOGIA;
            break;
        case "CIENCIAS_DE_LA_COMPUTACION":
            career = Carrera.CIENCIAS_DE_LA_COMPUTACION;
            break;
        case "FISICA_BIOMEDICA":
            career = Carrera.FISICA_BIOMEDICA;
            break;
        case "CIENCIAS_DE_LA_TIERRA":
            career = Carrera.CIENCIAS_DE_LA_TIERRA;
            break;
        default:
            career = Carrera.NO_CARRERA;
            break;
        }

        String degreeName = professorNode.getAttributeValue("grado");
        Grado degree;
        switch (degreeName) {
        case "LICENCIADO":
            degree = Grado.LICENCIADO;
            break;
        case "MAESTRO":
            degree = Grado.MAESTRO;
            break;
        case "DOCTOR":
            degree = Grado.DOCTOR;
            break;
        case "ESPECIALISTA":
            degree = Grado.ESPECIALISTA;
            break;
        default:
            degree = Grado.NO_GRADO;
            break;
        }

        Profesor professor = new Profesor(idNumber, name, career, degree, salaries);

        // the list is typed with the class parameter T, hence the unchecked cast
        @SuppressWarnings("unchecked")
        T element = (T) professor;

        listaProfesores.agregaFinal(element);
    }
}

From source file:fi.ni.bcfextractor.BCFExtractorController.java

License:Open Source License

/**
 * Opens a BCF zip archive, clears {@code data} and feeds the root element of
 * every {@code .bcf} entry to {@code listChildren}.
 *
 * <p>Each matching entry is copied into memory as raw bytes and parsed from that
 * copy, so the XML parser detects the encoding itself and line breaks are kept
 * intact. The entry content is also echoed to standard output. I/O errors
 * (including a missing file) and XML parse errors are printed to stderr; a parse
 * error only skips the offending entry.
 *
 * @param file the BCF zip archive to read
 */
private void openBCFZip(File file) {
    // try-with-resources closes both streams and, unlike a finally block calling
    // close() on a possibly-null stream, cannot fail when the file is missing
    try (FileInputStream inputStream = new FileInputStream(file);
            ZipInputStream zInputStream = new ZipInputStream(inputStream)) {
        data.clear();
        for (ZipEntry entry = zInputStream.getNextEntry(); entry != null; entry = zInputStream
                .getNextEntry()) {
            if (!entry.getName().endsWith(".bcf")) {
                continue;
            }

            // read() reports end-of-stream at the end of the current entry
            java.io.ByteArrayOutputStream entryBytes = new java.io.ByteArrayOutputStream();
            byte[] buffer = new byte[8192];
            int read;
            while ((read = zInputStream.read(buffer)) != -1) {
                entryBytes.write(buffer, 0, read);
            }
            byte[] xml = entryBytes.toByteArray();

            // NOTE(review): debug echo only; decoding as UTF-8 is an assumption
            // about the entry's encoding and does not affect parsing
            System.out.println(new String(xml, java.nio.charset.StandardCharsets.UTF_8));

            SAXBuilder builder = new SAXBuilder();
            try {
                Document doc = builder.build(new ByteArrayInputStream(xml));
                listChildren(doc.getRootElement());
            } catch (JDOMException e) {
                e.printStackTrace();
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:filter.Filter.java

/**
 * Creates a filter for the given files.
 *
 * <p>Besides storing the arguments, the constructor creates a fresh
 * {@code SAXBuilder}, a {@code Project} named {@code nameProject} and a
 * {@code ProgressBarForm}.
 *
 * @param files       the files this filter is created for
 * @param nameProject name given to the new {@code Project}
 * @param frame       the form this filter is associated with
 */
public Filter(Set<File> files, String nameProject, FilterSystemForm frame) {
    // state handed in by the caller
    this.files = files;
    this.frame = frame;

    // collaborators owned by this instance
    this.builder = new SAXBuilder();
    this.project = new Project(nameProject);
    this.progressBar = new ProgressBarForm();
}

From source file:fourmiz.engine.Engine.java

License:Open Source License

/**
 * Loads the game called {@code name}.
 *
 * <p>Loads the render resources from {@code resourcePath + name + renderSuffix},
 * then parses the map file {@code resourcePath + name + mapSuffix} and dispatches
 * each child of its root element: {@code Config} elements go to
 * {@code loadConfig}, {@code Entity} elements to {@code loadEntity}, and any other
 * element is reported with a warning. Finally the engine is flagged as loaded.
 *
 * @param name name of the game to load
 * @throws JDOMException if the map file cannot be parsed
 * @throws IOException if the map file cannot be read
 * @throws SlickException declared by this method; presumably raised while loading
 *         resources or entities (TODO confirm against the callees)
 */
public void loadLevel(String name) throws JDOMException, IOException, SlickException {
    currentGame = name;
    ressources.load(resourcePath + currentGame + renderSuffix);

    SAXBuilder parser = new SAXBuilder();
    File mapFile = new File(resourcePath + currentGame + mapSuffix);
    Document map = parser.build(mapFile);

    for (Element child : map.getRootElement().getChildren()) {
        String kind = child.getName();
        if (kind.equals("Config")) {
            loadConfig(child.getChildren());
        } else if (kind.equals("Entity")) {
            loadEntity(child.getChildren());
        } else {
            log.warn("loadLevel: unknown type object -> " + kind);
        }
    }

    this.loaded = true;
}

From source file:fr.amap.amapvox.io.tls.rsp.Rsp.java

License:Open Source License

/**
 * Parses an rsp project file and fills this object's state from it: the
 * document and root element, the project name, the {@code pop} matrix and the
 * list of registered scan positions ({@code rxpList}).
 *
 * <p>Only scan positions that have a {@code registered} element with value 1 are
 * kept. For each of them the {@code sop} matrix is read and every {@code scan} of
 * its {@code singlescans} element is recorded with an absolute path built from the
 * rsp file's directory, the scan positions folder, the position folder, the single
 * scans folder and the scan file name. Scans whose name contains {@code ".mon"}
 * are stored as the position's lite scan, the others as its full scan. Scan ids
 * (keys of each position's scan map) keep counting across positions.
 *
 * @param rspFile the rsp file to read
 * @throws JDOMException if the file cannot be parsed; the message names the file
 * @throws IOException if the file cannot be read; the message names the file
 */
public void read(final File rspFile) throws JDOMException, IOException {

    sxb = new SAXBuilder();
    rxpList = new ArrayList<>();

    //avoid loading of dtd file
    sxb.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

    // try-with-resources makes sure the file handle is released in every case
    try (FileInputStream rspStream = new FileInputStream(rspFile)) {
        document = sxb.build(rspStream);
        root = document.getRootElement();
        projectName = root.getAttributeValue("name");
        Element scanPositions = root.getChild("scanpositions");
        String folderScanPositions = scanPositions.getAttributeValue("fold");
        List<Element> childrens = scanPositions.getChildren("scanposition");
        popMatrix = extractMat4D(root.getChild("pop").getChildText("matrix"));

        // directory of the rsp file; the same for every scan, so computed once
        String rspAbsolutePath = rspFile.getAbsolutePath();
        String rspFilePathOnly = rspAbsolutePath.substring(0, rspAbsolutePath.lastIndexOf(File.separator));

        //scan id
        int scanCount = 0;

        for (Element child : childrens) {
            Scans rxp = new Scans();

            rxp.setName(child.getAttributeValue("name"));
            rxp.setFold(child.getAttributeValue("fold"));

            Element registeredElement = child.getChild("registered");

            // unregistered scan positions are skipped
            if (registeredElement == null || Integer.parseInt(registeredElement.getText()) != 1) {
                continue;
            }

            Element singlescans = child.getChild("singlescans");
            String singlescansFold = singlescans.getAttributeValue("fold");
            Map<Integer, RxpScan> scanList = new HashMap<>();

            List<Element> scans = singlescans.getChildren("scan");

            Element sop = child.getChild("sop");
            Matrix4d sopMatrix = extractMat4D(sop.getChildText("matrix"));
            rxp.setSopMatrix(sopMatrix);

            for (Element sc : scans) {

                RxpScan scan = new RxpScan();
                scan.setName(sc.getAttributeValue("name"));
                scan.setFileName(sc.getChildText("file"));

                scan.setAbsolutePath(rspFilePathOnly + File.separator + folderScanPositions
                        + File.separator + rxp.getFold() + File.separator + singlescansFold
                        + File.separator + scan.getFileName());
                scanList.put(scanCount, scan);

                if (scan.getName().contains(".mon")) {
                    rxp.setRxpLiteFile(new File(scan.getAbsolutePath()));
                    rxp.setScanLite(scan);
                } else {
                    rxp.setScanFull(scan);
                }
                scan.setFile(new File(scan.getAbsolutePath()));
                scan.setSopMatrix(sopMatrix);

                scanCount++;
            }

            rxp.setScanList(scanList);

            rxpList.add(rxp);
        }

    } catch (JDOMException ex) {
        throw new JDOMException("error parsing or reading rsp: " + rspFile.getAbsolutePath(), ex);
    } catch (IOException ex) {
        throw new IOException("error parsing or reading rsp: " + rspFile.getAbsolutePath(), ex);
    }
}