List of usage examples for the org.jdom2.input.SAXBuilder constructor SAXBuilder()
public SAXBuilder()
From source file:eus.ixa.ixa.pipe.convert.AbsaSemEval.java
License:Apache License
public static String absa2015ToDocCatFormatForPolarity(String fileName, String language, int windowMin, int windowMax) { SAXBuilder sax = new SAXBuilder(); XPathFactory xFactory = XPathFactory.instance(); Document doc = null;/*from ww w. ja v a 2s . co m*/ String text = ""; try { doc = sax.build(fileName); XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element()); List<Element> sentences = expr.evaluate(doc); for (Element sent : sentences) { Element opinionsElement = sent.getChild("Opinions"); String sentStringTmp = sent.getChildText("text"); List<List<Token>> segmentedSentence = StringUtils.tokenizeSentence(sentStringTmp, language); List<Token> sentence = segmentedSentence.get(0); if (opinionsElement != null) { // iterating over every opinion in the opinions element List<Element> opinionList = opinionsElement.getChildren(); for (Element opinion : opinionList) { String sentString = ""; String targetString = opinion.getAttributeValue("target"); String polarityString = opinion.getAttributeValue("polarity"); if (targetString.equalsIgnoreCase("NULL") || opinionList.size() == 1) { for (Token token : sentence) { sentString += token.getTokenValue() + " "; } text += polarityString + "\t" + sentString + "\n"; } else { int posTargetMin = -1; int posTargetMax = -1; // List<String> itemsTarget = Arrays.asList(targetString.split(" // ")); List<List<Token>> segmentedtarget = StringUtils.tokenizeSentence(targetString, language); List<Token> target = segmentedtarget.get(0); String targetMin = target.get(0).getTokenValue(); String targetMax = target.get(target.size() - 1).getTokenValue(); int count = 0; for (Token token : sentence) { if (token.getTokenValue().equals(targetMin)) { posTargetMin = count; } if (token.getTokenValue().equals(targetMax) && posTargetMin > -1) { posTargetMax = count; break; } count++; } if (posTargetMin - windowMin >= 0) { posTargetMin = posTargetMin - windowMin; } else posTargetMin = 0; if (posTargetMax + windowMax < 
sentence.size()) { posTargetMax = posTargetMax + windowMax; } else posTargetMax = sentence.size() - 1; for (int x = posTargetMin; x <= posTargetMax; x++) { sentString += sentence.get(x).getTokenValue() + " "; } text += polarityString + "\t" + sentString + "\n"; } } } } // end of sentence } catch (JDOMException | IOException e) { e.printStackTrace(); } return text; }
From source file:eus.ixa.ixa.pipe.convert.AbsaSemEval.java
License:Apache License
private static void absa2014ToNAFNER(KAFDocument kaf, String fileName, String language) { // reading the ABSA xml file SAXBuilder sax = new SAXBuilder(); XPathFactory xFactory = XPathFactory.instance(); try {/* w w w . j av a 2 s . co m*/ Document doc = sax.build(fileName); XPathExpression<Element> expr = xFactory.compile("//sentence", Filters.element()); List<Element> sentences = expr.evaluate(doc); // naf sentence counter int counter = 1; for (Element sent : sentences) { List<Integer> wfFromOffsets = new ArrayList<>(); List<Integer> wfToOffsets = new ArrayList<>(); List<WF> sentWFs = new ArrayList<>(); List<Term> sentTerms = new ArrayList<>(); // sentence id and original text String sentId = sent.getAttributeValue("id"); String sentString = sent.getChildText("text"); // the list contains just one list of tokens List<List<Token>> segmentedSentence = StringUtils.tokenizeSentence(sentString, language); for (List<Token> sentence : segmentedSentence) { for (Token token : sentence) { WF wf = kaf.newWF(token.startOffset(), token.getTokenValue(), counter); wf.setXpath(sentId); final List<WF> wfTarget = new ArrayList<WF>(); wfTarget.add(wf); wfFromOffsets.add(wf.getOffset()); wfToOffsets.add(wf.getOffset() + wf.getLength()); sentWFs.add(wf); Term term = kaf.newTerm(KAFDocument.newWFSpan(wfTarget)); term.setPos("O"); term.setLemma(token.getTokenValue()); sentTerms.add(term); } } counter++; String[] tokenIds = new String[sentWFs.size()]; for (int i = 0; i < sentWFs.size(); i++) { tokenIds[i] = sentWFs.get(i).getId(); } // going through every opinion element for each sentence // each opinion element can contain one or more opinions Element aspectTermsElem = sent.getChild("aspectTerms"); if (aspectTermsElem != null) { List<Element> aspectTermsList = aspectTermsElem.getChildren(); // iterating over every opinion in the opinions element if (!aspectTermsList.isEmpty()) { for (Element aspectTerm : aspectTermsList) { // String targetString = aspectTerm.getAttributeValue("term"); 
// System.err.println("-> " + targetString); // adding OTE int fromOffset = Integer.parseInt(aspectTerm.getAttributeValue("from")); int toOffset = Integer.parseInt(aspectTerm.getAttributeValue("to")); int startIndex = -1; int endIndex = -1; for (int i = 0; i < wfFromOffsets.size(); i++) { if (wfFromOffsets.get(i) == fromOffset) { startIndex = i; } } for (int i = 0; i < wfToOffsets.size(); i++) { if (wfToOffsets.get(i) == toOffset) { // span is +1 with respect to the last token of the span endIndex = i + 1; } } // TODO remove this condition to correct manually offsets if (startIndex != -1 && endIndex != -1) { List<String> wfIds = Arrays .asList(Arrays.copyOfRange(tokenIds, startIndex, endIndex)); List<String> wfTermIds = NAFUtils.getWFIdsFromTerms(sentTerms); if (NAFUtils.checkTermsRefsIntegrity(wfIds, wfTermIds)) { List<Term> nameTerms = kaf.getTermsFromWFs(wfIds); ixa.kaflib.Span<Term> neSpan = KAFDocument.newTermSpan(nameTerms); List<ixa.kaflib.Span<Term>> references = new ArrayList<ixa.kaflib.Span<Term>>(); references.add(neSpan); Entity neEntity = kaf.newEntity(references); neEntity.setType("term"); } } } } } } // end of sentence } catch (JDOMException | IOException e) { e.printStackTrace(); } }
From source file:eus.ixa.ixa.pipe.convert.DSRCCorpus.java
License:Apache License
/**
 * Builds NAF word forms and terms from a DSRC words file and adds one
 * {@code "TARGET"} entity per markable annotated as a target in the
 * corresponding markables file.
 *
 * <p>Every {@code word} element becomes a word form (offset 0) and a
 * single-token term; the sentence counter advances whenever a token matches
 * {@code endOfSentence}. Markables without an {@code annotation_type} or
 * {@code span} attribute are skipped.
 *
 * @param kaf          NAF document that receives word forms, terms and entities
 * @param wordsDoc     path or URI of the words XML file
 * @param markablesDoc path or URI of the opinion-expression markables XML file
 * @throws JDOMException if either document cannot be parsed
 * @throws IOException   if either document cannot be read
 */
private static void DSRCToNAFNER(KAFDocument kaf, String wordsDoc, String markablesDoc)
        throws JDOMException, IOException {
    SAXBuilder sax = new SAXBuilder();
    XPathFactory xFactory = XPathFactory.instance();

    // Words document: one WF and one Term per word element.
    Document docWords = sax.build(wordsDoc);
    XPathExpression<Element> expr = xFactory.compile("//word", Filters.element());
    List<Element> words = expr.evaluate(docWords);
    List<WF> sentWFs = new ArrayList<>();
    List<Term> sentTerms = new ArrayList<>();
    int sentCounter = 1;
    for (Element word : words) {
        String token = word.getText();
        WF wf = kaf.newWF(0, token, sentCounter);
        final List<WF> wfTarget = new ArrayList<WF>();
        wfTarget.add(wf);
        sentWFs.add(wf);
        Term term = kaf.newTerm(KAFDocument.newWFSpan(wfTarget));
        term.setPos("O");
        term.setLemma(token);
        sentTerms.add(term);
        if (endOfSentence.matcher(token).matches()) {
            sentCounter++;
        }
    }

    String[] tokenIds = new String[sentWFs.size()];
    for (int i = 0; i < sentWFs.size(); i++) {
        tokenIds[i] = sentWFs.get(i).getId();
    }
    // The term list no longer changes, so compute its WF ids once for all markables.
    List<String> wfTermIds = getWFIdsFromTerms(sentTerms);

    // Markables document (MMAX opinion expression file).
    Document markDoc = sax.build(markablesDoc);
    XPathExpression<Element> markExpr = xFactory.compile("//ns:markable", Filters.element(), null,
            Namespace.getNamespace("ns", "www.eml.org/NameSpaces/OpinionExpression"));
    List<Element> markables = markExpr.evaluate(markDoc);
    for (Element markable : markables) {
        // Constant-first comparison: a markable without annotation_type is simply not a target.
        if (!"target".equalsIgnoreCase(markable.getAttributeValue("annotation_type"))) {
            continue;
        }
        String markSpan = markable.getAttributeValue("span");
        if (markSpan == null) {
            continue;
        }
        System.err.println("--> span: " + markSpan);
        // Keep only the first range of a comma-separated span list.
        String removeCommaSpan = markSpan.replaceAll(",word_.*", "");
        System.err.println("--> newSpan: " + removeCommaSpan);
        String[] spanWords = removeCommaSpan.split("\\.\\.");
        int startIndex = Integer.parseInt(spanWords[0].replace("word_", ""));
        int endIndex = Integer.parseInt(spanWords[spanWords.length - 1].replace("word_", "")) + 1;
        // word_N ids are 1-based while tokenIds is 0-based.
        List<String> wfIds = Arrays.asList(Arrays.copyOfRange(tokenIds, startIndex - 1, endIndex - 1));
        if (checkTermsRefsIntegrity(wfIds, wfTermIds)) {
            List<Term> nameTerms = kaf.getTermsFromWFs(wfIds);
            ixa.kaflib.Span<Term> neSpan = KAFDocument.newTermSpan(nameTerms);
            List<ixa.kaflib.Span<Term>> references = new ArrayList<ixa.kaflib.Span<Term>>();
            references.add(neSpan);
            Entity neEntity = kaf.newEntity(references);
            neEntity.setType("TARGET");
            System.err.println("--> target: " + neEntity.getStr());
        }
    }
}
From source file:eus.ixa.ixa.pipe.convert.TassFormat.java
License:Apache License
public static void generalToTabulated(String fileName) throws JDOMException, IOException { StringBuilder sb = new StringBuilder(); // reading the TASS General Corpus xml file SAXBuilder sax = new SAXBuilder(); XPathFactory xFactory = XPathFactory.instance(); Document doc = sax.build(fileName); XPathExpression<Element> expr = xFactory.compile("//tweet", Filters.element()); List<Element> tweets = expr.evaluate(doc); for (Element tweet : tweets) { String tokenizedTweetContent = null; String tweetPolarity = null; String tweetId = tweet.getChildText("tweetid"); String tweetContentString = tweet.getChildText("content"); // the list contains just one list of tokens List<List<Token>> segmentedSentences = StringUtils.tokenizeSentence(tweetContentString, LANGUAGE); for (List<Token> tokenizedSentence : segmentedSentences) { String[] tokenizedTweetArray = eus.ixa.ixa.pipe.ml.utils.StringUtils .convertListTokenToArrayStrings(tokenizedSentence); tokenizedTweetContent = StringUtils.getStringFromTokens(tokenizedTweetArray); }//w w w . j ava2s. c o m if (tweet.getChild("sentiments").getChild("polarity").getChildText("value") != null) { tweetPolarity = tweet.getChild("sentiments").getChild("polarity").getChildText("value"); } sb.append(tweetId).append("\t").append(tweetPolarity).append("\t").append(tokenizedTweetContent) .append("\n"); } System.out.println(sb.toString()); }
From source file:eus.ixa.ixa.pipe.convert.TassFormat.java
License:Apache License
public static void generalToWFs(String fileName) { SAXBuilder sax = new SAXBuilder(); XPathFactory xFactory = XPathFactory.instance(); try {//from w w w. j a v a 2s . c om Document doc = sax.build(fileName); XPathExpression<Element> expr = xFactory.compile("//tweet", Filters.element()); List<Element> tweets = expr.evaluate(doc); for (Element tweet : tweets) { String tweetId = tweet.getChildText("tweetid"); KAFDocument kaf = new KAFDocument(LANGUAGE, "v1.naf"); kaf.createPublic().publicId = tweetId; String tweetContentString = tweet.getChildText("content"); List<List<Token>> segmentedSentences = StringUtils.tokenizeSentence(tweetContentString, LANGUAGE); for (List<Token> sentence : segmentedSentences) { for (Token token : sentence) { kaf.newWF(token.startOffset(), token.getTokenValue(), 1); } } Path outfile = Files.createFile(Paths.get(tweetId + ".naf")); Files.write(outfile, kaf.toString().getBytes(StandardCharsets.UTF_8)); System.err.println(">> Wrote naf document to " + outfile); } } catch (JDOMException | IOException e) { e.printStackTrace(); } }
From source file:fciencias.edatos.practica04.Catalogo.java
/** * Metodo que va iniciar Jdom para lectura y escritura de archivos XML * @param xmlOtroNombre elNombre original del archivo. * @throws JDOMException Excepcion de JDOM * @throws IOException Excepcion del sistema. *//* ww w. j a v a 2 s .co m*/ private void useJDOM(String baseDeDatos) throws JDOMException, IOException { SAXBuilder jdomBuilder = new SAXBuilder(); File inputFile = new File(baseDeDatos); Document jdomDocument = jdomBuilder.build(inputFile); Element firstElement = jdomDocument.getRootElement(); //Estos son los profesores for (Element elementProfesor : firstElement.getChildren()) { int[] salario = new int[12]; int c = 0; for (Element elementSalario : elementProfesor.getChildren()) { String salarioRealEnString = elementSalario.getValue(); int salarioRealMes = Integer.parseInt(salarioRealEnString); salario[c] = salarioRealMes; c++; } // System.out.println(Arrays.toString(salario)); String id = elementProfesor.getAttributeValue("id"); int id_int = Integer.parseInt(id); String nombre = elementProfesor.getAttributeValue("nombre"); String carreraString = elementProfesor.getAttributeValue("carrera"); Carrera carrera = Carrera.NO_CARRERA; if (carreraString.equals("MATEMATICAS")) { carrera = Carrera.MATEMATICAS; } else if (carreraString.equals("ACTUARIA")) { carrera = Carrera.ACTUARIA; } else if (carreraString.equals("FISICA")) { carrera = Carrera.FISICA; } else if (carreraString.equals("BIOLOGIA")) { carrera = Carrera.BIOLOGIA; } else if (carreraString.equals("CIENCIAS_DE_LA_COMPUTACION")) { carrera = Carrera.CIENCIAS_DE_LA_COMPUTACION; } else if (carreraString.equals("FISICA_BIOMEDICA")) { carrera = Carrera.FISICA_BIOMEDICA; } else if (carreraString.equals("CIENCIAS_DE_LA_TIERRA")) { carrera = Carrera.CIENCIAS_DE_LA_TIERRA; } String gradoString = elementProfesor.getAttributeValue("grado"); Grado grado = Grado.NO_GRADO; if (gradoString.equals("LICENCIADO")) { grado = Grado.LICENCIADO; } else if (gradoString.equals("MAESTRO")) { grado = Grado.MAESTRO; } else if 
(gradoString.equals("DOCTOR")) { grado = Grado.DOCTOR; } else if (gradoString.equals("ESPECIALISTA")) { grado = Grado.ESPECIALISTA; } Profesor profestorEnMemoria = null; profestorEnMemoria = new Profesor(id_int, nombre, carrera, grado, salario); @SuppressWarnings("unchecked") T prof = (T) profestorEnMemoria; listaProfesores.agregaFinal(prof); } }
From source file:fi.ni.bcfextractor.BCFExtractorController.java
License:Open Source License
/**
 * Opens a BCF zip archive, clears {@code data}, and for every entry whose
 * name ends in {@code .bcf} echoes the entry to standard output, parses it
 * as XML and passes the root element to {@code listChildren}.
 *
 * <p>I/O failures (including a missing file) abort the scan and print the
 * stack trace. An entry that is not well-formed XML prints its stack trace
 * and the scan continues with the next entry.
 *
 * @param file the zip archive to read
 */
private void openBCFZip(File file) {
    // try-with-resources closes both streams, and never touches a stream that
    // failed to open (the old finally block dereferenced null in that case).
    try (FileInputStream inputStream = new FileInputStream(file);
            ZipInputStream zInputStream = new ZipInputStream(inputStream)) {
        data.clear();
        SAXBuilder builder = new SAXBuilder();
        byte[] buffer = new byte[8192];
        for (ZipEntry entry = zInputStream.getNextEntry(); entry != null; entry = zInputStream
                .getNextEntry()) {
            if (!entry.getName().endsWith(".bcf")) {
                continue;
            }
            // Copy the raw entry bytes: line breaks are preserved and the XML
            // parser detects the encoding itself instead of going through the
            // platform default charset.
            java.io.ByteArrayOutputStream content = new java.io.ByteArrayOutputStream();
            int read;
            while ((read = zInputStream.read(buffer)) != -1) {
                content.write(buffer, 0, read);
            }
            byte[] xmlBytes = content.toByteArray();

            // Console echo only; assumes UTF-8 content (TODO confirm for non-UTF-8 BCF files).
            System.out.println(new String(xmlBytes, java.nio.charset.StandardCharsets.UTF_8));

            try {
                Document doc = builder.build(new ByteArrayInputStream(xmlBytes));
                listChildren(doc.getRootElement());
            } catch (JDOMException e) {
                e.printStackTrace();
            }
        }
    } catch (IOException e) {
        // Also covers FileNotFoundException from opening the archive.
        e.printStackTrace();
    }
}
From source file:filter.Filter.java
/**
 * Creates a filter for the given files.
 *
 * <p>Stores the file set and the owning form, and creates a fresh
 * {@code SAXBuilder}, a {@code Project} with the given name and a
 * {@code ProgressBarForm}.
 *
 * @param files       files this filter keeps a reference to
 * @param nameProject name handed to the new {@code Project}
 * @param frame       form this filter keeps a reference to
 */
public Filter(Set<File> files, String nameProject, FilterSystemForm frame) {
    // Caller-supplied collaborators.
    this.files = files;
    this.frame = frame;

    // Objects owned by this filter.
    this.builder = new SAXBuilder();
    this.project = new Project(nameProject);
    this.progressBar = new ProgressBarForm();
}
From source file:fourmiz.engine.Engine.java
License:Open Source License
/**
 * Loads the game with the given name.
 *
 * <p>Records the name as the current game, loads its render resources, then
 * parses its map file and dispatches every child of the root element:
 * {@code Config} elements go to {@code loadConfig}, {@code Entity} elements
 * to {@code loadEntity}, anything else is logged as a warning. Finally the
 * engine is flagged as loaded.
 *
 * @param name name of the game to load
 * @throws JDOMException if the map file cannot be parsed
 * @throws IOException if a file cannot be read
 * @throws SlickException declared for the resource and element loaders
 */
public void loadLevel(String name) throws JDOMException, IOException, SlickException {
    currentGame = name;
    ressources.load(resourcePath + currentGame + renderSuffix);

    File mapFile = new File(resourcePath + currentGame + mapSuffix);
    Element root = new SAXBuilder().build(mapFile).getRootElement();

    for (Element elem : root.getChildren()) {
        String kind = elem.getName();
        if (kind.equals("Config")) {
            loadConfig(elem.getChildren());
        } else if (kind.equals("Entity")) {
            loadEntity(elem.getChildren());
        } else {
            log.warn("loadLevel: unknown type object -> " + kind);
        }
    }

    this.loaded = true;
}
From source file:fr.amap.amapvox.io.tls.rsp.Rsp.java
License:Open Source License
public void read(final File rspFile) throws JDOMException, IOException { sxb = new SAXBuilder(); rxpList = new ArrayList<>(); //avoid loading of dtd file sxb.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); try {/*www . j a v a2 s. c o m*/ document = sxb.build(new FileInputStream(rspFile)); root = document.getRootElement(); projectName = root.getAttributeValue("name"); Element scanPositions = root.getChild("scanpositions"); String folderScanPositions = scanPositions.getAttributeValue("fold"); List<Element> childrens = scanPositions.getChildren("scanposition"); popMatrix = extractMat4D(root.getChild("pop").getChildText("matrix")); //scan id int scanCount = 0; for (Element child : childrens) { Scans rxp = new Scans(); rxp.setName(child.getAttributeValue("name")); rxp.setFold(child.getAttributeValue("fold")); Element registeredElement = child.getChild("registered"); if (registeredElement != null) { if (Integer.valueOf(registeredElement.getText()) == 1) { Element singlescans = child.getChild("singlescans"); String singlescansFold = singlescans.getAttributeValue("fold"); Map<Integer, RxpScan> scanList = new HashMap<>(); List<Element> scans = singlescans.getChildren("scan"); Element sop = child.getChild("sop"); Matrix4d sopMatrix = extractMat4D(sop.getChildText("matrix")); rxp.setSopMatrix(sopMatrix); int compteur = 0; for (Element sc : scans) { RxpScan scan = new RxpScan(); scan.setName(sc.getAttributeValue("name")); scan.setFileName(sc.getChildText("file")); String rspFilePathOnly = rspFile.getAbsolutePath().substring(0, rspFile.getAbsolutePath().lastIndexOf(File.separator)); scan.setAbsolutePath(rspFilePathOnly + File.separator + folderScanPositions + File.separator + rxp.getFold() + File.separator + singlescansFold + File.separator + scan.getFileName()); scanList.put(scanCount, scan); if (scan.getName().contains(".mon")) { rxp.setRxpLiteFile(new File(scan.getAbsolutePath())); rxp.setScanLite(scan); } else { rxp.setScanFull(scan); } 
scan.setFile(new File(scan.getAbsolutePath())); scan.setSopMatrix(sopMatrix); compteur++; scanCount++; } rxp.setScanList(scanList); rxpList.add(rxp); } else { //logger.info("Scan "+ rxp.getName() +" skipped cause unregistered"); } } } } catch (JDOMException ex) { throw new JDOMException("error parsing or reading rsp: " + rspFile.getAbsolutePath(), ex); } catch (IOException ex) { throw new IOException("error parsing or reading rsp: " + rspFile.getAbsolutePath(), ex); } }