List of usage examples for javax.xml.stream.events.XMLEvent.asEndElement()
/**
 * Signature of the method the usage examples collected here exercise: it hands back the event
 * typed as an {@link EndElement}.
 *
 * <p>Each example first confirms that the event really is an end element (through
 * {@code isEndElement()} or an {@code END_ELEMENT} event-type check) before calling this.
 * NOTE(review): behavior on a non-end-element event is not shown here -- consult the
 * {@code XMLEvent} API documentation.
 *
 * @return this event as an {@code EndElement}
 */
public EndElement asEndElement();
From source file:com.act.lcms.MzMLParser.java
public Iterator<S> getIterator(String inputFile) throws ParserConfigurationException, IOException, XMLStreamException { DocumentBuilderFactory docFactory = mkDocBuilderFactory(); DocumentBuilder docBuilder = docFactory.newDocumentBuilder(); final XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance(); final XMLOutputFactory xmlOutputFactory = XMLOutputFactory.newInstance(); return new Iterator<S>() { boolean inEntry = false; XMLEventReader xr = xmlInputFactory.createXMLEventReader(new FileInputStream(inputFile), "utf-8"); // TODO: is the use of the XML version/encoding tag definitely necessary? StringWriter w = new StringWriter().append(XML_PREAMBLE).append("\n"); XMLEventWriter xw = xmlOutputFactory.createXMLEventWriter(w); S next = null;/*from w ww.j a v a2 s .c o m*/ /* Because we're handling the XML as a stream, we can only determine whether we have another Spectrum to return * by attempting to parse the next one. `this.next()` reads */ private S getNextSpectrum() { S spectrum = null; if (xr == null || !xr.hasNext()) { return null; } try { while (xr.hasNext()) { XMLEvent e = xr.nextEvent(); if (!inEntry && e.isStartElement() && e.asStartElement().getName().getLocalPart().equals((SPECTRUM_OBJECT_TAG))) { xw.add(e); inEntry = true; } else if (e.isEndElement() && e.asEndElement().getName().getLocalPart().equals(SPECTRUM_OBJECT_TAG)) { xw.add(e); xw.flush(); /* TODO: the XMLOutputFactory docs don't make it clear if/how events can be written directly into a new * document structure, so we incur the cost of extracting each spectrum entry, serializing it, and * re-reading it into its own document so it can be handled by XPath. Master this strange corner of the * Java ecosystem and get rid of <></>his doc -> string -> doc conversion. 
*/ Document doc = docBuilder.parse(new ReaderInputStream(new StringReader(w.toString()))); spectrum = handleSpectrumEntry(doc); xw.close(); /* Note: this can also be accomplished with `w.getBuffer().setLength(0);`, but using a new event writer * seems safer. */ w = new StringWriter(); w.append(XML_PREAMBLE).append("\n"); xw = xmlOutputFactory.createXMLEventWriter(w); inEntry = false; // Don't stop parsing if handleSpectrumEntry didn't like this spectrum document. if (spectrum != null) { break; } } else if (inEntry) { // Add this element if we're in an entry xw.add(e); } } // We've reached the end of the document; close the reader to show that we're done. if (!xr.hasNext()) { xr.close(); xr = null; } } catch (Exception e) { // TODO: do better. We seem to run into this sort of thing with Iterators a lot... throw new RuntimeException(e); } return spectrum; } private S tryParseNext() { // Fail the attempt if the reader is closed. if (xr == null || !xr.hasNext()) { return null; } // No checks on whether we already have a spectrum stored: we expect the callers to do that. return getNextSpectrum(); } @Override public boolean hasNext() { // Prime the pump if the iterator doesn't have a value stored yet. if (this.next == null) { this.next = tryParseNext(); } // If we have an entry waiting, return true; otherwise read the next entry and return true if successful. return this.next != null; } @Override public S next() { // Prime the pump like we do in hasNext(). if (this.next == null) { this.next = tryParseNext(); } // Take available spectrum and return it. S res = this.next; /* Advance to the next element immediately, making next() do the heavy lifting most of the time. Otherwise, * the parsing will resume on hasNext(), which seems like it ought to be a light-weight operation. */ this.next = tryParseNext(); return res; } }; }
From source file:edu.unc.lib.dl.services.TripleStoreManagerMulgaraImpl.java
/**
 * Sends a command through {@code sendTQL} and extracts the text of the {@code message}
 * element(s) from the XML response.
 *
 * <p>NOTE(review): the previous Javadoc advertised a {@code RemoteException} for communication
 * failure, but this method declares no checked exception -- confirm how {@code sendTQL} reports
 * such failures.
 *
 * @param query
 *            an ITQL command
 * @return the concatenated character data found inside {@code message} elements (empty if there
 *         are none), or {@code null} when {@code sendTQL} returned {@code null} or the response
 *         could not be parsed as XML
 */
public String storeCommand(String query) {
    String response = this.sendTQL(query);
    if (response == null) {
        return null;
    }

    String result = null;
    StringReader sr = new StringReader(response);
    XMLInputFactory factory = XMLInputFactory.newInstance();
    // Coalescing delivers adjacent character data as a single event.
    factory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
    XMLEventReader r = null;
    try {
        boolean inMessage = false;
        // Purely local buffer, so the unsynchronized builder is sufficient.
        StringBuilder message = new StringBuilder();
        r = factory.createXMLEventReader(sr);
        while (r.hasNext()) {
            XMLEvent e = r.nextEvent();
            if (e.isStartElement()) {
                StartElement s = e.asStartElement();
                if ("message".equals(s.getName().getLocalPart())) {
                    inMessage = true;
                }
            } else if (e.isEndElement()) {
                EndElement end = e.asEndElement();
                if ("message".equals(end.getName().getLocalPart())) {
                    inMessage = false;
                }
            } else if (inMessage && e.isCharacters()) {
                message.append(e.asCharacters().getData());
            }
        }
        result = message.toString();
    } catch (XMLStreamException e) {
        // Report through the class logger instead of stderr; the caller still receives null.
        log.error("Could not parse the response to a store command as XML", e);
    } finally {
        if (r != null) {
            try {
                r.close();
            } catch (Exception closeFailure) {
                log.error("Could not close the XML event reader", closeFailure);
            }
        }
        sr.close();
    }
    return result;
}
From source file:com.aionengine.gameserver.dataholders.loadingutils.XmlMerger.java
/** * This method processes the source file, replacing all of * the 'import' tags by the data from the relevant files. * * @throws XMLStreamException on event writing error. * @throws IOException if the destination file exists but is a directory rather than * a regular file, does not exist but cannot be created, * or cannot be opened for any other reason *///from w ww . ja v a 2 s . c om private void doUpdate() throws XMLStreamException, IOException { XMLEventReader reader = null; XMLEventWriter writer = null; Properties metadata = new Properties(); try { writer = outputFactory.createXMLEventWriter(new BufferedWriter(new FileWriter(destFile, false))); reader = inputFactory.createXMLEventReader(new FileReader(sourceFile)); while (reader.hasNext()) { final XMLEvent xmlEvent = reader.nextEvent(); if (xmlEvent.isStartElement() && isImportQName(xmlEvent.asStartElement().getName())) { processImportElement(xmlEvent.asStartElement(), writer, metadata); continue; } if (xmlEvent.isEndElement() && isImportQName(xmlEvent.asEndElement().getName())) continue; if (xmlEvent instanceof Comment)// skip comments. continue; if (xmlEvent.isCharacters())// skip whitespaces. if (xmlEvent.asCharacters().isWhiteSpace() || xmlEvent.asCharacters().isIgnorableWhiteSpace())// skip whitespaces. continue; writer.add(xmlEvent); if (xmlEvent.isStartDocument()) { writer.add(eventFactory.createComment("\nThis file is machine-generated. DO NOT MODIFY IT!\n")); } } storeFileModifications(metadata, metaDataFile); } finally { if (writer != null) try { writer.close(); } catch (Exception ignored) { } if (reader != null) try { reader.close(); } catch (Exception ignored) { } } }
From source file:edu.jhu.hlt.concrete.ingesters.bolt.BoltForumPostIngester.java
private int handleQuote(final XMLEventReader rdr) throws XMLStreamException { // For quotes, there will be character contents - skip for now... XMLEvent quoteContent = rdr.nextEvent(); if (!quoteContent.isCharacters()) throw new RuntimeException("Characters did not follow quote."); // Skip end of quote. XMLEvent next = rdr.nextEvent(); // Exit loop only when next end quote is hit. boolean hitEndQuoteElement = false; while (!next.isEndElement() && !hitEndQuoteElement) { // Move to next element. next = rdr.nextEvent();// ww w. j a v a 2 s . c o m // If next element is an end element, // see if it's an end quote. // If so, exit the loop. if (next.isEndElement()) hitEndQuoteElement = next.asEndElement().getName().getLocalPart().equals("quote"); } return next.getLocation().getCharacterOffset(); }
From source file:com.aionemu.gameserver.dataholders.loadingutils.XmlMerger.java
/** * This method processes the source file, replacing all of the 'import' tags * by the data from the relevant files./*from w ww . j a v a 2s.co m*/ * * @throws XMLStreamException on event writing error. * @throws IOException if the destination file exists but is a directory * rather than a regular file, does not exist but cannot be created, or * cannot be opened for any other reason */ private void doUpdate() throws XMLStreamException, IOException { XMLEventReader reader = null; XMLEventWriter writer = null; Properties metadata = new Properties(); try { writer = outputFactory.createXMLEventWriter(new BufferedWriter(new FileWriter(destFile, false))); reader = inputFactory.createXMLEventReader(new FileReader(sourceFile)); while (reader.hasNext()) { final XMLEvent xmlEvent = reader.nextEvent(); if (xmlEvent.isStartElement() && isImportQName(xmlEvent.asStartElement().getName())) { processImportElement(xmlEvent.asStartElement(), writer, metadata); continue; } if (xmlEvent.isEndElement() && isImportQName(xmlEvent.asEndElement().getName())) { continue; } if (xmlEvent instanceof Comment)// skip comments. { continue; } if (xmlEvent.isCharacters())// skip whitespaces. { if (xmlEvent.asCharacters().isWhiteSpace() || xmlEvent.asCharacters().isIgnorableWhiteSpace())// skip // whitespaces. { continue; } } writer.add(xmlEvent); if (xmlEvent.isStartDocument()) { writer.add(eventFactory.createComment("\nThis file is machine-generated. DO NOT MODIFY IT!\n")); } } storeFileModifications(metadata, metaDataFile); } finally { if (writer != null) { try { writer.close(); } catch (Exception ignored) { } } if (reader != null) { try { reader.close(); } catch (Exception ignored) { } } } }
From source file:de.dfki.km.leech.parser.wikipedia.WikipediaDumpParser.java
@Override public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { try {// w w w .ja v a 2 s. co m // wir iterieren schn ber die page-Eintrge. Darin gibt es dann title, timestamp, <contributor> => <username> und text. den text mssen // wir noch bereinigen. dazu nehmen wir eine Vorverarbeitung mit bliki - dazu mssen wir aber selbst nochmal den String vorbereiten und // nachbereinigen. Leider. WikipediaDumpParserConfig wikipediaDumpParserConfig = context.get(WikipediaDumpParserConfig.class); if (wikipediaDumpParserConfig == null) { Logger.getLogger(WikipediaDumpParser.class.getName()) .info("No wikipedia parser config found. Will take the default one."); wikipediaDumpParserConfig = new WikipediaDumpParserConfig(); } TikaInputStream tikaStream = TikaInputStream.get(stream); File fWikipediaDumpFile4Stream = tikaStream.getFile(); MultiValueHashMap<String, String> hsPageTitle2Redirects = new MultiValueHashMap<String, String>(); if (wikipediaDumpParserConfig.determinePageRedirects) hsPageTitle2Redirects = getPageTitle2Redirects(new FileInputStream(fWikipediaDumpFile4Stream)); HashSet<String> hsRedirectPageTitles = new HashSet<String>(hsPageTitle2Redirects.values()); String strCleanedText = ""; String strBaseURL = null; XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance(); XMLEventReader xmlEventReader = xmlInputFactory .createXMLEventReader(new FileInputStream(fWikipediaDumpFile4Stream), "Utf-8"); while (xmlEventReader.hasNext()) { XMLEvent xmlEvent = xmlEventReader.nextEvent(); if (xmlEvent.isEndElement() && xmlEvent.asEndElement().getName().getLocalPart().equals("page")) { if (metadata.size() == 0) continue; // den mimetype wollen wir auch noch in den Metadaten haben metadata.add(Metadata.CONTENT_TYPE, "application/wikipedia+xml"); XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); xhtml.startElement("p"); 
xhtml.characters(strCleanedText.toCharArray(), 0, strCleanedText.length()); xhtml.endElement("p"); xhtml.endDocument(); } if (!xmlEvent.isStartElement()) continue; // ##### die siteinfo if (strBaseURL == null && xmlEvent.asStartElement().getName().getLocalPart().equals("base")) { // http://de.wikipedia.org/wiki/Wikipedia:Hauptseite =>http://de.wikipedia.org/wiki/ strBaseURL = readNextCharEventsText(xmlEventReader); strBaseURL = strBaseURL.substring(0, strBaseURL.lastIndexOf("/") + 1); } // ##### die page if (xmlEvent.asStartElement().getName().getLocalPart().equals("page")) { for (String strKey : metadata.names()) metadata.remove(strKey); } // ##### der Title if (xmlEvent.asStartElement().getName().getLocalPart().equals("title")) { // wir merken uns immer den aktuellen Titel String strCurrentTitle = readNextCharEventsText(xmlEventReader); if (strCurrentTitle.equalsIgnoreCase("DuckDuckGo")) { int fasd = 8; } if (strCurrentTitle.toLowerCase().contains("duck") && strCurrentTitle.toLowerCase().contains("go")) { int is = 666; } // wenn der Titel eine redirect-Page ist, dann tragen wir die ganze Page aus der EventQueue aus, springen an das endPage, und // haben somit diese Seite ignoriert. 
Ferner ignorieren wir auch spezielle wikipedia-Seiten String strSmallTitle = strCurrentTitle.trim().toLowerCase(); if (hsRedirectPageTitles.contains(strCurrentTitle) || hsRedirectPageTitles.contains(strSmallTitle) || hsRedirectPageTitles.contains(strCurrentTitle.trim()) || strSmallTitle.startsWith("category:") || strSmallTitle.startsWith("kategorie:") || strSmallTitle.startsWith("vorlage:") || strSmallTitle.startsWith("template:") || strSmallTitle.startsWith("hilfe:") || strSmallTitle.startsWith("help:") || strSmallTitle.startsWith("wikipedia:") || strSmallTitle.startsWith("portal:") || strSmallTitle.startsWith("mediawiki:")) { while (true) { XMLEvent nextXmlEvent = xmlEventReader.nextEvent(); if (nextXmlEvent.isEndElement() && nextXmlEvent.asEndElement().getName().getLocalPart().equals("page")) break; } } else { metadata.add(Metadata.TITLE, strCurrentTitle); metadata.add(Metadata.SOURCE, strBaseURL + strCurrentTitle); for (String strRedirect : hsPageTitle2Redirects.get(strCurrentTitle)) { // wir ignorieren Titel, die sich lediglich durch gro/kleinschreibung unterscheiden if (!StringUtils.containsIgnoreCase(strRedirect, metadata.getValues(Metadata.TITLE))) metadata.add(Metadata.TITLE, strRedirect); } } continue; } // ##### der text if (xmlEvent.asStartElement().getName().getLocalPart().equals("text")) { String strText = readNextCharEventsText(xmlEventReader); if (wikipediaDumpParserConfig.parseLinksAndCategories) parseLinksAndCategories(strText, strBaseURL, metadata, handler); if (wikipediaDumpParserConfig.parseInfoBoxes) parseInfoBox(strText, metadata, handler); if (wikipediaDumpParserConfig.parseGeoCoordinates) parseGeoCoordinates(strText, metadata); // aufgrund einiger Defizite in dem verwendeten cleaner mssen wir hier leider noch zu-und nacharbeiten strText = strText.replaceAll("==\n", "==\n\n"); strText = strText.replaceAll("\n==", "\n\n=="); strCleanedText = m_wikiModel.render(new PlainTextConverter(), strText); strCleanedText = 
strCleanedText.replaceAll("\\{\\{", " "); strCleanedText = strCleanedText.replaceAll("\\}\\}", " "); strCleanedText = StringEscapeUtils.unescapeHtml4(strCleanedText); continue; } // ##### der timestamp if (xmlEvent.asStartElement().getName().getLocalPart().equals("timestamp")) { String strTimestamp = readNextCharEventsText(xmlEventReader); metadata.add(Metadata.MODIFIED, strTimestamp); continue; } // ##### der username if (xmlEvent.asStartElement().getName().getLocalPart().equals("username")) { String strUsername = readNextCharEventsText(xmlEventReader); metadata.add(Metadata.CREATOR, strUsername); continue; } } } catch (Exception e) { Logger.getLogger(WikipediaDumpParser.class.getName()).log(Level.SEVERE, "Error", e); } }
From source file:com.joliciel.frenchTreebank.upload.TreebankXmlReader.java
@Override public boolean hasNextSentence() { MONITOR.startTask("hasNextSentence"); try {//from w w w . j ava 2 s . c o m if (sentenceCount > 0 && currentSentenceCount == sentenceCount) return false; if (eventReader == null) this.getNextEventReader(); boolean sentenceClosed = false; while (eventReader != null && !sentenceClosed) { while (eventReader.hasNext() && !sentenceClosed) { XMLEvent xmlEvent; try { xmlEvent = eventReader.nextEvent(); } catch (XMLStreamException e) { LogUtils.logError(LOG, e); throw new RuntimeException(e); } switch (xmlEvent.getEventType()) { case XMLEvent.START_ELEMENT: StartElement startElementEvent = xmlEvent.asStartElement(); this.startElement(startElementEvent); break; case XMLEvent.END_ELEMENT: EndElement endElementEvent = xmlEvent.asEndElement(); sentenceClosed = this.endElement(endElementEvent); break; case XMLEvent.PROCESSING_INSTRUCTION: break; case XMLEvent.CHARACTERS: Characters charactersEvent = xmlEvent.asCharacters(); this.characters(charactersEvent); break; case XMLEvent.COMMENT: break; case XMLEvent.START_DOCUMENT: break; case XMLEvent.END_DOCUMENT: break; case XMLEvent.ENTITY_REFERENCE: break; case XMLEvent.ATTRIBUTE: break; case XMLEvent.DTD: break; case XMLEvent.CDATA: break; case XMLEvent.SPACE: break; } } if (!eventReader.hasNext()) { eventReader = null; this.getNextEventReader(); } if (sentenceNumber != null && sentenceNumber.length() > 0 && sentenceClosed) { if (!sentenceNumber.equals(sentence.getSentenceNumber())) { sentenceClosed = false; sentence = null; } } } return sentenceClosed; } finally { MONITOR.endTask("hasNextSentence"); } }
From source file:org.eclipse.swordfish.core.configuration.xml.XmlToPropertiesTransformerImpl.java
public void loadConfiguration(URL path) { Assert.notNull(path);/*ww w.j a v a 2 s. co m*/ InputStream inputStream = null; try { inputStream = path.openStream(); XMLInputFactory inputFactory = XMLInputFactory.newInstance(); LinkedList<String> currentElements = new LinkedList<String>(); XMLEventReader eventReader = inputFactory.createXMLEventReader(inputStream); Map<String, List<String>> props = new HashMap<String, List<String>>(); // Read the XML document while (eventReader.hasNext()) { XMLEvent event = eventReader.nextEvent(); if (event.isCharacters() && !event.asCharacters().isWhiteSpace()) { putElement(props, getQualifiedName(currentElements), event.asCharacters().getData()); } else if (event.isStartElement()) { currentElements.add(event.asStartElement().getName().getLocalPart()); for (Iterator attrIt = event.asStartElement().getAttributes(); attrIt.hasNext();) { Attribute attribute = (Attribute) attrIt.next(); putElement(props, getQualifiedName(currentElements) + "[@" + attribute.getName() + "]", attribute.getValue()); } } else if (event.isAttribute()) { } else if (event.isEndElement()) { String lastElem = event.asEndElement().getName().getLocalPart(); if (!currentElements.getLast().equals(lastElem)) { throw new UnsupportedOperationException(lastElem + "," + currentElements.getLast()); } currentElements.removeLast(); } } properties = flattenProperties(props); } catch (Exception ex) { throw new SwordfishException(ex); } finally { if (inputStream != null) { try { inputStream.close(); } catch (IOException ex) { } } } }
From source file:com.msopentech.odatajclient.testservice.utils.XMLUtilities.java
/**
 * Copies an element, from the given start element up to the first end element carrying the same
 * qualified name, into an in-memory buffer.
 *
 * <p>When {@code document} is set, the copy is preceded by a start-document event (UTF-8, version
 * 1.0) and two namespace declarations are added after the start element, each only if the
 * corresponding attribute lookup on the element comes back empty.
 *
 * @param element start element that has already been read from {@code reader}
 * @param reader source of the events that follow {@code element}
 * @param document whether to emit the result as a standalone document
 * @return a stream over the serialized bytes
 * @throws XMLStreamException if reading or writing an event fails
 */
private InputStream writeFromStartToEndElement(final StartElement element, final XMLEventReader reader,
        final boolean document) throws XMLStreamException {

    final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    final XMLEventWriter out = XMLOutputFactory.newInstance().createXMLEventWriter(buffer);
    final QName elementName = element.getName();

    if (!document) {
        out.add(element);
    } else {
        final XMLEventFactory events = XMLEventFactory.newInstance();
        out.add(events.createStartDocument("UTF-8", "1.0"));
        out.add(element);

        final boolean dataServiceMissing = element.getAttributeByName(new QName(ATOM_DATASERVICE_NS)) == null;
        if (dataServiceMissing) {
            out.add(events.createNamespace(ATOM_PROPERTY_PREFIX.substring(0, 1), DATASERVICES_NS));
        }

        final boolean metadataMissing = element.getAttributeByName(new QName(ATOM_METADATA_NS)) == null;
        if (metadataMissing) {
            out.add(events.createNamespace(ATOM_METADATA_PREFIX.substring(0, 1), METADATA_NS));
        }
    }

    // Copy events until the one just written was the matching end element (or the reader is drained).
    XMLEvent current = element;
    while (reader.hasNext()) {
        if (current.isEndElement() && elementName.equals(current.asEndElement().getName())) {
            break;
        }
        current = reader.nextEvent();
        out.add(current);
    }

    out.flush();
    out.close();

    return new ByteArrayInputStream(buffer.toByteArray());
}
From source file:fr.dutra.confluence2wordpress.core.sync.DefaultAttachmentsSynchronizer.java
private Set<Attachment> parseForAttachments(ContentEntityObject page) throws SynchronizationException { Set<Attachment> attachments = new HashSet<Attachment>(); try {//from ww w .j a va 2 s.c o m XMLEventReader r = StaxUtils.getReader(page); String fileName = null; String pageTitle = null; String spaceKey = null; try { while (r.hasNext()) { XMLEvent e = r.nextEvent(); if (e.isStartElement()) { StartElement startElement = e.asStartElement(); QName name = startElement.getName(); if (name.equals(ATTACHMENT_QNAME)) { Attribute att = startElement.getAttributeByName(FILENAME_QNAME); if (att != null) { fileName = att.getValue(); } } else if (name.equals(PAGE_QNAME)) { Attribute title = startElement.getAttributeByName(TITLE_QNAME); if (title != null) { pageTitle = title.getValue(); } Attribute space = startElement.getAttributeByName(SPACE_QNAME); if (space != null) { spaceKey = space.getValue(); } } } else if (e.isEndElement()) { EndElement endElement = e.asEndElement(); if (endElement.getName().equals(ATTACHMENT_QNAME)) { ContentEntityObject attachmentPage; if (pageTitle == null) { attachmentPage = page; } else { attachmentPage = pageManager.getPage(spaceKey, pageTitle); } Attachment attachment = attachmentManager.getAttachment(attachmentPage, fileName); attachments.add(attachment); fileName = null; pageTitle = null; spaceKey = null; } } } } finally { r.close(); } } catch (XMLStreamException e) { throw new SynchronizationException("Cannot read page: " + page.getTitle(), e); } return attachments; }