Example usage for javax.xml.stream.events XMLEvent isStartElement

List of usage examples for javax.xml.stream.events XMLEvent isStartElement

Introduction

This page collects example usages of javax.xml.stream.events XMLEvent isStartElement, taken from open-source projects.

Prototype

public boolean isStartElement();

Source Link

Document

A utility function to check if this event is a StartElement.

Usage

From source file:com.aionemu.gameserver.dataholders.loadingutils.XmlMerger.java

/**
 * Read all {@link javax.xml.stream.events.XMLEvent}'s from specified file
 * and write them onto the {@link javax.xml.stream.XMLEventWriter}
 *
 * @param file     File to import/*from  w  ww. j av a 2  s  . c  o  m*/
 * @param skipRoot Skip-root flag
 * @param writer   Destenation writer
 * @throws XMLStreamException    On event reading/writing error.
 * @throws FileNotFoundException if the reading file does not exist, is a
 *                               directory rather than a regular file, or for some other reason cannot be
 *                               opened for reading.
 */
private void importFile(File file, boolean skipRoot, XMLEventWriter writer, Properties metadata)
        throws XMLStreamException, IOException {
    logger.debug("Appending file " + file);
    metadata.setProperty(file.getPath(), makeHash(file));

    XMLEventReader reader = null;

    try {
        reader = inputFactory.createXMLEventReader(new FileReader(file));

        QName firstTagQName = null;

        while (reader.hasNext()) {
            XMLEvent event = reader.nextEvent();

            // skip start and end of document.
            if (event.isStartDocument() || event.isEndDocument()) {
                continue;
            }
            // skip all comments.
            if (event instanceof Comment) {
                continue;
            }
            // skip white-spaces and all ignoreable white-spaces.
            if (event.isCharacters()) {
                if (event.asCharacters().isWhiteSpace() || event.asCharacters().isIgnorableWhiteSpace()) {
                    continue;
                }
            }

            // modify root-tag of imported file.
            if (firstTagQName == null && event.isStartElement()) {
                firstTagQName = event.asStartElement().getName();

                if (skipRoot) {
                    continue;
                } else {
                    StartElement old = event.asStartElement();

                    event = eventFactory.createStartElement(old.getName(), old.getAttributes(), null);
                }
            }

            // if root was skipped - skip root end too.
            if (event.isEndElement() && skipRoot && event.asEndElement().getName().equals(firstTagQName)) {
                continue;
            }

            // finally - write tag
            writer.add(event);
        }
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignored) {
            }
        }
    }
}

From source file:com.google.code.activetemplates.impl.TemplateCompilerImpl.java

/**
 * Drains all events from the compile context and writes the processed result to
 * the context's writer.
 * <p>
 * Events are dispatched by type: attributes, start elements, end elements and
 * character data. Any other event type falls through the if/else chain and is
 * not written.
 *
 * @param name not referenced in this method body
 * @param cc   compile context supplying the events, the element factory, the
 *             writer and the event queue
 * @throws XMLStreamException propagated from reading, creating or writing events
 */
private void doCompile(String name, CompileContext cc) throws XMLStreamException {

    while (cc.hasNextEvent()) {

        XMLEvent e = cc.nextEvent();

        //Location loc = e.getLocation();

        if (e.isAttribute()) {
            //System.out.println("Adding " + e);

            // attributes added during tag processing and under the same tag
            // get handled here, outcome is always PROCESS_ALL

            Attribute a = (Attribute) e;
            if (h.isAttributeHandled(a.getName())) {
                h.processAttribute(cc, a);
            } else {
                // Unhandled attribute: run its value through text processing and,
                // if that produced a replacement, emit a new attribute with it.
                String value = a.getValue();
                String nvalue = processText(cc, value);
                if (nvalue != null) {
                    a = cc.getElementFactory().createAttribute(a.getName(), nvalue);
                }
                //System.out.println("Adding " + e);
                cc.getWriter().add(a);
            }

        } else if (e.isStartElement()) {

            StartElement se = e.asStartElement();

            // Tracks whether the original element can be written as-is (DEFAULT),
            // must be rebuilt from the collected lists (REPLACE) or dropped (SKIP).
            Processing processing = Processing.DEFAULT;

            // collect namespaces
            @SuppressWarnings("unchecked")
            Iterator<Namespace> nsit = se.getNamespaces();
            List<Namespace> namespaces = new ArrayList<Namespace>();

            while (nsit.hasNext()) {
                Namespace ns = nsit.next();
                if (excludedNamespaces.contains(ns.getNamespaceURI())) {
                    // Excluded namespace declarations are dropped, which forces a rebuild.
                    processing = Processing.REPLACE;
                } else {
                    namespaces.add(ns);
                }
            }

            // collect attributes
            @SuppressWarnings("unchecked")
            Iterator<Attribute> it = se.getAttributes();
            List<Attribute> attributes = new LinkedList<Attribute>();
            while (it.hasNext()) {
                attributes.add(it.next());
            }

            // collect any separate attribute and namespace xml events
            while (cc.hasNextEvent()) {
                if (cc.peekEvent().isNamespace()) {
                    namespaces.add((Namespace) cc.nextEvent());
                    processing = Processing.REPLACE;
                } else if (cc.peekEvent().isAttribute()) {
                    attributes.add((Attribute) cc.nextEvent());
                    processing = Processing.REPLACE;
                } else {
                    break;
                }
            }

            // preprocess attributes
            // The collected list is iterated while a fresh list receives the attributes
            // that survive; handled attributes are consumed by the handler and not re-added.
            it = attributes.iterator();
            attributes = new ArrayList<Attribute>();

            while (it.hasNext() && processing != Processing.SKIP) {
                Attribute a = it.next();

                if (h.isAttributeHandled(a.getName())) {
                    processing = Processing.REPLACE;

                    AttributeHandler.Outcome o = h.processAttribute(cc, a);
                    if (o == Outcome.PROCESS_NONE) {
                        // Stops the loop (see loop condition) and skips the element below.
                        processing = Processing.SKIP;
                    }

                } else {
                    String value = a.getValue();
                    String nvalue = processText(cc, value);
                    if (nvalue != null) {
                        a = cc.getElementFactory().createAttribute(a.getName(), nvalue);
                        processing = Processing.REPLACE;
                    }

                    attributes.add(a);
                }
            }

            if (processing == Processing.SKIP) {

                skipChildren(cc, false);

            } else {

                if (processing == Processing.REPLACE) {
                    // replace element with new one
                    se = cc.getElementFactory().createStartElement(se.getName(), attributes.iterator(),
                            namespaces.iterator());
                }

                // handle start element
                if (h.isElementHandled(se.getName())) {
                    ElementHandler.Outcome o = h.processStartElement(cc, se);
                    cc.flushEventQueue();
                    // Only PROCESS_SIBLINGS needs extra work here; every other outcome
                    // continues with the next event.
                    switch (o) {
                    case PROCESS_SIBLINGS:
                        skipChildren(cc, true);
                        break;
                    }
                } else {
                    //System.out.println("Adding " + se);
                    cc.getWriter().add(se);
                    cc.flushEventQueue(); // flush events added by any attribute handlers
                }
            }

        } else if (e.isEndElement()) {

            // handle end element
            if (h.isElementHandled(e.asEndElement().getName())) {
                h.processEndElement(cc, e.asEndElement());
                cc.flushEventQueue();
            } else {
                //System.out.println("Adding " + e);
                cc.getWriter().add(e);
            }

        } else if (e.isCharacters()) {

            // process text
            // A null result from processText means "no change": the original event is written.
            Characters ce = e.asCharacters();
            String s = ce.getData();
            String ns = processText(cc, s);
            if (ns != null) {
                ce = cc.getElementFactory().createCharacters(ns);
            }
            //System.out.println("Adding " + e);
            cc.getWriter().add(ce);

        }

    }

}

From source file:edu.jhu.hlt.concrete.ingesters.bolt.BoltForumPostIngester.java

/**
 * Builds a {@link Communication} from a BOLT forum post file.
 * <p>
 * The file is read once as UTF-8 text (which becomes the communication text)
 * and a second time as an XML event stream. The headline, every post's
 * {@code author}/{@code datetime} attributes and every non-whitespace run of
 * character data are added as sections whose text spans index into the
 * communication text.
 *
 * @param path path to the file to ingest
 * @return the populated communication
 * @throws IngestException if the path does not exist or is not a regular file, or on any
 *                         I/O, XML or span-computation error
 */
@Override
public Communication fromCharacterBasedFile(final Path path) throws IngestException {
    if (!Files.exists(path))
        throw new IngestException("No file at: " + path.toString());

    AnalyticUUIDGeneratorFactory f = new AnalyticUUIDGeneratorFactory();
    AnalyticUUIDGenerator gen = f.create();
    Communication c = new Communication();
    c.setUuid(gen.next());
    c.setType(this.getKind());
    c.setMetadata(TooledMetadataConverter.convert(this));

    try {
        // The communication id is the file name up to its first dot.
        ExistingNonDirectoryFile ef = new ExistingNonDirectoryFile(path);
        c.setId(ef.getName().split("\\.")[0]);
    } catch (NoSuchFileException | NotFileException e) {
        // might throw if path is a directory.
        throw new IngestException(path.toString() + " is not a file, or is a directory.");
    }

    String content;
    try (InputStream is = Files.newInputStream(path);
            BufferedInputStream bin = new BufferedInputStream(is, 1024 * 8 * 8);) {
        content = IOUtils.toString(bin, StandardCharsets.UTF_8);
        c.setText(content);
    } catch (IOException e) {
        throw new IngestException(e);
    }

    try (InputStream is = Files.newInputStream(path);
            BufferedInputStream bin = new BufferedInputStream(is, 1024 * 8 * 8);
            BufferedReader reader = new BufferedReader(new InputStreamReader(bin, StandardCharsets.UTF_8));) {
        XMLEventReader rdr = null;
        try {
            rdr = inF.createXMLEventReader(reader);

            // Below method moves the reader
            // to the first post element.
            Section headline = handleHeadline(rdr, content);
            headline.setUuid(gen.next());
            c.addToSectionList(headline);
            int start = headline.getTextSpan().getStart();
            int ending = headline.getTextSpan().getEnding();
            if (ending < start)
                ending = start; // @tongfei: handle empty headlines
            String htxt = c.getText().substring(start, ending);
            LOGGER.debug("headline text: {}", htxt);

            // Section indices.
            int sectNumber = 1;
            int subSect = 0;

            // Move iterator to post start element.
            this.iterateToPosts(rdr);

            // Offset pointer.
            int currOff = -1;

            SectionFactory sf = new SectionFactory(gen);

            // First post element.
            while (rdr.hasNext()) {
                XMLEvent nextEvent = rdr.nextEvent();
                currOff = nextEvent.getLocation().getCharacterOffset();
                if (currOff > 0) {
                    int currOffPlus = currOff + 20;
                    // Clamp the lower bound: for offsets below 20 an unclamped value is
                    // negative, and substring() would throw and abort the whole ingest
                    // just for a debug message.
                    int currOffLess = Math.max(0, currOff - 20);
                    LOGGER.debug("Offset: {}", currOff);
                    if (currOffPlus < content.length())
                        LOGGER.debug("Surrounding text: {}", content.substring(currOffLess, currOffPlus));
                }

                // First: see if document is going to end.
                // If yes: exit.
                if (nextEvent.isEndDocument())
                    break;

                // XMLEvent peeker = rdr.peek();

                // Check if start element.
                if (nextEvent.isStartElement()) {
                    StartElement se = nextEvent.asStartElement();
                    QName name = se.getName();
                    final String localName = name.getLocalPart();
                    LOGGER.debug("Hit start element: {}", localName);

                    //region
                    // Add sections for authors and datetimes for each bolt post
                    // by Tongfei Chen
                    Attribute attrAuthor = se.getAttributeByName(QName.valueOf("author"));
                    Attribute attrDateTime = se.getAttributeByName(QName.valueOf("datetime"));

                    if (attrAuthor != null && attrDateTime != null) {

                        int loc = attrAuthor.getLocation().getCharacterOffset();

                        // NOTE(review): the offsets below assume the tag is written exactly as
                        // <post author="..." datetime="..."> - confirm against the input format.
                        int sectAuthorBeginningOffset = loc + "<post author=\"".length();

                        Section sectAuthor = sf.fromTextSpan(new TextSpan(sectAuthorBeginningOffset,
                                sectAuthorBeginningOffset + attrAuthor.getValue().length()), "author");
                        c.addToSectionList(sectAuthor);

                        int sectDateTimeBeginningOffset = sectAuthorBeginningOffset
                                + attrAuthor.getValue().length() + " datetime=".length();

                        Section sectDateTime = sf.fromTextSpan(
                                new TextSpan(sectDateTimeBeginningOffset,
                                        sectDateTimeBeginningOffset + attrDateTime.getValue().length()),
                                "datetime");
                        c.addToSectionList(sectDateTime);
                    }
                    //endregion

                    // Move past quotes, images, and links.
                    if (localName.equals(QUOTE_LOCAL_NAME)) {
                        this.handleQuote(rdr);
                    } else if (localName.equals(IMG_LOCAL_NAME)) {
                        this.handleImg(rdr);
                    } else if (localName.equals(LINK_LOCAL_NAME)) {
                        this.handleLink(rdr);
                    }

                    // not a start element
                } else if (nextEvent.isCharacters()) {
                    Characters chars = nextEvent.asCharacters();
                    int coff = chars.getLocation().getCharacterOffset();
                    if (!chars.isWhiteSpace()) {
                        // content to be captured
                        String fpContent = chars.getData();
                        LOGGER.debug("Character offset: {}", coff);
                        LOGGER.debug("Character based data: {}", fpContent);
                        // LOGGER.debug("Character data via offset diff: {}", content.substring(coff - fpContent.length(), coff));

                        // Shrink the span by the leading/trailing padding reported by trimSpacing.
                        SimpleImmutableEntry<Integer, Integer> pads = trimSpacing(fpContent);
                        final int tsb = currOff + pads.getKey();
                        final int tse = currOff + fpContent.length() - pads.getValue();
                        final String subs = content.substring(tsb, tse);
                        if (subs.replaceAll("\\p{Zs}", "").replaceAll("\\n", "").isEmpty()) {
                            LOGGER.info("Found empty section: skipping.");
                            continue;
                        }

                        LOGGER.debug("Section text: {}", subs);
                        TextSpan ts = new TextSpan(tsb, tse);

                        Section s = sf.fromTextSpan(ts, "post");
                        List<Integer> intList = new ArrayList<>();
                        intList.add(sectNumber);
                        intList.add(subSect);
                        s.setNumberList(intList);
                        c.addToSectionList(s);

                        subSect++;
                    }
                } else if (nextEvent.isEndElement()) {
                    EndElement ee = nextEvent.asEndElement();
                    currOff = ee.getLocation().getCharacterOffset();
                    QName name = ee.getName();
                    String localName = name.getLocalPart();
                    LOGGER.debug("Hit end element: {}", localName);
                    // A closing post tag starts a new section number and resets the sub-section.
                    if (localName.equalsIgnoreCase(POST_LOCAL_NAME)) {
                        sectNumber++;
                        subSect = 0;
                    }
                }
            }
            return c;
        } catch (XMLStreamException | ConcreteException | StringIndexOutOfBoundsException x) {
            throw new IngestException(x);
        } finally {
            if (rdr != null)
                try {
                    rdr.close();
                } catch (XMLStreamException e) {
                    // not likely.
                    LOGGER.info("Error closing XMLReader.", e);
                }
        }
    } catch (IOException e) {
        throw new IngestException(e);
    }
}

From source file:com.streamsets.pipeline.lib.xml.StreamingXmlParser.java

/**
 * Recursively converts the element opened by {@code startE} into a {@link Field}.
 * <p>
 * Child elements are parsed recursively and collected by name; character data
 * that is directly followed by the closing tag is stored under
 * {@code VALUE_KEY}. Depending on {@code useFieldAttributesInsteadOfFields},
 * XML attributes and namespace declarations end up either in the field map
 * (via {@code toField}) or as field attributes.
 *
 * @param reader event reader positioned right after {@code startE}
 * @param startE start element whose content is parsed
 * @return the field built from the element
 * @throws XMLStreamException    on an unexpected event or a mismatched end element
 * @throws ObjectLengthException propagated from the reader helpers
 */
@SuppressWarnings("unchecked")
Field parse(XMLEventReader reader, StartElement startE) throws XMLStreamException, ObjectLengthException {
    final Map<String, Field> elementFields;
    if (this.useFieldAttributesInsteadOfFields) {
        elementFields = new LinkedHashMap<>();
    } else {
        elementFields = toField(startE);
    }
    final Map<String, Object> children = new LinkedHashMap<>();

    // True only until the first non-whitespace event of this element has been handled.
    boolean textStillPossible = true;

    while (hasNext(reader) && !peek(reader).isEndElement()) {
        final XMLEvent current = read(reader);

        if (current.isCharacters()) {
            // Whitespace-only character data is ignored and does not reset the text flag.
            if (current.asCharacters().isWhiteSpace()) {
                continue;
            }

            if (peek(reader).isEndElement() && textStillPossible) {
                final String text = ((Characters) current).getData();
                children.put(VALUE_KEY, Field.create(text));
            } else if (peek(reader).isStartElement()) {
                final StartElement nested = (StartElement) read(reader);
                final Field nestedField = parse(reader, nested);
                addContent(children, getName(nested), nestedField);
                // Swallow character data that trails the nested element.
                if (hasNext(reader) && peek(reader).isCharacters()) {
                    read(reader);
                }
            } else if (textStillPossible) {
                final String problem = Utils
                        .format("Unexpected XMLEvent '{}', it should be START_ELEMENT or END_ELEMENT", current);
                throw new XMLStreamException(problem, current.getLocation());
            }
        } else if (current.isStartElement()) {
            final StartElement nested = (StartElement) current;
            final String nestedName = getName(nested);
            final Field nestedField = parse(reader, nested);
            addContent(children, nestedName, nestedField);
        } else {
            final String problem = Utils
                    .format("Unexpected XMLEvent '{}', it should be START_ELEMENT or CHARACTERS", current);
            throw new XMLStreamException(problem, current.getLocation());
        }

        textStillPossible = false;
    }

    if (hasNext(reader)) {
        final EndElement closing = (EndElement) read(reader);
        if (!closing.getName().equals(startE.getName())) {
            final String problem = Utils.format("Unexpected EndElement '{}', it should be '{}'",
                    closing.getName().getLocalPart(), startE.getName().getLocalPart());
            throw new XMLStreamException(problem, closing.getLocation());
        }

        // Single children are stored as-is, repeated children as a list field.
        for (Map.Entry<String, Object> child : children.entrySet()) {
            final Object value = child.getValue();
            if (value instanceof Field) {
                elementFields.put(child.getKey(), (Field) value);
            } else {
                elementFields.put(child.getKey(), Field.create((List<Field>) value));
            }
        }
    }

    final Field result = Field.create(elementFields);

    if (this.useFieldAttributesInsteadOfFields) {
        for (Iterator attributes = startE.getAttributes(); attributes.hasNext();) {
            final Attribute attribute = (Attribute) attributes.next();
            result.setAttribute(getName(XMLATTR_ATTRIBUTE_PREFIX, attribute), attribute.getValue());
        }
        for (Iterator declarations = startE.getNamespaces(); declarations.hasNext();) {
            final Namespace declaration = (Namespace) declarations.next();
            result.setAttribute(getName(null, declaration), declaration.getNamespaceURI());
        }
    }

    lastParsedFieldXpathPrefix = getXpathPrefix();
    return result;
}

From source file:edu.unc.lib.dl.util.TripleStoreQueryServiceMulgaraImpl.java

/**
 * @param query/*from   www.j av a  2  s  .co  m*/
 *            an ITQL command
 * @return the message returned by Mulgara
 * @throws RemoteException
 *             for communication failure
 */
public String storeCommand(String query) {
    String result = null;
    String response = this.sendTQL(query);
    if (response != null) {
        XMLInputFactory factory = XMLInputFactory.newInstance();
        factory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
        try (StringReader sr = new StringReader(response)) {
            XMLEventReader r = factory.createXMLEventReader(sr);
            boolean inMessage = false;
            StringBuffer message = new StringBuffer();
            while (r.hasNext()) {
                XMLEvent e = r.nextEvent();
                if (e.isStartElement()) {
                    StartElement s = e.asStartElement();
                    if ("message".equals(s.getName().getLocalPart())) {
                        inMessage = true;
                    }
                } else if (e.isEndElement()) {
                    EndElement end = e.asEndElement();
                    if ("message".equals(end.getName().getLocalPart())) {
                        inMessage = false;
                    }
                } else if (inMessage && e.isCharacters()) {
                    message.append(e.asCharacters().getData());
                }
            }
            r.close();
            result = message.toString();
        } catch (XMLStreamException e) {
            e.printStackTrace();
        }
    }
    return result;
}

From source file:de.dfki.km.leech.parser.wikipedia.WikipediaDumpParser.java

/**
 * Parses a Wikipedia XML dump and emits one XHTML document per accepted
 * {@code <page>} element to the given handler.
 * <p>
 * For every page the title, timestamp, username and cleaned article text are
 * collected into {@code metadata}; when the closing {@code page} tag is
 * reached the cleaned text is written as a single {@code <p>} element.
 * Redirect pages and pages in special namespaces (category, template, help,
 * ...) are skipped entirely.
 * <p>
 * Any exception raised while parsing is logged at {@code SEVERE} level and
 * not rethrown.
 *
 * @param stream   the dump; it is accessed as a file through {@code TikaInputStream}
 * @param handler  receives the XHTML events of every emitted page
 * @param metadata reused as the per-page metadata container; cleared at each page start
 * @param context  may carry a {@code WikipediaDumpParserConfig}; a default one is used otherwise
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {

    try {

        // We iterate over the page entries. Each one contains title, timestamp,
        // <contributor> => <username> and text. The text still has to be cleaned: we
        // preprocess it with bliki, but we have to prepare the string ourselves
        // beforehand and clean it up again afterwards. Unfortunately.

        WikipediaDumpParserConfig wikipediaDumpParserConfig = context.get(WikipediaDumpParserConfig.class);

        if (wikipediaDumpParserConfig == null) {
            Logger.getLogger(WikipediaDumpParser.class.getName())
                    .info("No wikipedia parser config found. Will take the default one.");
            wikipediaDumpParserConfig = new WikipediaDumpParserConfig();
        }

        TikaInputStream tikaStream = TikaInputStream.get(stream);

        File fWikipediaDumpFile4Stream = tikaStream.getFile();

        MultiValueHashMap<String, String> hsPageTitle2Redirects = new MultiValueHashMap<String, String>();
        if (wikipediaDumpParserConfig.determinePageRedirects) {
            // Separate pass over the dump; close the stream once the redirects are collected.
            try (FileInputStream redirectsIn = new FileInputStream(fWikipediaDumpFile4Stream)) {
                hsPageTitle2Redirects = getPageTitle2Redirects(redirectsIn);
            }
        }

        HashSet<String> hsRedirectPageTitles = new HashSet<String>(hsPageTitle2Redirects.values());

        String strCleanedText = "";
        String strBaseURL = null;

        XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();
        try (FileInputStream dumpIn = new FileInputStream(fWikipediaDumpFile4Stream)) {
            XMLEventReader xmlEventReader = xmlInputFactory.createXMLEventReader(dumpIn, "Utf-8");
            try {
                while (xmlEventReader.hasNext()) {

                    XMLEvent xmlEvent = xmlEventReader.nextEvent();

                    if (xmlEvent.isEndElement()
                            && xmlEvent.asEndElement().getName().getLocalPart().equals("page")) {
                        if (metadata.size() == 0)
                            continue;

                        // we also want the mime type in the metadata
                        metadata.add(Metadata.CONTENT_TYPE, "application/wikipedia+xml");

                        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
                        xhtml.startDocument();

                        xhtml.startElement("p");
                        xhtml.characters(strCleanedText.toCharArray(), 0, strCleanedText.length());
                        xhtml.endElement("p");

                        xhtml.endDocument();

                    }

                    if (!xmlEvent.isStartElement())
                        continue;

                    // ##### the siteinfo

                    if (strBaseURL == null && xmlEvent.asStartElement().getName().getLocalPart().equals("base")) {
                        // http://de.wikipedia.org/wiki/Wikipedia:Hauptseite =>http://de.wikipedia.org/wiki/
                        strBaseURL = readNextCharEventsText(xmlEventReader);
                        strBaseURL = strBaseURL.substring(0, strBaseURL.lastIndexOf("/") + 1);
                    }

                    // ##### the page

                    if (xmlEvent.asStartElement().getName().getLocalPart().equals("page")) {
                        for (String strKey : metadata.names())
                            metadata.remove(strKey);
                    }

                    // ##### the title

                    if (xmlEvent.asStartElement().getName().getLocalPart().equals("title")) {
                        // we always remember the current title
                        String strCurrentTitle = readNextCharEventsText(xmlEventReader);

                        // If the title belongs to a redirect page, we consume the whole page from
                        // the event queue up to its end tag and thereby ignore it. We also ignore
                        // special wikipedia pages.
                        String strSmallTitle = strCurrentTitle.trim().toLowerCase();
                        if (hsRedirectPageTitles.contains(strCurrentTitle)
                                || hsRedirectPageTitles.contains(strSmallTitle)
                                || hsRedirectPageTitles.contains(strCurrentTitle.trim())
                                || strSmallTitle.startsWith("category:") || strSmallTitle.startsWith("kategorie:")
                                || strSmallTitle.startsWith("vorlage:") || strSmallTitle.startsWith("template:")
                                || strSmallTitle.startsWith("hilfe:") || strSmallTitle.startsWith("help:")
                                || strSmallTitle.startsWith("wikipedia:") || strSmallTitle.startsWith("portal:")
                                || strSmallTitle.startsWith("mediawiki:")) {

                            // hasNext() guard: a truncated dump ends the skip instead of throwing.
                            while (xmlEventReader.hasNext()) {
                                XMLEvent nextXmlEvent = xmlEventReader.nextEvent();
                                if (nextXmlEvent.isEndElement()
                                        && nextXmlEvent.asEndElement().getName().getLocalPart().equals("page"))
                                    break;
                            }
                        } else {
                            metadata.add(Metadata.TITLE, strCurrentTitle);
                            metadata.add(Metadata.SOURCE, strBaseURL + strCurrentTitle);

                            for (String strRedirect : hsPageTitle2Redirects.get(strCurrentTitle)) {
                                // we ignore titles that differ only in upper/lower case
                                if (!StringUtils.containsIgnoreCase(strRedirect,
                                        metadata.getValues(Metadata.TITLE)))
                                    metadata.add(Metadata.TITLE, strRedirect);
                            }
                        }

                        continue;
                    }

                    // ##### the text
                    if (xmlEvent.asStartElement().getName().getLocalPart().equals("text")) {
                        String strText = readNextCharEventsText(xmlEventReader);

                        if (wikipediaDumpParserConfig.parseLinksAndCategories)
                            parseLinksAndCategories(strText, strBaseURL, metadata, handler);
                        if (wikipediaDumpParserConfig.parseInfoBoxes)
                            parseInfoBox(strText, metadata, handler);
                        if (wikipediaDumpParserConfig.parseGeoCoordinates)
                            parseGeoCoordinates(strText, metadata);

                        // due to some deficits of the cleaner in use we unfortunately have to do
                        // some pre- and post-processing here
                        strText = strText.replaceAll("==\n", "==\n\n");
                        strText = strText.replaceAll("\n==", "\n\n==");

                        strCleanedText = m_wikiModel.render(new PlainTextConverter(), strText);

                        strCleanedText = strCleanedText.replaceAll("\\{\\{", " ");
                        strCleanedText = strCleanedText.replaceAll("\\}\\}", " ");

                        strCleanedText = StringEscapeUtils.unescapeHtml4(strCleanedText);

                        continue;
                    }

                    // ##### the timestamp
                    if (xmlEvent.asStartElement().getName().getLocalPart().equals("timestamp")) {
                        String strTimestamp = readNextCharEventsText(xmlEventReader);

                        metadata.add(Metadata.MODIFIED, strTimestamp);

                        continue;
                    }

                    // ##### the username
                    if (xmlEvent.asStartElement().getName().getLocalPart().equals("username")) {
                        String strUsername = readNextCharEventsText(xmlEventReader);

                        metadata.add(Metadata.CREATOR, strUsername);

                        continue;
                    }

                }
            } finally {
                try {
                    xmlEventReader.close();
                } catch (Exception ignored) {
                    // best-effort close; must not mask an exception from the parsing loop
                }
            }
        }

    } catch (Exception e) {
        Logger.getLogger(WikipediaDumpParser.class.getName()).log(Level.SEVERE, "Error", e);
    }

}

From source file:org.alex73.osm.converters.bel.Convert.java

/**
 * Converts the OSM extract {@code tmp/belarus-latest.osm.bz2} into two files,
 * {@code tmp/belarus-bel.osm} and {@code tmp/belarus-intl.osm}.
 * <p>
 * {@code way}, {@code node} and {@code relation} elements are unmarshalled
 * with JAXB, passed through {@code fixBel} before being written to the first
 * file and through {@code fixInt} before being written to the second one.
 * Every other event is copied unchanged to both outputs.
 *
 * @param args not used
 * @throws Exception on any I/O, XML or JAXB failure
 */
public static void main(String[] args) throws Exception {
    loadStreetNamesForHouses();

    FileInputStream compressedFile = new FileInputStream("tmp/belarus-latest.osm.bz2");
    InputStream in = new BZip2CompressorInputStream(new BufferedInputStream(compressedFile, BUFFER_SIZE));

    // StAX plumbing: one reader, two writers and a reusable line-break event.
    XMLEventFactory events = XMLEventFactory.newInstance();
    XMLEvent lineBreak = events.createCharacters("\n");
    XMLInputFactory inputFactory = XMLInputFactory.newInstance();
    XMLOutputFactory outputFactory = XMLOutputFactory.newInstance();
    XMLEventReader reader = inputFactory.createXMLEventReader(in);
    FileOutputStream cyrillicFile = new FileOutputStream("tmp/belarus-bel.osm");
    XMLEventWriter wrCyr = outputFactory
            .createXMLEventWriter(new BufferedOutputStream(cyrillicFile, BUFFER_SIZE));
    FileOutputStream internationalFile = new FileOutputStream("tmp/belarus-intl.osm");
    XMLEventWriter wrInt = outputFactory
            .createXMLEventWriter(new BufferedOutputStream(internationalFile, BUFFER_SIZE));

    // JAXB (un)marshalling of the three OSM primitives as document fragments.
    JAXBContext jaxb = JAXBContext.newInstance(Node.class, Way.class, Relation.class);
    Unmarshaller unmarshaller = jaxb.createUnmarshaller();
    Marshaller marshaller = jaxb.createMarshaller();
    marshaller.setProperty(Marshaller.JAXB_FRAGMENT, true);
    marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE);

    for (XMLEvent current = reader.peek(); current != null; current = reader.peek()) {
        boolean handled = false;

        if (current.isStartElement()) {
            String tagName = ((StartElement) current).getName().getLocalPart();

            if ("way".equals(tagName)) {
                Way way = unmarshaller.unmarshal(reader, Way.class).getValue();
                if (way.getId() == 25439425) {
                    System.out.println();
                }
                fixBel(way.getTag(), "name:be", "name");
                String streetNameBe = houseStreetBe.get(way.getId());
                if (streetNameBe != null) {
                    setTag(way.getTag(), "addr:street", streetNameBe);
                }
                marshaller.marshal(way, wrCyr);
                fixInt(way.getTag());
                marshaller.marshal(way, wrInt);
                wrCyr.add(lineBreak);
                wrInt.add(lineBreak);
                handled = true;
            } else if ("node".equals(tagName)) {
                Node node = unmarshaller.unmarshal(reader, Node.class).getValue();
                fixBel(node.getTag(), "name:be", "name");
                // fixBel(node.getTag(),"addr:street:be","addr:street");
                marshaller.marshal(node, wrCyr);
                fixInt(node.getTag());
                marshaller.marshal(node, wrInt);
                wrCyr.add(lineBreak);
                wrInt.add(lineBreak);
                handled = true;
            } else if ("relation".equals(tagName)) {
                Relation relation = unmarshaller.unmarshal(reader, Relation.class).getValue();
                fixBel(relation.getTag(), "name:be", "name");
                // fixBel(relation.getTag(),"addr:street:be","addr:street");
                marshaller.marshal(relation, wrCyr);
                fixInt(relation.getTag());
                marshaller.marshal(relation, wrInt);
                wrCyr.add(lineBreak);
                wrInt.add(lineBreak);
                handled = true;
            }
        }

        // Everything that was not unmarshalled is copied verbatim to both outputs.
        if (!handled) {
            wrCyr.add(current);
            wrInt.add(current);
        }

        // Advance past the peeked event - or, after an unmarshal, past whatever
        // event follows the element that JAXB consumed.
        reader.next();
    }

    wrCyr.flush();
    wrCyr.close();
    wrInt.flush();
    wrInt.close();
    System.out.println("UniqueTranslatedTags: " + uniqueTranslatedTags);
}

From source file:org.apache.hadoop.util.ConfTest.java

/**
 * Parses an XML document whose root element is {@code configuration} and
 * collects one {@link NodeInfo} for every element that is a direct child of
 * that root.
 *
 * <p>For {@code property} elements directly under {@code configuration} the
 * attributes are copied onto the node. Elements nested directly inside a
 * {@code property} are registered on the property node, and the first
 * character event seen inside such an element is stored as its content.
 *
 * @param in stream holding the XML document; it is not closed by this method
 * @return the nodes for the direct children of the root element, or
 *         {@code null} when the first element is not {@code configuration}
 * @throws XMLStreamException if the document cannot be read, or if releasing
 *                            the event reader fails
 */
private static List<NodeInfo> parseConf(InputStream in) throws XMLStreamException {
    QName configuration = new QName("configuration");
    QName property = new QName("property");

    List<NodeInfo> nodes = new ArrayList<NodeInfo>();
    // Currently open elements, outermost at the bottom.
    Stack<NodeInfo> parsed = new Stack<NodeInfo>();

    XMLInputFactory factory = XMLInputFactory.newInstance();
    XMLEventReader reader = factory.createXMLEventReader(in);

    try {
        while (reader.hasNext()) {
            XMLEvent event = reader.nextEvent();
            if (event.isStartElement()) {
                StartElement currentElement = event.asStartElement();
                NodeInfo currentNode = new NodeInfo(currentElement);
                if (parsed.isEmpty()) {
                    // The very first element must be <configuration>.
                    if (!currentElement.getName().equals(configuration)) {
                        return null;
                    }
                } else {
                    NodeInfo parentNode = parsed.peek();
                    QName parentName = parentNode.getStartElement().getName();
                    if (parentName.equals(configuration)
                            && currentNode.getStartElement().getName().equals(property)) {
                        @SuppressWarnings("unchecked")
                        Iterator<Attribute> it = currentElement.getAttributes();
                        while (it.hasNext()) {
                            currentNode.addAttribute(it.next());
                        }
                    } else if (parentName.equals(property)) {
                        parentNode.addElement(currentElement);
                    }
                }
                parsed.push(currentNode);
            } else if (event.isEndElement()) {
                NodeInfo node = parsed.pop();
                // Only the root is still open: the element just closed was a
                // direct child of the root.
                if (parsed.size() == 1) {
                    nodes.add(node);
                }
            } else if (event.isCharacters()) {
                if (2 < parsed.size()) {
                    // Temporarily lift the innermost open element off the stack
                    // to reach the element that encloses it.
                    NodeInfo openNode = parsed.pop();
                    StartElement openElement = openNode.getStartElement();
                    NodeInfo enclosingNode = parsed.peek();
                    // Keep only the first character event recorded for this element.
                    if (enclosingNode.getElement(openElement) == null) {
                        enclosingNode.setElement(openElement, event.asCharacters());
                    }
                    parsed.push(openNode);
                }
            }
        }
    } finally {
        // Release parser resources on every exit path, including the early
        // "return null" above. This does not close the underlying stream.
        reader.close();
    }

    return nodes;
}

From source file:org.apache.olingo.client.core.serialization.AtomDeserializer.java

/**
 * Reads the value of a primitive property from the event stream, consuming
 * events up to and including the end element whose name matches
 * {@code start}.
 *
 * <p>When {@code typeInfo} describes a geospatial kind, each nested start
 * element is handed to {@code geoDeserializer}. Otherwise non-whitespace
 * character data is either returned as the raw string ({@code typeInfo} is
 * {@code null}) or converted through the primitive type's
 * {@code valueOfString}. A default type that is assignable from
 * {@link Calendar} is requested as {@link Timestamp} instead.
 *
 * @param reader   event source positioned just after {@code start}
 * @param start    the start element of the property being read
 * @param typeInfo type description of the property, may be {@code null}
 * @return the last value produced while reading, or {@code null} if none was
 * @throws XMLStreamException        if reading the next event fails
 * @throws EdmPrimitiveTypeException if the text cannot be converted
 */
private Object fromPrimitive(final XMLEventReader reader, final StartElement start, final EdmTypeInfo typeInfo)
        throws XMLStreamException, EdmPrimitiveTypeException {

    Object result = null;

    while (reader.hasNext()) {
        final XMLEvent current = reader.nextEvent();

        if (current.isStartElement()) {
            // Nested elements are only interpreted for geospatial primitives.
            if (typeInfo != null && typeInfo.getPrimitiveTypeKind().isGeospatial()) {
                final String qualifiedName = typeInfo.getFullQualifiedName().toString();
                final EdmPrimitiveTypeKind geoKind = EdmPrimitiveTypeKind.valueOfFQN(qualifiedName);
                result = geoDeserializer.deserialize(reader, current.asStartElement(), geoKind);
            }
        } else if (current.isCharacters()) {
            // Text is ignored when blank, and for geospatial kinds.
            if (!current.asCharacters().isWhiteSpace()
                    && (typeInfo == null || !typeInfo.getPrimitiveTypeKind().isGeospatial())) {
                final String text = current.asCharacters().getData();
                if (typeInfo == null) {
                    result = text;
                } else {
                    final EdmPrimitiveType primitive = (EdmPrimitiveType) typeInfo.getType();
                    Class<?> target = primitive.getDefaultType();
                    if (target.isAssignableFrom(Calendar.class)) {
                        target = Timestamp.class;
                    }
                    result = primitive.valueOfString(text, true, null,
                            Constants.DEFAULT_PRECISION, Constants.DEFAULT_SCALE, true, target);
                }
            }
        } else if (current.isEndElement()) {
            // The property's own closing tag terminates the scan.
            if (start.getName().equals(current.asEndElement().getName())) {
                break;
            }
        }
    }

    return result;
}

From source file:org.apache.olingo.client.core.serialization.AtomDeserializer.java

/**
 * Reads a complex or enum property value from the event stream, consuming
 * events up to and including the end element whose name matches
 * {@code start}.
 *
 * <p>The first nested start element turns the result into a
 * {@link ComplexValue}. Atom {@code link} elements are attached to it as
 * navigation or association links according to their {@code rel} attribute;
 * links with a missing or unrecognised {@code rel} are ignored. Any other
 * nested element is parsed as a property and added to the complex value.
 * Non-whitespace character data replaces the result with the raw text
 * (presumably the enum case, going by the method name).
 *
 * @param reader event source positioned just after {@code start}
 * @param start  the start element of the property being read
 * @return a {@link ComplexValue}, the text content as a {@code String}, or
 *         {@code null} when the element is empty
 * @throws XMLStreamException        if reading the next event fails
 * @throws EdmPrimitiveTypeException propagated from nested property parsing
 */
private Object fromComplexOrEnum(final XMLEventReader reader, final StartElement start)
        throws XMLStreamException, EdmPrimitiveTypeException {

    Object value = null;

    boolean foundEndProperty = false;
    while (reader.hasNext() && !foundEndProperty) {
        final XMLEvent event = reader.nextEvent();

        if (event.isStartElement()) {
            if (value == null) {
                value = new ComplexValue();
            }

            final StartElement child = event.asStartElement();

            if (Constants.QNAME_ATOM_ELEM_LINK.equals(child.getName())) {
                final Link link = new Link();
                final Attribute rel = child.getAttributeByName(QName.valueOf(Constants.ATTR_REL));
                if (rel != null) {
                    link.setRel(rel.getValue());
                }
                final Attribute title = child.getAttributeByName(QName.valueOf(Constants.ATTR_TITLE));
                if (title != null) {
                    link.setTitle(title.getValue());
                }
                final Attribute href = child.getAttributeByName(QName.valueOf(Constants.ATTR_HREF));
                if (href != null) {
                    link.setHref(href.getValue());
                }
                final Attribute type = child.getAttributeByName(QName.valueOf(Constants.ATTR_TYPE));
                if (type != null) {
                    link.setType(type.getValue());
                }

                // The rel attribute is optional above, so the link's rel may be
                // unset; guard before dereferencing it. A link that cannot be
                // classified is skipped, like one with an unrecognised rel.
                final String linkRel = link.getRel();
                if (linkRel != null) {
                    if (linkRel.startsWith(Constants.NS_NAVIGATION_LINK_REL)) {
                        ((ComplexValue) value).getNavigationLinks().add(link);
                        inline(reader, child, link);
                    } else if (linkRel.startsWith(Constants.NS_ASSOCIATION_LINK_REL)) {
                        // value is only ever instantiated as a ComplexValue in this
                        // method, so cast to it directly as the navigation branch does.
                        ((ComplexValue) value).getAssociationLinks().add(link);
                    }
                }
            } else {
                ((ComplexValue) value).getValue().add(property(reader, child));
            }
        }

        if (event.isCharacters() && !event.asCharacters().isWhiteSpace()) {
            value = event.asCharacters().getData();
        }

        if (event.isEndElement() && start.getName().equals(event.asEndElement().getName())) {
            foundEndProperty = true;
        }
    }

    return value;
}