Example usage for javax.xml.stream.events XMLEvent isStartElement

Introduction

On this page you can find example usages of javax.xml.stream.events.XMLEvent.isStartElement().

Prototype

public boolean isStartElement();

Source Link

Document

A utility function to check if this event is a StartElement.

Usage

From source file:com.aionemu.gameserver.dataholders.loadingutils.XmlMerger.java

/**
 * Read all {@link javax.xml.stream.events.XMLEvent}'s from specified file
 * and write them onto the {@link javax.xml.stream.XMLEventWriter}
 *
 * @param file     File to import/*from  w  ww. j av a 2  s  . c  o  m*/
 * @param skipRoot Skip-root flag
 * @param writer   Destenation writer
 * @throws XMLStreamException    On event reading/writing error.
 * @throws FileNotFoundException if the reading file does not exist, is a
 *                               directory rather than a regular file, or for some other reason cannot be
 *                               opened for reading.
 */
private void importFile(File file, boolean skipRoot, XMLEventWriter writer, Properties metadata)
        throws XMLStreamException, IOException {
    logger.debug("Appending file " + file);
    metadata.setProperty(file.getPath(), makeHash(file));

    XMLEventReader reader = null;

    try {
        reader = inputFactory.createXMLEventReader(new FileReader(file));

        QName firstTagQName = null;

        while (reader.hasNext()) {
            XMLEvent event = reader.nextEvent();

            // skip start and end of document.
            if (event.isStartDocument() || event.isEndDocument()) {
                continue;
            }
            // skip all comments.
            if (event instanceof Comment) {
                continue;
            }
            // skip white-spaces and all ignoreable white-spaces.
            if (event.isCharacters()) {
                if (event.asCharacters().isWhiteSpace() || event.asCharacters().isIgnorableWhiteSpace()) {
                    continue;
                }
            }

            // modify root-tag of imported file.
            if (firstTagQName == null && event.isStartElement()) {
                firstTagQName = event.asStartElement().getName();

                if (skipRoot) {
                    continue;
                } else {
                    StartElement old = event.asStartElement();

                    event = eventFactory.createStartElement(old.getName(), old.getAttributes(), null);
                }
            }

            // if root was skipped - skip root end too.
            if (event.isEndElement() && skipRoot && event.asEndElement().getName().equals(firstTagQName)) {
                continue;
            }

            // finally - write tag
            writer.add(event);
        }
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignored) {
            }
        }
    }
}

From source file:com.google.code.activetemplates.impl.TemplateCompilerImpl.java

/**
 * Drains all events from the compile context and writes the processed
 * result to the context's writer.
 * <p>
 * Each event is dispatched on its kind (attribute, start element, end
 * element, characters); events of any other kind are dropped. Attribute
 * values and character data are passed through {@code processText}, and a
 * non-null result replaces the original text. Elements and attributes for
 * which the handler {@code h} reports "handled" are delegated to it instead
 * of being written directly.
 *
 * @param name not referenced in this method body
 * @param cc   compile context supplying the events, the element factory and the writer
 * @throws XMLStreamException declared for the event reading/writing calls made here
 */
private void doCompile(String name, CompileContext cc) throws XMLStreamException {

    while (cc.hasNextEvent()) {

        XMLEvent e = cc.nextEvent();

        //Location loc = e.getLocation();

        if (e.isAttribute()) {
            //System.out.println("Adding " + e);

            // attributes added during tag processing and under the same tag
            // get handled here, outcome is always PROCESS_ALL

            Attribute a = (Attribute) e;
            if (h.isAttributeHandled(a.getName())) {
                h.processAttribute(cc, a);
            } else {
                String value = a.getValue();
                String nvalue = processText(cc, value);
                // a null result leaves the attribute as it was
                if (nvalue != null) {
                    a = cc.getElementFactory().createAttribute(a.getName(), nvalue);
                }
                //System.out.println("Adding " + e);
                cc.getWriter().add(a);
            }

        } else if (e.isStartElement()) {

            StartElement se = e.asStartElement();

            // stays DEFAULT while the original start element can be written unchanged
            Processing processing = Processing.DEFAULT;

            // collect namespaces
            @SuppressWarnings("unchecked")
            Iterator<Namespace> nsit = se.getNamespaces();
            List<Namespace> namespaces = new ArrayList<Namespace>();

            while (nsit.hasNext()) {
                Namespace ns = nsit.next();
                if (excludedNamespaces.contains(ns.getNamespaceURI())) {
                    // an excluded namespace is dropped, so the element must be rebuilt
                    processing = Processing.REPLACE;
                } else {
                    namespaces.add(ns);
                }
            }

            // collect attributes
            @SuppressWarnings("unchecked")
            Iterator<Attribute> it = se.getAttributes();
            List<Attribute> attributes = new LinkedList<Attribute>();
            while (it.hasNext()) {
                attributes.add(it.next());
            }

            // collect any separate attribute and namespace xml events
            while (cc.hasNextEvent()) {
                if (cc.peekEvent().isNamespace()) {
                    namespaces.add((Namespace) cc.nextEvent());
                    processing = Processing.REPLACE;
                } else if (cc.peekEvent().isAttribute()) {
                    attributes.add((Attribute) cc.nextEvent());
                    processing = Processing.REPLACE;
                } else {
                    break;
                }
            }

            // preprocess attributes
            // 'it' walks the collected list while 'attributes' is rebuilt with
            // only the attributes that are not consumed by the handler
            it = attributes.iterator();
            attributes = new ArrayList<Attribute>();

            while (it.hasNext() && processing != Processing.SKIP) {
                Attribute a = it.next();

                if (h.isAttributeHandled(a.getName())) {
                    processing = Processing.REPLACE;

                    AttributeHandler.Outcome o = h.processAttribute(cc, a);
                    if (o == Outcome.PROCESS_NONE) {
                        // ends the loop: remaining attributes are not looked at
                        processing = Processing.SKIP;
                    }

                } else {
                    String value = a.getValue();
                    String nvalue = processText(cc, value);
                    if (nvalue != null) {
                        a = cc.getElementFactory().createAttribute(a.getName(), nvalue);
                        processing = Processing.REPLACE;
                    }

                    attributes.add(a);
                }
            }

            if (processing == Processing.SKIP) {

                // NOTE(review): presumably discards this element's subtree; the
                // meaning of the boolean flag is defined by skipChildren - confirm
                skipChildren(cc, false);

            } else {

                if (processing == Processing.REPLACE) {
                    // replace element with new one
                    se = cc.getElementFactory().createStartElement(se.getName(), attributes.iterator(),
                            namespaces.iterator());
                }

                // handle start element
                if (h.isElementHandled(se.getName())) {
                    ElementHandler.Outcome o = h.processStartElement(cc, se);
                    cc.flushEventQueue();
                    // only PROCESS_SIBLINGS needs an action here; every other
                    // outcome falls through and processing simply continues
                    switch (o) {
                    case PROCESS_SIBLINGS:
                        skipChildren(cc, true);
                        break;
                    }
                } else {
                    //System.out.println("Adding " + se);
                    cc.getWriter().add(se);
                    cc.flushEventQueue(); // flush events added by any attribute handlers
                }
            }

        } else if (e.isEndElement()) {

            // handle end element
            if (h.isElementHandled(e.asEndElement().getName())) {
                h.processEndElement(cc, e.asEndElement());
                cc.flushEventQueue();
            } else {
                //System.out.println("Adding " + e);
                cc.getWriter().add(e);
            }

        } else if (e.isCharacters()) {

            // process text
            Characters ce = e.asCharacters();
            String s = ce.getData();
            String ns = processText(cc, s);
            // a null result leaves the character data as it was
            if (ns != null) {
                ce = cc.getElementFactory().createCharacters(ns);
            }
            //System.out.println("Adding " + e);
            cc.getWriter().add(ce);

        }

    }

}

From source file:edu.jhu.hlt.concrete.ingesters.bolt.BoltForumPostIngester.java

/**
 * Builds a {@link Communication} from the forum-post XML file at {@code path}.
 * <p>
 * The file is read twice: once fully into a string that becomes the
 * communication text, and once through an {@link XMLEventReader} whose
 * character offsets are used to create sections that point into that text.
 * A headline section is added first, then for every start element carrying
 * both an {@code author} and a {@code datetime} attribute one section each,
 * and one {@code "post"} section per non-whitespace character event.
 *
 * @param path location of the file to ingest
 * @return the populated communication
 * @throws IngestException if there is no file at {@code path}, it is not a
 *                         regular file, it cannot be read, the XML cannot be
 *                         parsed, or a computed offset falls outside the text
 */
@Override
public Communication fromCharacterBasedFile(final Path path) throws IngestException {
    if (!Files.exists(path))
        throw new IngestException("No file at: " + path.toString());

    AnalyticUUIDGeneratorFactory f = new AnalyticUUIDGeneratorFactory();
    AnalyticUUIDGenerator gen = f.create();
    Communication c = new Communication();
    c.setUuid(gen.next());
    c.setType(this.getKind());
    c.setMetadata(TooledMetadataConverter.convert(this));

    try {
        ExistingNonDirectoryFile ef = new ExistingNonDirectoryFile(path);
        // the id is the file name up to the first '.'
        c.setId(ef.getName().split("\\.")[0]);
    } catch (NoSuchFileException | NotFileException e) {
        // might throw if path is a directory.
        throw new IngestException(path.toString() + " is not a file, or is a directory.");
    }

    // First pass: the raw file content becomes the communication text.
    String content;
    try (InputStream is = Files.newInputStream(path);
            BufferedInputStream bin = new BufferedInputStream(is, 1024 * 8 * 8);) {
        content = IOUtils.toString(bin, StandardCharsets.UTF_8);
        c.setText(content);
    } catch (IOException e) {
        throw new IngestException(e);
    }

    // Second pass: walk the XML events and map them to offsets in 'content'.
    try (InputStream is = Files.newInputStream(path);
            BufferedInputStream bin = new BufferedInputStream(is, 1024 * 8 * 8);
            BufferedReader reader = new BufferedReader(new InputStreamReader(bin, StandardCharsets.UTF_8));) {
        XMLEventReader rdr = null;
        try {
            rdr = inF.createXMLEventReader(reader);

            // Below method moves the reader
            // to the first post element.
            Section headline = handleHeadline(rdr, content);
            headline.setUuid(gen.next());
            c.addToSectionList(headline);
            int start = headline.getTextSpan().getStart();
            int ending = headline.getTextSpan().getEnding();
            if (ending < start)
                ending = start; // @tongfei: handle empty headlines
            String htxt = c.getText().substring(start, ending);
            LOGGER.debug("headline text: {}", htxt);

            // Section indices.
            int sectNumber = 1;
            int subSect = 0;

            // Move iterator to post start element.
            this.iterateToPosts(rdr);

            // Offset pointer.
            int currOff = -1;

            SectionFactory sf = new SectionFactory(gen);

            // First post element.
            while (rdr.hasNext()) {
                XMLEvent nextEvent = rdr.nextEvent();
                currOff = nextEvent.getLocation().getCharacterOffset();
                if (currOff > 0) {
                    int currOffPlus = currOff + 20;
                    // Clamp to 0: for offsets below 20 the unclamped value is
                    // negative and substring() would throw, turning a debug
                    // log statement into a failed ingest.
                    int currOffLess = Math.max(0, currOff - 20);
                    LOGGER.debug("Offset: {}", currOff);
                    if (currOffPlus < content.length())
                        LOGGER.debug("Surrounding text: {}", content.substring(currOffLess, currOffPlus));
                }

                // First: see if document is going to end.
                // If yes: exit.
                if (nextEvent.isEndDocument())
                    break;

                // XMLEvent peeker = rdr.peek();

                // Check if start element.
                if (nextEvent.isStartElement()) {
                    StartElement se = nextEvent.asStartElement();
                    QName name = se.getName();
                    final String localName = name.getLocalPart();
                    LOGGER.debug("Hit start element: {}", localName);

                    //region
                    // Add sections for authors and datetimes for each bolt post
                    // by Tongfei Chen
                    Attribute attrAuthor = se.getAttributeByName(QName.valueOf("author"));
                    Attribute attrDateTime = se.getAttributeByName(QName.valueOf("datetime"));

                    if (attrAuthor != null && attrDateTime != null) {

                        int loc = attrAuthor.getLocation().getCharacterOffset();

                        // NOTE(review): the offsets below assume the tag is written
                        // exactly as <post author="..." datetime="..."> - confirm
                        int sectAuthorBeginningOffset = loc + "<post author=\"".length();

                        Section sectAuthor = sf.fromTextSpan(new TextSpan(sectAuthorBeginningOffset,
                                sectAuthorBeginningOffset + attrAuthor.getValue().length()), "author");
                        c.addToSectionList(sectAuthor);

                        int sectDateTimeBeginningOffset = sectAuthorBeginningOffset
                                + attrAuthor.getValue().length() + " datetime=".length();

                        Section sectDateTime = sf.fromTextSpan(
                                new TextSpan(sectDateTimeBeginningOffset,
                                        sectDateTimeBeginningOffset + attrDateTime.getValue().length()),
                                "datetime");
                        c.addToSectionList(sectDateTime);
                    }
                    //endregion

                    // Move past quotes, images, and links.
                    if (localName.equals(QUOTE_LOCAL_NAME)) {
                        this.handleQuote(rdr);
                    } else if (localName.equals(IMG_LOCAL_NAME)) {
                        this.handleImg(rdr);
                    } else if (localName.equals(LINK_LOCAL_NAME)) {
                        this.handleLink(rdr);
                    }

                    // not a start element
                } else if (nextEvent.isCharacters()) {
                    Characters chars = nextEvent.asCharacters();
                    int coff = chars.getLocation().getCharacterOffset();
                    if (!chars.isWhiteSpace()) {
                        // content to be captured
                        String fpContent = chars.getData();
                        LOGGER.debug("Character offset: {}", coff);
                        LOGGER.debug("Character based data: {}", fpContent);
                        // LOGGER.debug("Character data via offset diff: {}", content.substring(coff - fpContent.length(), coff));

                        // key/value of 'pads' are subtracted from the span's start/end
                        SimpleImmutableEntry<Integer, Integer> pads = trimSpacing(fpContent);
                        final int tsb = currOff + pads.getKey();
                        final int tse = currOff + fpContent.length() - pads.getValue();
                        final String subs = content.substring(tsb, tse);
                        if (subs.replaceAll("\\p{Zs}", "").replaceAll("\\n", "").isEmpty()) {
                            LOGGER.info("Found empty section: skipping.");
                            continue;
                        }

                        LOGGER.debug("Section text: {}", subs);
                        TextSpan ts = new TextSpan(tsb, tse);

                        Section s = sf.fromTextSpan(ts, "post");
                        // number list is [post number, running index within the post]
                        List<Integer> intList = new ArrayList<>();
                        intList.add(sectNumber);
                        intList.add(subSect);
                        s.setNumberList(intList);
                        c.addToSectionList(s);

                        subSect++;
                    }
                } else if (nextEvent.isEndElement()) {
                    EndElement ee = nextEvent.asEndElement();
                    currOff = ee.getLocation().getCharacterOffset();
                    QName name = ee.getName();
                    String localName = name.getLocalPart();
                    LOGGER.debug("Hit end element: {}", localName);
                    if (localName.equalsIgnoreCase(POST_LOCAL_NAME)) {
                        // a post ended: advance the post counter, restart the sub-index
                        sectNumber++;
                        subSect = 0;
                    }
                }
            }
            return c;
        } catch (XMLStreamException | ConcreteException | StringIndexOutOfBoundsException x) {
            throw new IngestException(x);
        } finally {
            if (rdr != null)
                try {
                    rdr.close();
                } catch (XMLStreamException e) {
                    // not likely.
                    LOGGER.info("Error closing XMLReader.", e);
                }
        }
    } catch (IOException e) {
        throw new IngestException(e);
    }
}

From source file:com.streamsets.pipeline.lib.xml.StreamingXmlParser.java

/**
 * Builds the {@link Field} for the element opened by {@code startE},
 * reading events from {@code reader} up to and including its end element.
 * <p>
 * Whitespace-only character events are ignored. Text that is the first
 * content of the element and is directly followed by the end element is
 * stored under {@code VALUE_KEY}; nested elements are parsed recursively
 * and collected by name. When {@code useFieldAttributesInsteadOfFields} is
 * set, the element's attributes and namespaces are attached to the
 * resulting field as field attributes instead.
 *
 * @param reader event source, positioned just after {@code startE}
 * @param startE start element whose content is being parsed
 * @return the field built for the element
 * @throws XMLStreamException    on an unexpected event or a mismatched end element
 * @throws ObjectLengthException declared for the reader helpers called here
 */
@SuppressWarnings("unchecked")
Field parse(XMLEventReader reader, StartElement startE) throws XMLStreamException, ObjectLengthException {
    final Map<String, Field> elementFields;
    if (this.useFieldAttributesInsteadOfFields) {
        elementFields = new LinkedHashMap<>();
    } else {
        elementFields = toField(startE);
    }
    final Map<String, Object> children = new LinkedHashMap<>();
    // true until the first non-whitespace content event has been handled
    boolean firstContent = true;

    while (hasNext(reader) && !peek(reader).isEndElement()) {
        final XMLEvent current = read(reader);
        if (current.isCharacters()) {
            if (current.asCharacters().isWhiteSpace()) {
                // all-whitespace text is ignored and does not count as content
                continue;
            }
            if (peek(reader).isEndElement() && firstContent) {
                // the element holds nothing but this text
                children.put(VALUE_KEY, Field.create(((Characters) current).getData()));
            } else if (peek(reader).isStartElement()) {
                // text followed by a nested element: the text itself is discarded
                final StartElement nested = (StartElement) read(reader);
                final Field nestedField = parse(reader, nested);
                addContent(children, getName(nested), nestedField);
                if (hasNext(reader) && peek(reader).isCharacters()) {
                    // consume the character event trailing the nested element
                    read(reader);
                }
            } else if (firstContent) {
                throw new XMLStreamException(Utils
                        .format("Unexpected XMLEvent '{}', it should be START_ELEMENT or END_ELEMENT", current),
                        current.getLocation());
            }
        } else if (current.isStartElement()) {
            final StartElement child = (StartElement) current;
            final String childName = getName(child);
            final Field childField = parse(reader, child);
            addContent(children, childName, childField);
        } else {
            throw new XMLStreamException(
                    Utils.format("Unexpected XMLEvent '{}', it should be START_ELEMENT or CHARACTERS", current),
                    current.getLocation());
        }
        firstContent = false;
    }

    if (hasNext(reader)) {
        // the loop stopped in front of an end element: it must close startE
        final EndElement closing = (EndElement) read(reader);
        if (!closing.getName().equals(startE.getName())) {
            throw new XMLStreamException(Utils.format("Unexpected EndElement '{}', it should be '{}'",
                    closing.getName().getLocalPart(), startE.getName().getLocalPart()), closing.getLocation());
        }
        for (Map.Entry<String, Object> child : children.entrySet()) {
            final Object value = child.getValue();
            // a value is either a single Field or a list of Fields
            final Field asField = (value instanceof Field) ? (Field) value : Field.create((List<Field>) value);
            elementFields.put(child.getKey(), asField);
        }
    }

    final Field field = Field.create(elementFields);

    if (this.useFieldAttributesInsteadOfFields) {
        for (Iterator<?> attrs = startE.getAttributes(); attrs.hasNext();) {
            final Attribute attr = (Attribute) attrs.next();
            field.setAttribute(getName(XMLATTR_ATTRIBUTE_PREFIX, attr), attr.getValue());
        }
        for (Iterator<?> declared = startE.getNamespaces(); declared.hasNext();) {
            final Namespace ns = (Namespace) declared.next();
            field.setAttribute(getName(null, ns), ns.getNamespaceURI());
        }
    }

    lastParsedFieldXpathPrefix = getXpathPrefix();
    return field;
}

From source file:edu.unc.lib.dl.util.TripleStoreQueryServiceMulgaraImpl.java

/**
 * @param query/*from   www.j av a  2  s  .co  m*/
 *            an ITQL command
 * @return the message returned by Mulgara
 * @throws RemoteException
 *             for communication failure
 */
public String storeCommand(String query) {
    String result = null;
    String response = this.sendTQL(query);
    if (response != null) {
        XMLInputFactory factory = XMLInputFactory.newInstance();
        factory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
        try (StringReader sr = new StringReader(response)) {
            XMLEventReader r = factory.createXMLEventReader(sr);
            boolean inMessage = false;
            StringBuffer message = new StringBuffer();
            while (r.hasNext()) {
                XMLEvent e = r.nextEvent();
                if (e.isStartElement()) {
                    StartElement s = e.asStartElement();
                    if ("message".equals(s.getName().getLocalPart())) {
                        inMessage = true;
                    }
                } else if (e.isEndElement()) {
                    EndElement end = e.asEndElement();
                    if ("message".equals(end.getName().getLocalPart())) {
                        inMessage = false;
                    }
                } else if (inMessage && e.isCharacters()) {
                    message.append(e.asCharacters().getData());
                }
            }
            r.close();
            result = message.toString();
        } catch (XMLStreamException e) {
            e.printStackTrace();
        }
    }
    return result;
}

From source file:de.dfki.km.leech.parser.wikipedia.WikipediaDumpParser.java

/**
 * Parses a Wikipedia XML dump and emits one XHTML document per page to
 * {@code handler}.
 * <p>
 * The stream is wrapped in a {@link TikaInputStream} and read from the file
 * it provides. For each {@code page} element the title, timestamp, username
 * and text are collected into {@code metadata}; when the page's end element
 * is reached, a document with the cleaned text in a single {@code p} element
 * is written. Pages whose title is a known redirect title or starts with one
 * of the special prefixes (category, template, help, ...) are skipped.
 * <p>
 * Any exception raised while parsing is logged at {@code SEVERE} level and
 * not rethrown.
 *
 * @param stream   the dump to parse
 * @param handler  receives the XHTML events for every page
 * @param metadata reused for every page; cleared at each {@code page} start element
 * @param context  may carry a {@link WikipediaDumpParserConfig}; a default one is used otherwise
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {

    // Declared outside the try block so the finally clause can release them.
    InputStream redirectStream = null;
    InputStream dumpStream = null;
    XMLEventReader xmlEventReader = null;

    try {

        // We iterate over the page entries. Each contains title, timestamp, <contributor> => <username> and text. The text
        // still has to be cleaned; for that we preprocess it with bliki, but unfortunately we also have to prepare the
        // string beforehand and clean it up afterwards ourselves.

        WikipediaDumpParserConfig wikipediaDumpParserConfig = context.get(WikipediaDumpParserConfig.class);

        if (wikipediaDumpParserConfig == null) {
            Logger.getLogger(WikipediaDumpParser.class.getName())
                    .info("No wikipedia parser config found. Will take the default one.");
            wikipediaDumpParserConfig = new WikipediaDumpParserConfig();
        }

        TikaInputStream tikaStream = TikaInputStream.get(stream);

        File fWikipediaDumpFile4Stream = tikaStream.getFile();

        MultiValueHashMap<String, String> hsPageTitle2Redirects = new MultiValueHashMap<String, String>();
        if (wikipediaDumpParserConfig.determinePageRedirects) {
            redirectStream = new FileInputStream(fWikipediaDumpFile4Stream);
            hsPageTitle2Redirects = getPageTitle2Redirects(redirectStream);
        }

        HashSet<String> hsRedirectPageTitles = new HashSet<String>(hsPageTitle2Redirects.values());

        String strCleanedText = "";
        String strBaseURL = null;

        XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();
        dumpStream = new FileInputStream(fWikipediaDumpFile4Stream);
        xmlEventReader = xmlInputFactory.createXMLEventReader(dumpStream, "Utf-8");
        while (xmlEventReader.hasNext()) {

            XMLEvent xmlEvent = xmlEventReader.nextEvent();

            if (xmlEvent.isEndElement() && xmlEvent.asEndElement().getName().getLocalPart().equals("page")) {
                // nothing was collected for this page
                if (metadata.size() == 0)
                    continue;

                // we also want the MIME type in the metadata
                metadata.add(Metadata.CONTENT_TYPE, "application/wikipedia+xml");

                XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
                xhtml.startDocument();

                xhtml.startElement("p");
                xhtml.characters(strCleanedText.toCharArray(), 0, strCleanedText.length());
                xhtml.endElement("p");

                xhtml.endDocument();

            }

            if (!xmlEvent.isStartElement())
                continue;

            // ##### the siteinfo

            if (strBaseURL == null && xmlEvent.asStartElement().getName().getLocalPart().equals("base")) {
                // http://de.wikipedia.org/wiki/Wikipedia:Hauptseite =>http://de.wikipedia.org/wiki/
                strBaseURL = readNextCharEventsText(xmlEventReader);
                strBaseURL = strBaseURL.substring(0, strBaseURL.lastIndexOf("/") + 1);
            }

            // ##### the page

            if (xmlEvent.asStartElement().getName().getLocalPart().equals("page")) {
                // a new page starts: drop everything collected for the previous one
                for (String strKey : metadata.names())
                    metadata.remove(strKey);
            }

            // ##### the title

            if (xmlEvent.asStartElement().getName().getLocalPart().equals("title")) {
                // we always remember the current title
                String strCurrentTitle = readNextCharEventsText(xmlEventReader);

                // If the title is a redirect page, we consume the whole page from the event queue, jump to the page end and
                // have thereby ignored this page. We also ignore special wikipedia pages.
                String strSmallTitle = strCurrentTitle.trim().toLowerCase();
                if (hsRedirectPageTitles.contains(strCurrentTitle)
                        || hsRedirectPageTitles.contains(strSmallTitle)
                        || hsRedirectPageTitles.contains(strCurrentTitle.trim())
                        || strSmallTitle.startsWith("category:") || strSmallTitle.startsWith("kategorie:")
                        || strSmallTitle.startsWith("vorlage:") || strSmallTitle.startsWith("template:")
                        || strSmallTitle.startsWith("hilfe:") || strSmallTitle.startsWith("help:")
                        || strSmallTitle.startsWith("wikipedia:") || strSmallTitle.startsWith("portal:")
                        || strSmallTitle.startsWith("mediawiki:")) {

                    // fast-forward to this page's end element
                    while (true) {
                        XMLEvent nextXmlEvent = xmlEventReader.nextEvent();
                        if (nextXmlEvent.isEndElement()
                                && nextXmlEvent.asEndElement().getName().getLocalPart().equals("page"))
                            break;
                    }
                } else {
                    metadata.add(Metadata.TITLE, strCurrentTitle);
                    metadata.add(Metadata.SOURCE, strBaseURL + strCurrentTitle);

                    for (String strRedirect : hsPageTitle2Redirects.get(strCurrentTitle)) {
                        // we ignore titles that differ only in upper/lower case
                        if (!StringUtils.containsIgnoreCase(strRedirect, metadata.getValues(Metadata.TITLE)))
                            metadata.add(Metadata.TITLE, strRedirect);
                    }
                }

                continue;
            }

            // ##### the text
            if (xmlEvent.asStartElement().getName().getLocalPart().equals("text")) {
                String strText = readNextCharEventsText(xmlEventReader);

                if (wikipediaDumpParserConfig.parseLinksAndCategories)
                    parseLinksAndCategories(strText, strBaseURL, metadata, handler);
                if (wikipediaDumpParserConfig.parseInfoBoxes)
                    parseInfoBox(strText, metadata, handler);
                if (wikipediaDumpParserConfig.parseGeoCoordinates)
                    parseGeoCoordinates(strText, metadata);

                // because of some deficits in the cleaner used we unfortunately have to do pre- and post-processing here
                strText = strText.replaceAll("==\n", "==\n\n");
                strText = strText.replaceAll("\n==", "\n\n==");

                strCleanedText = m_wikiModel.render(new PlainTextConverter(), strText);

                strCleanedText = strCleanedText.replaceAll("\\{\\{", " ");
                strCleanedText = strCleanedText.replaceAll("\\}\\}", " ");

                strCleanedText = StringEscapeUtils.unescapeHtml4(strCleanedText);

                continue;
            }

            // ##### the timestamp
            if (xmlEvent.asStartElement().getName().getLocalPart().equals("timestamp")) {
                String strTimestamp = readNextCharEventsText(xmlEventReader);

                metadata.add(Metadata.MODIFIED, strTimestamp);

                continue;
            }

            // ##### the username
            if (xmlEvent.asStartElement().getName().getLocalPart().equals("username")) {
                String strUsername = readNextCharEventsText(xmlEventReader);

                metadata.add(Metadata.CREATOR, strUsername);

                continue;
            }

        }

    } catch (Exception e) {
        Logger.getLogger(WikipediaDumpParser.class.getName()).log(Level.SEVERE, "Error", e);
    } finally {
        // XMLEventReader.close() does not close the underlying stream, so
        // the reader and both file streams are released separately.
        if (xmlEventReader != null) {
            try {
                xmlEventReader.close();
            } catch (Exception closeError) {
                Logger.getLogger(WikipediaDumpParser.class.getName()).log(Level.FINE,
                        "Could not close XML event reader", closeError);
            }
        }
        for (InputStream openedStream : new InputStream[] { dumpStream, redirectStream }) {
            if (openedStream == null)
                continue;
            try {
                openedStream.close();
            } catch (IOException closeError) {
                Logger.getLogger(WikipediaDumpParser.class.getName()).log(Level.FINE,
                        "Could not close dump file stream", closeError);
            }
        }
    }

}

From source file:org.alex73.osm.converters.bel.Convert.java

/**
 * Converts {@code tmp/belarus-latest.osm.bz2} into two OSM XML files,
 * {@code tmp/belarus-bel.osm} and {@code tmp/belarus-intl.osm}.
 * <p>
 * Every {@code way}, {@code node} and {@code relation} element is
 * unmarshalled with JAXB, adjusted with {@code fixBel} (and, for ways, the
 * house street name lookup), written to the first file, adjusted again with
 * {@code fixInt} and written to the second file. All other events are copied
 * unchanged to both files.
 *
 * @param args not used
 * @throws Exception on any I/O, XML or JAXB failure
 */
public static void main(String[] args) throws Exception {
    loadStreetNamesForHouses();

    XMLEventFactory eventFactory = XMLEventFactory.newInstance();
    XMLEvent newLine = eventFactory.createCharacters("\n");
    XMLInputFactory xif = XMLInputFactory.newInstance();
    XMLOutputFactory xof = XMLOutputFactory.newInstance();

    // initialize jaxb
    JAXBContext jaxbCtx = JAXBContext.newInstance(Node.class, Way.class, Relation.class);
    Unmarshaller um = jaxbCtx.createUnmarshaller();
    Marshaller m = jaxbCtx.createMarshaller();
    // fragments: no XML declaration is emitted per marshalled element
    m.setProperty(Marshaller.JAXB_FRAGMENT, true);
    m.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE);

    // The XML event reader/writers do not close the streams they wrap, so the
    // streams are managed here and closed even when the conversion fails.
    try (InputStream in = new BZip2CompressorInputStream(
            new BufferedInputStream(new FileInputStream("tmp/belarus-latest.osm.bz2"), BUFFER_SIZE));
            BufferedOutputStream outCyr = new BufferedOutputStream(new FileOutputStream("tmp/belarus-bel.osm"),
                    BUFFER_SIZE);
            BufferedOutputStream outInt = new BufferedOutputStream(new FileOutputStream("tmp/belarus-intl.osm"),
                    BUFFER_SIZE)) {

        // create xml event reader for input stream
        XMLEventReader reader = xif.createXMLEventReader(in);
        XMLEventWriter wrCyr = xof.createXMLEventWriter(outCyr);
        XMLEventWriter wrInt = xof.createXMLEventWriter(outInt);

        XMLEvent e = null;
        // peek() leaves the event in the reader so JAXB can consume the whole element
        while ((e = reader.peek()) != null) {
            boolean processed = false;
            if (e.isStartElement()) {
                StartElement se = (StartElement) e;
                switch (se.getName().getLocalPart()) {
                case "way":
                    Way way = um.unmarshal(reader, Way.class).getValue();
                    fixBel(way.getTag(), "name:be", "name");
                    String nameBeHouse = houseStreetBe.get(way.getId());
                    if (nameBeHouse != null) {
                        setTag(way.getTag(), "addr:street", nameBeHouse);
                    }
                    m.marshal(way, wrCyr);
                    // fixInt mutates the same tags, so it must run after the first marshal
                    fixInt(way.getTag());
                    m.marshal(way, wrInt);
                    wrCyr.add(newLine);
                    wrInt.add(newLine);
                    processed = true;
                    break;
                case "node":
                    Node node = um.unmarshal(reader, Node.class).getValue();
                    fixBel(node.getTag(), "name:be", "name");
                    // fixBel(node.getTag(),"addr:street:be","addr:street");
                    m.marshal(node, wrCyr);
                    fixInt(node.getTag());
                    m.marshal(node, wrInt);
                    wrCyr.add(newLine);
                    wrInt.add(newLine);
                    processed = true;
                    break;
                case "relation":
                    Relation relation = um.unmarshal(reader, Relation.class).getValue();
                    fixBel(relation.getTag(), "name:be", "name");
                    // fixBel(relation.getTag(),"addr:street:be","addr:street");
                    m.marshal(relation, wrCyr);
                    fixInt(relation.getTag());
                    m.marshal(relation, wrInt);
                    wrCyr.add(newLine);
                    wrInt.add(newLine);
                    processed = true;
                    break;
                }
            }
            if (!processed) {
                wrCyr.add(e);
                wrInt.add(e);
            }
            // NOTE(review): after an unmarshal call the element has already been
            // consumed, so this next() discards the event that follows it
            // (presumably inter-element whitespace) - confirm.
            reader.next();
        }

        wrCyr.flush();
        wrCyr.close();
        wrInt.flush();
        wrInt.close();
        reader.close();
    }
    System.out.println("UniqueTranslatedTags: " + uniqueTranslatedTags);
}

From source file:org.apache.hadoop.util.ConfTest.java

/**
 * Parses an XML configuration document into one {@link NodeInfo} per direct child of the root
 * {@code <configuration>} element.
 *
 * <p>While walking the document a stack of the currently open elements is maintained:
 * <ul>
 *   <li>a {@code <property>} element directly below {@code <configuration>} gets all of its
 *       attributes copied onto its node;</li>
 *   <li>an element directly below a {@code <property>} is registered on the property's node;</li>
 *   <li>character data found at nesting depth three or deeper is stored on the node of the
 *       enclosing element's parent, unless that node already returns a non-null value for the
 *       enclosing element.</li>
 * </ul>
 *
 * @param in stream holding the XML document; only the StAX reader created here is closed, the
 *           stream itself is left for the caller to close
 * @return the nodes of the root element's direct children in document order, or {@code null}
 *         when the root element is not {@code <configuration>}
 * @throws XMLStreamException if the document cannot be read
 */
private static List<NodeInfo> parseConf(InputStream in) throws XMLStreamException {
    QName configuration = new QName("configuration");
    QName property = new QName("property");

    List<NodeInfo> nodes = new ArrayList<NodeInfo>();
    Stack<NodeInfo> parsed = new Stack<NodeInfo>();

    XMLInputFactory factory = XMLInputFactory.newInstance();
    XMLEventReader reader = factory.createXMLEventReader(in);

    try {
        while (reader.hasNext()) {
            XMLEvent event = reader.nextEvent();
            if (event.isStartElement()) {
                StartElement currentElement = event.asStartElement();
                NodeInfo currentNode = new NodeInfo(currentElement);
                if (parsed.isEmpty()) {
                    // The very first element must be <configuration>; callers get null otherwise.
                    if (!currentElement.getName().equals(configuration)) {
                        return null;
                    }
                } else {
                    NodeInfo parentNode = parsed.peek();
                    QName parentName = parentNode.getStartElement().getName();
                    if (parentName.equals(configuration)
                            && currentNode.getStartElement().getName().equals(property)) {
                        @SuppressWarnings("unchecked")
                        Iterator<Attribute> it = currentElement.getAttributes();
                        while (it.hasNext()) {
                            currentNode.addAttribute(it.next());
                        }
                    } else if (parentName.equals(property)) {
                        parentNode.addElement(currentElement);
                    }
                }
                parsed.push(currentNode);
            } else if (event.isEndElement()) {
                NodeInfo node = parsed.pop();
                // Only the root is left on the stack, so the element just closed was its child.
                if (parsed.size() == 1) {
                    nodes.add(node);
                }
            } else if (event.isCharacters()) {
                if (2 < parsed.size()) {
                    // Temporarily pop the enclosing element to reach its parent node.
                    NodeInfo parentNode = parsed.pop();
                    StartElement parentElement = parentNode.getStartElement();
                    NodeInfo grandparentNode = parsed.peek();
                    if (grandparentNode.getElement(parentElement) == null) {
                        grandparentNode.setElement(parentElement, event.asCharacters());
                    }
                    parsed.push(parentNode);
                }
            }
        }
    } finally {
        // Release the reader's resources on every exit path, including the early return above.
        reader.close();
    }

    return nodes;
}

From source file:org.apache.olingo.client.core.serialization.AtomDeserializer.java

/**
 * Consumes events from {@code reader} until the end tag matching {@code start} is reached (or the
 * reader runs out of events) and turns the content met on the way into a value.
 *
 * <ul>
 *   <li>With a geospatial {@code typeInfo}, every start element is handed to the geospatial
 *       deserializer and its result becomes the value.</li>
 *   <li>With no {@code typeInfo}, non-whitespace character data is returned as a plain string.</li>
 *   <li>With any other {@code typeInfo}, non-whitespace character data is parsed by the primitive
 *       type; {@link Timestamp} is requested when the type's default class is assignable from
 *       {@link Calendar}, the default class itself otherwise.</li>
 * </ul>
 * When several events qualify, the value produced by the last one is returned.
 *
 * @param reader   event source, positioned just after {@code start}
 * @param start    start element of the property being read
 * @param typeInfo type information for the property, may be {@code null}
 * @return the value read, or {@code null} if no qualifying content was found
 * @throws XMLStreamException        on event reading errors
 * @throws EdmPrimitiveTypeException if the text cannot be converted by the primitive type
 */
private Object fromPrimitive(final XMLEventReader reader, final StartElement start, final EdmTypeInfo typeInfo)
        throws XMLStreamException, EdmPrimitiveTypeException {

    Object result = null;
    boolean closed = false;

    while (reader.hasNext() && !closed) {
        final XMLEvent current = reader.nextEvent();

        if (current.isStartElement()) {
            if (typeInfo != null && typeInfo.getPrimitiveTypeKind().isGeospatial()) {
                final String typeName = typeInfo.getFullQualifiedName().toString();
                final EdmPrimitiveTypeKind geoKind = EdmPrimitiveTypeKind.valueOfFQN(typeName);
                result = geoDeserializer.deserialize(reader, current.asStartElement(), geoKind);
            }
        } else if (current.isCharacters()) {
            if (!current.asCharacters().isWhiteSpace()) {
                if (typeInfo == null) {
                    // No type information: keep the raw text.
                    result = current.asCharacters().getData();
                } else if (!typeInfo.getPrimitiveTypeKind().isGeospatial()) {
                    final String text = current.asCharacters().getData();
                    final EdmPrimitiveType edmType = (EdmPrimitiveType) typeInfo.getType();

                    final Class<?> targetClass;
                    if (edmType.getDefaultType().isAssignableFrom(Calendar.class)) {
                        targetClass = Timestamp.class;
                    } else {
                        targetClass = edmType.getDefaultType();
                    }

                    result = edmType.valueOfString(text, true, null,
                            Constants.DEFAULT_PRECISION, Constants.DEFAULT_SCALE, true, targetClass);
                }
            }
        } else if (current.isEndElement()) {
            // Stop once the end tag of the property itself is seen.
            closed = start.getName().equals(current.asEndElement().getName());
        }
    }

    return result;
}

From source file:org.apache.olingo.client.core.serialization.AtomDeserializer.java

/**
 * Consumes events from {@code reader} until the end tag matching {@code start} is reached (or the
 * reader runs out of events) and builds either a complex value or a plain string from them.
 *
 * <p>The first start element encountered while no value exists yet creates a
 * {@link ComplexValue}. Atom link elements are turned into {@link Link}s and attached as
 * navigation or association links according to the prefix of their {@code rel}; a navigation
 * link is additionally passed to {@code inline}. A link without a {@code rel} attribute, or with
 * an unrecognised one, is ignored. Every other start element is read as a property and appended
 * to the complex value. Non-whitespace character data replaces the current value with its text.
 *
 * @param reader event source, positioned just after {@code start}
 * @param start  start element of the property being read
 * @return a {@link ComplexValue}, a {@code String}, or {@code null} if neither child elements nor
 *         non-whitespace text were found
 * @throws XMLStreamException        on event reading errors
 * @throws EdmPrimitiveTypeException propagated from reading nested properties
 */
private Object fromComplexOrEnum(final XMLEventReader reader, final StartElement start)
        throws XMLStreamException, EdmPrimitiveTypeException {

    Object value = null;

    boolean foundEndProperty = false;
    while (reader.hasNext() && !foundEndProperty) {
        final XMLEvent event = reader.nextEvent();

        if (event.isStartElement()) {
            final StartElement element = event.asStartElement();

            if (value == null) {
                value = new ComplexValue();
            }

            if (Constants.QNAME_ATOM_ELEM_LINK.equals(element.getName())) {
                final Link link = new Link();
                final Attribute rel = element.getAttributeByName(QName.valueOf(Constants.ATTR_REL));
                if (rel != null) {
                    link.setRel(rel.getValue());
                }
                final Attribute title = element.getAttributeByName(QName.valueOf(Constants.ATTR_TITLE));
                if (title != null) {
                    link.setTitle(title.getValue());
                }
                final Attribute href = element.getAttributeByName(QName.valueOf(Constants.ATTR_HREF));
                if (href != null) {
                    link.setHref(href.getValue());
                }
                final Attribute type = element.getAttributeByName(QName.valueOf(Constants.ATTR_TYPE));
                if (type != null) {
                    link.setType(type.getValue());
                }

                // The rel attribute is optional above, so guard against a link that has none
                // instead of failing with a NullPointerException.
                final String linkRel = link.getRel();
                if (linkRel != null) {
                    if (linkRel.startsWith(Constants.NS_NAVIGATION_LINK_REL)) {
                        ((ComplexValue) value).getNavigationLinks().add(link);
                        inline(reader, element, link);
                    } else if (linkRel.startsWith(Constants.NS_ASSOCIATION_LINK_REL)) {
                        // Same direct cast as the navigation branch.
                        ((ComplexValue) value).getAssociationLinks().add(link);
                    }
                }
            } else {
                ((ComplexValue) value).getValue().add(property(reader, element));
            }
        }

        if (event.isCharacters() && !event.asCharacters().isWhiteSpace()) {
            // Plain text content: the value is the text itself.
            value = event.asCharacters().getData();
        }

        if (event.isEndElement() && start.getName().equals(event.asEndElement().getName())) {
            foundEndProperty = true;
        }
    }

    return value;
}