Example usage for javax.xml.stream.events Characters isWhiteSpace

List of usage examples for javax.xml.stream.events Characters isWhiteSpace

Introduction

This page collects usage examples for the javax.xml.stream.events.Characters.isWhiteSpace() method.

Prototype

public boolean isWhiteSpace();

Source Link

Document

Returns true if this set of Characters is all whitespace.

Usage

From source file:StAXEventTreeViewer.java

/**
 * Reads {@code file} as a stream of StAX events and appends one tree node per
 * event of interest underneath {@code current}.
 *
 * <p>Start-document properties, elements (with their namespace and attributes),
 * non-whitespace character data and the DTD declaration each become a node.
 * Any other event type only has its class name printed to standard output.
 *
 * @param treeModel the model owning the tree; not used by this method
 * @param current   the node under which the document's nodes are added
 * @param file      the XML file to read
 * @throws XMLStreamException    if the XML cannot be parsed, or the file cannot be closed
 * @throws FileNotFoundException if {@code file} cannot be opened
 */
public void buildTree(DefaultTreeModel treeModel, DefaultMutableTreeNode current, File file)
        throws XMLStreamException, FileNotFoundException {

    XMLInputFactory inputFactory = XMLInputFactory.newInstance();
    // XMLEventReader.close() does not close the underlying stream, so the
    // stream is managed separately with try-with-resources.
    try (FileInputStream input = new FileInputStream(file)) {
        XMLEventReader reader = inputFactory.createXMLEventReader(input);
        try {
            while (reader.hasNext()) {
                XMLEvent event = reader.nextEvent();
                switch (event.getEventType()) {
                case XMLStreamConstants.START_DOCUMENT:
                    StartDocument startDocument = (StartDocument) event;
                    current.add(new DefaultMutableTreeNode(startDocument.getVersion()));
                    current.add(new DefaultMutableTreeNode(startDocument.isStandalone()));
                    current.add(new DefaultMutableTreeNode(startDocument.standaloneSet()));
                    current.add(new DefaultMutableTreeNode(startDocument.encodingSet()));
                    current.add(new DefaultMutableTreeNode(startDocument.getCharacterEncodingScheme()));
                    break;
                case XMLStreamConstants.START_ELEMENT:
                    StartElement startElement = (StartElement) event;
                    QName elementName = startElement.getName();

                    // Descend: everything until the matching END_ELEMENT hangs off this node.
                    DefaultMutableTreeNode element = new DefaultMutableTreeNode(elementName.getLocalPart());
                    current.add(element);
                    current = element;

                    if (!elementName.getNamespaceURI().equals("")) {
                        String prefix = elementName.getPrefix();
                        if (prefix.equals("")) {
                            prefix = "[None]";
                        }
                        DefaultMutableTreeNode namespace = new DefaultMutableTreeNode(
                                "prefix=" + prefix + ",URI=" + elementName.getNamespaceURI());
                        current.add(namespace);
                    }

                    for (Iterator<?> it = startElement.getAttributes(); it.hasNext();) {
                        Attribute attr = (Attribute) it.next();
                        // The value is wrapped in a matching pair of quotes.
                        DefaultMutableTreeNode attribute = new DefaultMutableTreeNode("Attribute (name="
                                + attr.getName().getLocalPart() + ",value='" + attr.getValue() + "')");
                        String attURI = attr.getName().getNamespaceURI();
                        if (!attURI.equals("")) {
                            String attPrefix = attr.getName().getPrefix();
                            if (attPrefix.equals("")) {
                                attPrefix = "[None]";
                            }
                            attribute.add(new DefaultMutableTreeNode("prefix = " + attPrefix + ", URI = " + attURI));
                        }
                        current.add(attribute);
                    }
                    break;
                case XMLStreamConstants.END_ELEMENT:
                    // Ascend back to the enclosing element's node.
                    current = (DefaultMutableTreeNode) current.getParent();
                    break;
                case XMLStreamConstants.CHARACTERS:
                    Characters characters = (Characters) event;
                    if (!characters.isIgnorableWhiteSpace() && !characters.isWhiteSpace()) {
                        String data = characters.getData();
                        if (data.length() != 0) {
                            current.add(new DefaultMutableTreeNode(data));
                        }
                    }
                    break;
                case XMLStreamConstants.DTD:
                    DTD dtde = (DTD) event;
                    current.add(new DefaultMutableTreeNode(dtde.getDocumentTypeDeclaration()));
                    // Without this break the DTD event also fell into the default branch.
                    break;
                default:
                    System.out.println(event.getClass().getName());
                }
            }
        } finally {
            reader.close();
        }
    } catch (FileNotFoundException e) {
        // Declared by the signature; let it through untouched.
        throw e;
    } catch (java.io.IOException e) {
        // Only the implicit close() of the stream can land here; keep the declared
        // exception types unchanged by reporting it as a stream failure.
        throw new XMLStreamException("Unable to close " + file, e);
    }
}

From source file:edu.jhu.hlt.concrete.ingesters.webposts.WebPostIngester.java

/**
 * Builds a {@link Communication} from the web-post XML file at {@code path}.
 *
 * <p>The whole file becomes the communication text. The file is then parsed a
 * second time as XML events and a {@link Section} is added for the headline,
 * for every {@code QUOTE} element's {@code PREVIOUSPOST} attribute, and for
 * every non-whitespace run of character data. Character-data sections are
 * labelled "poster", "postdate" or "post" according to their position within
 * the enclosing post.
 *
 * @param path the file to ingest
 * @return the populated communication
 * @throws IngestException if the file is missing or is a directory, cannot be read,
 *         is not parseable, or contains a {@code QUOTE} element without a
 *         {@code PREVIOUSPOST} attribute
 */
@Override
public Communication fromCharacterBasedFile(final Path path) throws IngestException {
    if (!Files.exists(path))
        throw new IngestException("No file at: " + path.toString());

    AnalyticUUIDGeneratorFactory f = new AnalyticUUIDGeneratorFactory();
    AnalyticUUIDGenerator g = f.create();
    Communication c = new Communication();
    c.setUuid(g.next());
    c.setType(this.getKind());
    c.setMetadata(TooledMetadataConverter.convert(this));

    try {
        ExistingNonDirectoryFile ef = new ExistingNonDirectoryFile(path);
        // The id is the file name up to its first dot.
        c.setId(ef.getName().split("\\.")[0]);
    } catch (NoSuchFileException | NotFileException e) {
        // might throw if path is a directory.
        throw new IngestException(path.toString() + " is not a file, or is a directory.");
    }

    // First pass: the raw file content is the communication text that all
    // text spans below index into.
    String content;
    try (InputStream is = Files.newInputStream(path);
            BufferedInputStream bin = new BufferedInputStream(is, 1024 * 8 * 8);) {
        content = IOUtils.toString(bin, StandardCharsets.UTF_8);
        c.setText(content);
    } catch (IOException e) {
        throw new IngestException(e);
    }

    // Second pass: walk the XML events to locate section boundaries.
    try (InputStream is = Files.newInputStream(path);
            BufferedInputStream bin = new BufferedInputStream(is, 1024 * 8 * 8);
            BufferedReader reader = new BufferedReader(new InputStreamReader(bin, StandardCharsets.UTF_8));) {
        XMLEventReader rdr = null;
        try {
            rdr = inF.createXMLEventReader(reader);

            // Below method moves the reader
            // to the headline end element.
            Section headline = this.handleBeginning(rdr, content, c);
            headline.setUuid(g.next());
            c.addToSectionList(headline);
            TextSpan sts = headline.getTextSpan();
            LOGGER.debug("headline text: {}", c.getText().substring(sts.getStart(), sts.getEnding()));

            int sectNumber = 1;
            int subSect = 0;

            int currOff = -1;
            // Big amounts of characters.
            while (rdr.hasNext()) {
                XMLEvent nextEvent = rdr.nextEvent();
                currOff = nextEvent.getLocation().getCharacterOffset();

                // First: see if document is going to end.
                // If yes: exit.
                if (nextEvent.isEndDocument())
                    break;

                // region
                // enables ingestion of quotes inside a usenet webpost.
                // by Tongfei Chen
                if (nextEvent.isStartElement()
                        && nextEvent.asStartElement().getName().equals(QName.valueOf("QUOTE"))) {
                    Attribute attrQuote = nextEvent.asStartElement()
                            .getAttributeByName(QName.valueOf("PREVIOUSPOST"));
                    // getAttributeByName returns null when the attribute is absent; report
                    // that as an ingest failure instead of an unchecked NullPointerException.
                    if (attrQuote == null)
                        throw new IngestException(
                                "QUOTE element has no PREVIOUSPOST attribute in: " + path.toString());
                    String quote = StringEscapeUtils.escapeXml(attrQuote.getValue());
                    int location = attrQuote.getLocation().getCharacterOffset()
                            + "<QUOTE PREVIOUSPOST=\"".length();
                    Section quoteSection = new Section(g.next(), "quote")
                            .setTextSpan(new TextSpan(location, location + quote.length()));
                    c.addToSectionList(quoteSection);
                }
                // endregion

                // Character data carries the section text.
                if (nextEvent.isCharacters()) {
                    Characters chars = nextEvent.asCharacters();
                    if (!chars.isWhiteSpace()) {
                        String fpContent = chars.getData();
                        LOGGER.debug("Character offset: {}", currOff);
                        LOGGER.debug("Character based data: {}", fpContent);

                        SimpleImmutableEntry<Integer, Integer> pads = trimSpacing(fpContent);
                        final int tsb = currOff + pads.getKey();

                        final int tse = currOff + fpContent.replace("\"", "&quot;").replace("<", "&lt;")
                                .replace(">", "&gt;").length() - (pads.getValue());
                        // MAINTAIN CORRECT TEXT SPAN
                        // CANNOT USE StringEscapeUtils.escapeXml because it will escape "'", which
                        // is not escaped in the data
                        // @tongfei

                        LOGGER.debug("Section text: {}", content.substring(tsb, tse));
                        TextSpan ts = new TextSpan(tsb, tse);
                        // The first two text runs of a post are labelled poster and
                        // postdate; later runs are labelled post.
                        String sk;
                        if (subSect == 0)
                            sk = "poster";
                        else if (subSect == 1)
                            sk = "postdate";
                        else
                            sk = "post";

                        Section s = new Section();
                        s.setKind(sk);
                        s.setTextSpan(ts);
                        s.setUuid(g.next());
                        List<Integer> intList = new ArrayList<>();
                        intList.add(sectNumber);
                        intList.add(subSect);
                        s.setNumberList(intList);
                        c.addToSectionList(s);

                        subSect++;
                    }
                } else if (nextEvent.isEndElement()) {
                    EndElement ee = nextEvent.asEndElement();
                    currOff = ee.getLocation().getCharacterOffset();
                    QName name = ee.getName();
                    String localName = name.getLocalPart();
                    LOGGER.debug("Hit end element: {}", localName);
                    if (localName.equalsIgnoreCase(POST_LOCAL_NAME)) {
                        LOGGER.debug("Switching to new post.");
                        sectNumber++;
                        subSect = 0;
                    } else if (localName.equalsIgnoreCase(TEXT_LOCAL_NAME)) {
                        // done with document.
                        break;
                    }
                }
            }

            return c;

        } catch (XMLStreamException | ConcreteException | StringIndexOutOfBoundsException
                | ClassCastException x) {
            throw new IngestException(x);
        } finally {
            if (rdr != null)
                try {
                    rdr.close();
                } catch (XMLStreamException e) {
                    // not likely.
                    LOGGER.info("Error closing XMLReader.", e);
                }
        }
    } catch (IOException e) {
        throw new IngestException(e);
    }
}

From source file:act.installer.pubchem.PubchemParser.java

/**
 * Incrementally parses a stream of XML events from a PubChem file, extracting the next available PC-Compound entry
 * as a Chemical object./*from   w  w  w.j a  v  a 2  s. c  o m*/
 * @param eventReader The xml event reader we are parsing the XML from
 * @return The constructed chemical
 * @throws XMLStreamException
 * @throws XPathExpressionException
 */
public Chemical extractNextChemicalFromXMLStream(XMLEventReader eventReader)
        throws XMLStreamException, JaxenException {
    Document bufferDoc = null;
    Element currentElement = null;
    StringBuilder textBuffer = null;
    /* With help from
     * http://stackoverflow.com/questions/7998733/loading-local-chunks-in-dom-while-parsing-a-large-xml-file-in-sax-java
     */
    while (eventReader.hasNext()) {
        XMLEvent event = eventReader.nextEvent();

        switch (event.getEventType()) {
        case XMLStreamConstants.START_ELEMENT:
            String eventName = event.asStartElement().getName().getLocalPart();
            if (COMPOUND_DOC_TAG.equals(eventName)) {
                // Create a new document if we've found the start of a compound object.
                bufferDoc = documentBuilder.newDocument();
                currentElement = bufferDoc.createElement(eventName);
                bufferDoc.appendChild(currentElement);
            } else if (currentElement != null) { // Wait until we've found a compound entry to start slurping up data.
                // Create a new child element and push down the current pointer when we find a new node.
                Element newElement = bufferDoc.createElement(eventName);
                currentElement.appendChild(newElement);
                currentElement = newElement;
            } // If we aren't in a PC-Compound tree, we just let the elements pass by.
            break;

        case XMLStreamConstants.CHARACTERS:
            if (currentElement == null) { // Ignore this event if we're not in a PC-Compound tree.
                continue;
            }

            Characters chars = event.asCharacters();
            // Ignore only whitespace strings, which just inflate the size of the DOM.  Text coalescing makes this safe.
            if (chars.isWhiteSpace()) {
                continue;
            }

            // Rely on the XMLEventStream to coalesce consecutive text events.
            Text textNode = bufferDoc.createTextNode(chars.getData());
            currentElement.appendChild(textNode);
            break;

        case XMLStreamConstants.END_ELEMENT:
            if (currentElement == null) { // Ignore this event if we're not in a PC-Compound tree.
                continue;
            }

            eventName = event.asEndElement().getName().getLocalPart();
            Node parentNode = currentElement.getParentNode();
            if (parentNode instanceof Element) {
                currentElement = (Element) parentNode;
            } else if (parentNode instanceof Document && eventName.equals(COMPOUND_DOC_TAG)) {
                // We're back at the top of the node stack!  Convert the buffered document into a Chemical.
                PubchemEntry entry = extractPCCompoundFeatures(bufferDoc);
                if (entry != null) {
                    return entry.asChemical();
                } else {
                    // Skip this entry if we can't process it correctly by resetting the world and continuing on.
                    bufferDoc = null;
                    currentElement = null;
                }
            } else {
                // This should not happen, but is here as a sanity check.
                throw new RuntimeException(String.format("Parent of XML element %s is of type %d, not Element",
                        currentElement.getTagName(), parentNode.getNodeType()));
            }
            break;

        // TODO: do we care about attributes or other XML structures?
        }
    }

    // Return null when we run out of chemicals, just like readLine().
    return null;
}

From source file:com.evolveum.polygon.connector.hcm.DocumentProcessing.java

/**
 * Streams the XML file named by {@code conf.getFilePath()} and collects employee
 * attribute values into this object's {@code attributeMap}.
 *
 * <p>Character data is accumulated per element and stored when the matching end
 * element is seen. Assignment sub-trees are rebuilt as XML text through
 * {@code processAssignment}. Each closing employee element is passed to
 * {@code handleEmployeeData}.
 *
 * @param conf               supplies the file path, uid attribute name and primary id name
 * @param handler            passed through to {@code handleEmployeeData} and {@code handleBufferedData}
 * @param schemaAttributeMap schema attributes; replaced by {@code modifySchemaAttributeMap}
 *                           when {@code attrsToGet} is not empty
 * @param query              filter passed to {@code checkFilter} on every end element
 * @return this object's {@code attributeMap}
 * @throws ConnectorIOException if the file does not exist or cannot be parsed
 */
public Map<String, Object> parseXMLData(HcmConnectorConfiguration conf, ResultsHandler handler,
        Map<String, Object> schemaAttributeMap, Filter query) {

    XMLInputFactory factory = XMLInputFactory.newInstance();
    // Declared outside the try so both can be released in the finally block;
    // closing the XMLEventReader does not close the underlying FileReader.
    FileReader fileReader = null;
    XMLEventReader eventReader = null;
    try {

        String uidAttributeName = conf.getUidAttribute();
        String primariId = conf.getPrimaryId();
        String startName = "";
        String value = null;

        StringBuilder assignmentXMLBuilder = null;

        List<String> builderList = new ArrayList<String>();

        Integer nOfIterations = 0;
        Boolean isSubjectToQuery = false;
        Boolean isAssigment = false;
        Boolean evaluateAttr = true;
        Boolean specificAttributeQuery = false;

        // NOTE(review): FileReader decodes with the platform default charset and
        // ignores the XML declaration's encoding - confirm this is intended.
        fileReader = new FileReader(conf.getFilePath());
        eventReader = factory.createXMLEventReader(fileReader);
        List<String> dictionary = populateDictionary(FIRSTFLAG);

        if (!attrsToGet.isEmpty()) {

            attrsToGet.add(uidAttributeName);
            attrsToGet.add(primariId);
            specificAttributeQuery = true;
            evaluateAttr = false;
            LOGGER.ok("The uid and primary id were added to the queried attribute list");

            schemaAttributeMap = modifySchemaAttributeMap(schemaAttributeMap);
        }

        while (eventReader.hasNext()) {

            XMLEvent event = eventReader.nextEvent();

            Integer code = event.getEventType();

            if (code == XMLStreamConstants.START_ELEMENT) {

                StartElement startElement = event.asStartElement();
                startName = startElement.getName().getLocalPart();

                // With an explicit attribute list, only listed elements are evaluated.
                if (!evaluateAttr && attrsToGet.contains(startName)) {

                    evaluateAttr = true;
                }

                if (!elementIsEmployeeData) {

                    if (startName.equals(EMPLOYEES)) {

                        if (dictionary.contains(nOfIterations.toString())) {
                            LOGGER.ok("The defined number of iterations has been hit: {0}",
                                    nOfIterations.toString());
                            break;
                        } else {
                            startName = "";
                            elementIsEmployeeData = true;
                            nOfIterations++;
                        }
                    }
                } else if (evaluateAttr) {

                    if (!isAssigment) {
                        // Entering an assignment sub-tree starts a fresh XML buffer.
                        if (ASSIGNMENTTAG.equals(startName)) {
                            assignmentXMLBuilder = new StringBuilder();
                            isAssigment = true;
                        }
                    } else {

                        builderList = processAssignment(startName, null, START, builderList);
                    }

                    if (multiValuedAttributesList.contains(startName)) {

                        elementIsMultiValued = true;
                    }

                }

            } else if (elementIsEmployeeData) {

                if (code == XMLStreamConstants.CHARACTERS && evaluateAttr) {

                    Characters characters = event.asCharacters();

                    if (!characters.isWhiteSpace()) {

                        // Text may arrive in several events; append to what was collected so far.
                        StringBuilder valueBuilder;
                        if (value != null) {
                            valueBuilder = new StringBuilder(value).append("")
                                    .append(characters.getData().toString());
                        } else {
                            valueBuilder = new StringBuilder(characters.getData().toString());
                        }
                        value = valueBuilder.toString();
                        // value = StringEscapeUtils.escapeXml10(value);
                        // LOGGER.info("The attribute value for: {0} is
                        // {1}", startName, value);
                    }
                } else if (code == XMLStreamConstants.END_ELEMENT) {

                    EndElement endElement = event.asEndElement();
                    String endName = endElement.getName().getLocalPart();

                    isSubjectToQuery = checkFilter(endName, value, query, uidAttributeName);

                    if (!isSubjectToQuery) {
                        // Discard the current record and treat this as the end of the employee.
                        attributeMap.clear();
                        elementIsEmployeeData = false;
                        value = null;

                        endName = EMPLOYEES;
                    }

                    if (endName.equals(EMPLOYEES)) {

                        attributeMap = handleEmployeeData(attributeMap, schemaAttributeMap, handler,
                                uidAttributeName, primariId);

                        elementIsEmployeeData = false;

                    } else if (evaluateAttr) {

                        if (endName.equals(startName)) {
                            if (value != null) {

                                if (!isAssigment) {
                                    if (!elementIsMultiValued) {

                                        attributeMap.put(startName, value);
                                    } else {

                                        multiValuedAttributeBuffer.put(startName, value);
                                    }
                                } else {

                                    value = StringEscapeUtils.escapeXml10(value);
                                    builderList = processAssignment(endName, value, VALUE, builderList);

                                    builderList = processAssignment(endName, null, END, builderList);
                                }
                                // LOGGER.info("Attribute name: {0} and the
                                // Attribute value: {1}", endName, value);
                                value = null;
                            }
                        } else {
                            if (endName.equals(ASSIGNMENTTAG)) {

                                builderList = processAssignment(endName, null, CLOSE, builderList);

                                // if (assigmentIsActive) {

                                for (String records : builderList) {
                                    assignmentXMLBuilder.append(records);

                                }
                                attributeMap.put(ASSIGNMENTTAG, assignmentXMLBuilder.toString());
                                // } else {
                                // }

                                builderList = new ArrayList<String>();
                                // assigmentIsActive = false;
                                isAssigment = false;

                            } else if (multiValuedAttributesList.contains(endName)) {
                                processMultiValuedAttributes(multiValuedAttributeBuffer);
                            }
                        }

                    }
                    if (specificAttributeQuery && evaluateAttr) {

                        evaluateAttr = false;
                    }
                }
            } else if (code == XMLStreamConstants.END_DOCUMENT) {
                handleBufferedData(uidAttributeName, primariId, handler);
            }
        }

    } catch (FileNotFoundException e) {
        StringBuilder errorBuilder = new StringBuilder("File not found at the specified path.")
                .append(e.getLocalizedMessage());
        LOGGER.error("File not found at the specified path: {0}", e);
        // Keep the original exception as the cause so the stack trace is not lost.
        throw new ConnectorIOException(errorBuilder.toString(), e);
    } catch (XMLStreamException e) {

        LOGGER.error("Unexpected processing error while parsing the .xml document : {0}", e);

        StringBuilder errorBuilder = new StringBuilder(
                "Unexpected processing error while parsing the .xml document. ")
                        .append(e.getLocalizedMessage());

        throw new ConnectorIOException(errorBuilder.toString(), e);
    } finally {
        // Release the parser first, then the file handle. A failure to close a
        // read-only source is logged but must not mask the parse result.
        if (eventReader != null) {
            try {
                eventReader.close();
            } catch (XMLStreamException e) {
                LOGGER.error("Unable to close the XML event reader: {0}", e);
            }
        }
        if (fileReader != null) {
            try {
                fileReader.close();
            } catch (java.io.IOException e) {
                LOGGER.error("Unable to close the file reader: {0}", e);
            }
        }
    }
    return attributeMap;

}

From source file:edu.jhu.hlt.concrete.ingesters.bolt.BoltForumPostIngester.java

/**
 * Builds a {@link Communication} from the BOLT forum-post XML file at {@code path}.
 *
 * <p>The whole file becomes the communication text. The file is then parsed a
 * second time as XML events and a {@link Section} is added for the headline,
 * for the {@code author} and {@code datetime} attributes of any start element
 * carrying both, and for every non-blank run of character data (kind "post").
 * Quote, image and link elements are handed to their dedicated handlers.
 *
 * @param path the file to ingest
 * @return the populated communication
 * @throws IngestException if the file is missing or is a directory, cannot be read,
 *         or is not parseable
 */
@Override
public Communication fromCharacterBasedFile(final Path path) throws IngestException {
    if (!Files.exists(path))
        throw new IngestException("No file at: " + path.toString());

    AnalyticUUIDGeneratorFactory f = new AnalyticUUIDGeneratorFactory();
    AnalyticUUIDGenerator gen = f.create();
    Communication c = new Communication();
    c.setUuid(gen.next());
    c.setType(this.getKind());
    c.setMetadata(TooledMetadataConverter.convert(this));

    try {
        ExistingNonDirectoryFile ef = new ExistingNonDirectoryFile(path);
        // The id is the file name up to its first dot.
        c.setId(ef.getName().split("\\.")[0]);
    } catch (NoSuchFileException | NotFileException e) {
        // might throw if path is a directory.
        throw new IngestException(path.toString() + " is not a file, or is a directory.");
    }

    // First pass: the raw file content is the communication text that all
    // text spans below index into.
    String content;
    try (InputStream is = Files.newInputStream(path);
            BufferedInputStream bin = new BufferedInputStream(is, 1024 * 8 * 8);) {
        content = IOUtils.toString(bin, StandardCharsets.UTF_8);
        c.setText(content);
    } catch (IOException e) {
        throw new IngestException(e);
    }

    // Second pass: walk the XML events to locate section boundaries.
    try (InputStream is = Files.newInputStream(path);
            BufferedInputStream bin = new BufferedInputStream(is, 1024 * 8 * 8);
            BufferedReader reader = new BufferedReader(new InputStreamReader(bin, StandardCharsets.UTF_8));) {
        XMLEventReader rdr = null;
        try {
            rdr = inF.createXMLEventReader(reader);

            // Below method moves the reader
            // to the first post element.
            Section headline = handleHeadline(rdr, content);
            headline.setUuid(gen.next());
            c.addToSectionList(headline);
            int start = headline.getTextSpan().getStart();
            int ending = headline.getTextSpan().getEnding();
            if (ending < start)
                ending = start; // @tongfei: handle empty headlines
            String htxt = c.getText().substring(start, ending);
            LOGGER.debug("headline text: {}", htxt);

            // Section indices.
            int sectNumber = 1;
            int subSect = 0;

            // Move iterator to post start element.
            this.iterateToPosts(rdr);

            // Offset pointer.
            int currOff = -1;

            SectionFactory sf = new SectionFactory(gen);

            // First post element.
            while (rdr.hasNext()) {
                XMLEvent nextEvent = rdr.nextEvent();
                currOff = nextEvent.getLocation().getCharacterOffset();
                if (currOff > 0) {
                    int currOffPlus = currOff + 20;
                    // Clamp at zero: an event within the first 20 characters would otherwise
                    // make this debug-only substring throw and abort the whole ingest.
                    int currOffLess = Math.max(0, currOff - 20);
                    LOGGER.debug("Offset: {}", currOff);
                    if (currOffPlus < content.length())
                        LOGGER.debug("Surrounding text: {}", content.substring(currOffLess, currOffPlus));
                }

                // First: see if document is going to end.
                // If yes: exit.
                if (nextEvent.isEndDocument())
                    break;

                // XMLEvent peeker = rdr.peek();

                // Check if start element.
                if (nextEvent.isStartElement()) {
                    StartElement se = nextEvent.asStartElement();
                    QName name = se.getName();
                    final String localName = name.getLocalPart();
                    LOGGER.debug("Hit start element: {}", localName);

                    //region
                    // Add sections for authors and datetimes for each bolt post
                    // by Tongfei Chen
                    Attribute attrAuthor = se.getAttributeByName(QName.valueOf("author"));
                    Attribute attrDateTime = se.getAttributeByName(QName.valueOf("datetime"));

                    if (attrAuthor != null && attrDateTime != null) {

                        int loc = attrAuthor.getLocation().getCharacterOffset();

                        // Offsets are derived from the literal tag text preceding each value.
                        int sectAuthorBeginningOffset = loc + "<post author=\"".length();

                        Section sectAuthor = sf.fromTextSpan(new TextSpan(sectAuthorBeginningOffset,
                                sectAuthorBeginningOffset + attrAuthor.getValue().length()), "author");
                        c.addToSectionList(sectAuthor);

                        int sectDateTimeBeginningOffset = sectAuthorBeginningOffset
                                + attrAuthor.getValue().length() + " datetime=".length();

                        Section sectDateTime = sf.fromTextSpan(
                                new TextSpan(sectDateTimeBeginningOffset,
                                        sectDateTimeBeginningOffset + attrDateTime.getValue().length()),
                                "datetime");
                        c.addToSectionList(sectDateTime);
                    }
                    //endregion

                    // Move past quotes, images, and links.
                    if (localName.equals(QUOTE_LOCAL_NAME)) {
                        this.handleQuote(rdr);
                    } else if (localName.equals(IMG_LOCAL_NAME)) {
                        this.handleImg(rdr);
                    } else if (localName.equals(LINK_LOCAL_NAME)) {
                        this.handleLink(rdr);
                    }

                    // not a start element
                } else if (nextEvent.isCharacters()) {
                    Characters chars = nextEvent.asCharacters();
                    int coff = chars.getLocation().getCharacterOffset();
                    if (!chars.isWhiteSpace()) {
                        // content to be captured
                        String fpContent = chars.getData();
                        LOGGER.debug("Character offset: {}", coff);
                        LOGGER.debug("Character based data: {}", fpContent);
                        // LOGGER.debug("Character data via offset diff: {}", content.substring(coff - fpContent.length(), coff));

                        SimpleImmutableEntry<Integer, Integer> pads = trimSpacing(fpContent);
                        final int tsb = currOff + pads.getKey();
                        final int tse = currOff + fpContent.length() - pads.getValue();
                        final String subs = content.substring(tsb, tse);
                        // Skip spans that contain only space separators and newlines.
                        if (subs.replaceAll("\\p{Zs}", "").replaceAll("\\n", "").isEmpty()) {
                            LOGGER.info("Found empty section: skipping.");
                            continue;
                        }

                        LOGGER.debug("Section text: {}", subs);
                        TextSpan ts = new TextSpan(tsb, tse);

                        Section s = sf.fromTextSpan(ts, "post");
                        List<Integer> intList = new ArrayList<>();
                        intList.add(sectNumber);
                        intList.add(subSect);
                        s.setNumberList(intList);
                        c.addToSectionList(s);

                        subSect++;
                    }
                } else if (nextEvent.isEndElement()) {
                    EndElement ee = nextEvent.asEndElement();
                    currOff = ee.getLocation().getCharacterOffset();
                    QName name = ee.getName();
                    String localName = name.getLocalPart();
                    LOGGER.debug("Hit end element: {}", localName);
                    if (localName.equalsIgnoreCase(POST_LOCAL_NAME)) {
                        // A closed post starts a new section number.
                        sectNumber++;
                        subSect = 0;
                    }
                }
            }
            return c;
        } catch (XMLStreamException | ConcreteException | StringIndexOutOfBoundsException x) {
            throw new IngestException(x);
        } finally {
            if (rdr != null)
                try {
                    rdr.close();
                } catch (XMLStreamException e) {
                    // not likely.
                    LOGGER.info("Error closing XMLReader.", e);
                }
        }
    } catch (IOException e) {
        throw new IngestException(e);
    }
}

From source file:microsoft.exchange.webservices.data.core.EwsXmlReader.java

/**
 * Reads the specified node type./*  www . j  a  va 2 s. co  m*/
 *
 * @param keepWhiteSpace Do not remove whitespace characters if true
 * @throws ServiceXmlDeserializationException  the service xml deserialization exception
 * @throws XMLStreamException the XML stream exception
 */
private void read(boolean keepWhiteSpace) throws ServiceXmlDeserializationException, XMLStreamException {
    // The caller to EwsXmlReader.Read expects
    // that there's another node to
    // read. Throw an exception if not true.
    while (true) {
        if (!xmlReader.hasNext()) {
            throw new ServiceXmlDeserializationException("Unexpected end of XML document.");
        } else {
            XMLEvent event = xmlReader.nextEvent();
            if (event.getEventType() == XMLStreamConstants.CHARACTERS) {
                Characters characters = (Characters) event;
                if (!keepWhiteSpace)
                    if (characters.isIgnorableWhiteSpace() || characters.isWhiteSpace()) {
                        continue;
                    }
            }
            this.prevEvent = this.presentEvent;
            this.presentEvent = event;
            break;
        }
    }
}

From source file:microsoft.exchange.webservices.data.core.EwsXmlReader.java

/**
 * Reads the value. Should return content element or text node as string
 * Present event must be START ELEMENT. After executing this function
 * Present event will be set on END ELEMENT
 *
 * @param keepWhiteSpace Do not remove whitespace characters if true
 * @return String/*from  w  w w .  j a v  a  2 s. co  m*/
 * @throws XMLStreamException the XML stream exception
 * @throws ServiceXmlDeserializationException the service xml deserialization exception
 */
public String readValue(boolean keepWhiteSpace) throws XMLStreamException, ServiceXmlDeserializationException {
    if (this.presentEvent.isStartElement()) {
        // Go to next event and check for Characters event
        this.read(keepWhiteSpace);
        if (this.presentEvent.isCharacters()) {
            final StringBuilder elementValue = new StringBuilder();
            do {
                if (this.getNodeType().nodeType == XmlNodeType.CHARACTERS) {
                    Characters characters = (Characters) this.presentEvent;
                    if (keepWhiteSpace || (!characters.isIgnorableWhiteSpace() && !characters.isWhiteSpace())) {
                        final String charactersData = characters.getData();
                        if (charactersData != null && !charactersData.isEmpty()) {
                            elementValue.append(charactersData);
                        }
                    }
                }
                this.read();
            } while (!this.presentEvent.isEndElement());
            // Characters chars = this.presentEvent.asCharacters();
            // String elementValue = chars.getData();
            // Advance to next event post Characters (ideally it will be End
            // Element)
            // this.read();
            return elementValue.toString();
        } else if (this.presentEvent.isEndElement()) {
            return "";
        } else {
            throw new ServiceXmlDeserializationException(
                    getReadValueErrMsg("Could not find " + XmlNodeType.getString(XmlNodeType.CHARACTERS)));
        }
    } else if (this.presentEvent.getEventType() == XmlNodeType.CHARACTERS && this.presentEvent.isCharacters()) {
        /*
         * if(this.presentEvent.asCharacters().getData().equals("<")) {
        */
        final String charData = this.presentEvent.asCharacters().getData();
        final StringBuilder data = new StringBuilder(charData == null ? "" : charData);
        do {
            this.read(keepWhiteSpace);
            if (this.getNodeType().nodeType == XmlNodeType.CHARACTERS) {
                Characters characters = (Characters) this.presentEvent;
                if (keepWhiteSpace || (!characters.isIgnorableWhiteSpace() && !characters.isWhiteSpace())) {
                    final String charactersData = characters.getData();
                    if (charactersData != null && !charactersData.isEmpty()) {
                        data.append(charactersData);
                    }
                }
            }
        } while (!this.presentEvent.isEndElement());
        return data.toString();// this.presentEvent. = new XMLEvent();
        /*
         * } else { Characters chars = this.presentEvent.asCharacters();
         * String elementValue = chars.getData(); // Advance to next event
         * post Characters (ideally it will be End // Element) this.read();
         * return elementValue; }
         */
    } else {
        throw new ServiceXmlDeserializationException(
                getReadValueErrMsg("Expected is " + XmlNodeType.getString(XmlNodeType.START_ELEMENT)));
    }

}

From source file:com.logiware.accounting.domain.EdiInvoice.java

/**
 * Stores the text of a character event in the field selected by the current
 * parser position.
 * <p>
 * Whitespace-only events are ignored. The target is chosen from the section
 * flags ({@code isHeader}, {@code isBody}, {@code isInformation},
 * {@code isDetails}, {@code isSummary}, {@code isBank}) together with
 * {@code elementType} and {@code characterType}; text that matches no known
 * combination is dropped silently.
 *
 * @param text the character event to store
 * @throws Exception an {@code AccountingException} if the header's
 *         "Applicationreference" is not "INVOICE"; otherwise whatever the
 *         parsing and lookup helpers called here throw
 */
private void setValue(Characters text) throws Exception {
    if (text.isWhiteSpace()) {
        return;
    }
    final String value = text.getData();
    if (isHeader) {
        applyHeaderValue(value);
    } else if (isBody) {
        if (isInformation) {
            applyInformationValue(value);
        } else if (isDetails) {
            applyDetailValue(value);
        } else if (isSummary) {
            applySummaryValue(value);
        }
    }
}

/** Handles text inside the header: document type check, reference and sender code (with vendor lookup). */
private void applyHeaderValue(String value) throws Exception {
    if ("Applicationreference".equals(elementType) && !"INVOICE".equalsIgnoreCase(value)) {
        throw new AccountingException("Bad file. Not an Invoice.");
    }
    if ("Reference".equals(elementType)) {
        ediReference = value;
    } else if ("Sender".equals(elementType) && "Code".equals(characterType)) {
        ediCode = value;
        final VendorModel vendor = new EdiInvoiceDAO().getVendor(ediCode);
        // Vendor number and name are only taken over when the lookup found a vendor with a number.
        if (null != vendor && CommonUtils.isNotEmpty(vendor.getVendorNumber())) {
            vendorNumber = vendor.getVendorNumber();
            vendorName = vendor.getVendorName();
        }
    }
}

/** Handles text inside the information part of the body, dispatching on {@code elementType}. */
private void applyInformationValue(String value) throws Exception {
    if ("Invoice".equals(elementType)) {
        if ("Number".equals(characterType)) {
            invoiceNumber = value;
            // Search key: the invoice number with every run of non-alphanumeric characters removed.
            searchInvoiceNumber = invoiceNumber.replaceAll("[^\\p{Alpha}\\p{Digit}]+", "");
        } else if ("Date".equals(characterType)) {
            invoiceDate = DateUtils.parseDate(value, "yyyy-MM-dd");
        }
    } else if ("RelatedReferences".equals(elementType)) {
        if ("EFR".equals(characterType)) {
            ourReference = value;
        } else if ("BLR".equals(characterType)) {
            blNumber = value;
        } else if ("CR".equals(characterType)) {
            yourReference1 = value;
        } else if ("TID".equals(characterType)) {
            yourReference2 = value;
        }
    } else if ("Company".equals(elementType)) {
        applyPartyValue("Company", false, value);
    } else if ("Vendor".equals(elementType)) {
        if (isBank) {
            applyBankValue(value);
        } else {
            applyPartyValue("Vendor", true, value);
        }
    } else if ("PaymentTerms".equalsIgnoreCase(elementType)) {
        if ("Description".equalsIgnoreCase(characterType)) {
            paymentTerms = value;
        }
    } else if ("ShipmentInformation".equalsIgnoreCase(elementType)) {
        applyShipmentValue(value);
    }
}

/**
 * Handles text describing a party. A "Name" starts a new {@code party} of the
 * given type; every other field is written to the party currently held.
 *
 * @param partyType                  type set on a newly created party
 * @param acceptsRegistrationNumbers whether the company registration and
 *                                   license number fields are recognised
 * @param value                      the text to store
 */
private void applyPartyValue(String partyType, boolean acceptsRegistrationNumbers, String value)
        throws Exception {
    if ("Name".equals(characterType)) {
        party = new EdiInvoiceParty();
        party.setEdiInvoice(this);
        party.setType(partyType);
        party.setName(value);
    } else if ("Street".equals(characterType)) {
        party.setStreet(value);
    } else if ("Zip".equals(characterType)) {
        party.setZip(value);
    } else if ("City".equals(characterType)) {
        party.setCity(value);
    } else if ("Country".equals(characterType)) {
        party.setCountry(value);
    } else if ("VATRegistrationNumber".equalsIgnoreCase(characterType)) {
        party.setVatNumber(value);
    } else if (acceptsRegistrationNumbers && "CompanyRegistrationNumber".equalsIgnoreCase(characterType)) {
        party.setRegistrationNumber(value);
    } else if (acceptsRegistrationNumbers && "CompanyLicenseNumber".equalsIgnoreCase(characterType)) {
        party.setLicenseNumber(value);
    }
}

/** Handles text describing the vendor's bank. A "Name" starts a new {@code bank}. */
private void applyBankValue(String value) throws Exception {
    if ("Name".equals(characterType)) {
        bank = new EdiInvoiceBank();
        bank.setEdiInvoice(this);
        bank.setName(value);
    } else if ("Street".equals(characterType)) {
        // The first street line fills street1, any later one goes to street2.
        if (null == bank.getStreet1()) {
            bank.setStreet1(value);
        } else {
            bank.setStreet2(value);
        }
    } else if ("Zip".equals(characterType)) {
        bank.setZip(value);
    } else if ("City".equals(characterType)) {
        bank.setCity(value);
    } else if ("Country".equals(characterType)) {
        bank.setCountry(value);
    } else if ("UnCode".equals(characterType)) {
        bank.setUnCode(value);
    } else if ("AccountNumber".equalsIgnoreCase(characterType)) {
        bank.setAccount(value);
    } else if ("IBAN".equalsIgnoreCase(characterType)) {
        bank.setIban(value);
    } else if ("BIC".equalsIgnoreCase(characterType)) {
        bank.setBic(value);
    }
}

/** Handles text describing the shipment. A "Vessel" starts new shipping details. */
private void applyShipmentValue(String value) throws Exception {
    if ("Vessel".equalsIgnoreCase(characterType)) {
        ediInvoiceShippingDetails = new EdiInvoiceShippingDetails();
        ediInvoiceShippingDetails.setEdiInvoice(this);
        ediInvoiceShippingDetails.setVessel(value);
    } else if ("Date".equalsIgnoreCase(characterType)) {
        ediInvoiceShippingDetails.setDate(DateUtils.parseDate(value, "yyyy-MM-dd"));
    } else if ("Routing".equalsIgnoreCase(characterType)) {
        // Routing text is accumulated: each new piece is appended to what is already stored.
        String routingSoFar = ediInvoiceShippingDetails.getRouting();
        if (null == routingSoFar) {
            routingSoFar = "";
        }
        ediInvoiceShippingDetails.setRouting(routingSoFar + value);
    } else if ("Quantity".equalsIgnoreCase(characterType)) {
        ediInvoiceShippingDetails.setPackageQuantity(value);
    } else if ("Description".equalsIgnoreCase(characterType)) {
        ediInvoiceShippingDetails.setPackageDescription(value);
    } else if ("Weigth".equalsIgnoreCase(characterType)) {
        ediInvoiceShippingDetails.setWeight(value);
    } else if ("Volume".equalsIgnoreCase(characterType)) {
        ediInvoiceShippingDetails.setVolume(value);
    }
}

/** Handles text inside a "Detail" element. An "ItemDescription" starts a new {@code detail}. */
private void applyDetailValue(String value) throws Exception {
    if (!"Detail".equalsIgnoreCase(elementType)) {
        return;
    }
    if ("ItemDescription".equals(characterType)) {
        detail = new EdiInvoiceDetail();
        detail.setEdiInvoice(this);
        detail.setDescription(value);
    } else if ("Quantity".equals(characterType)) {
        detail.setQuantity(value);
    } else if ("CalculationCode".equals(characterType)) {
        detail.setCalculationCode(value);
    } else if ("Price".equals(characterType)) {
        detail.setPrice(value);
    } else if ("Rate".equals(characterType)) {
        detail.setRate(value);
    } else if ("Currency".equals(characterType)) {
        detail.setCurrency(value);
    } else if ("AmountVATExcl".equals(characterType)) {
        // The VAT-excluded amount also seeds the AP and AR amounts.
        detail.setVatExcludedAmount(value);
        detail.setApAmount(value);
        detail.setArAmount(value);
    } else if ("AmountVATIncl".equals(characterType)) {
        detail.setVatIncludedAmount(value);
    } else if ("AmountVAT".equals(characterType)) {
        detail.setVatAmount(value);
    } else if ("VATPercentage".equals(characterType)) {
        detail.setVatPercentage(value);
    } else if ("BLReference".equalsIgnoreCase(characterType)) {
        detail.setBlReference(value);
    }
}

/** Handles text inside the summary: invoice total and VAT amount/percentage. */
private void applySummaryValue(String value) throws Exception {
    if ("TotalMonetaryAmount".equalsIgnoreCase(elementType)) {
        if ("TotalVATIncl".equals(characterType)) {
            invoiceAmount = NumberUtils.parseNumber(value);
        }
    } else if ("TotalMonetaryAmountGroupByVAT".equalsIgnoreCase(elementType)) {
        if ("TotalVAT".equals(characterType)) {
            vatAmount = value;
        } else if ("VATPercentage".equals(characterType)) {
            vatPercentage = value;
        }
    }
}

From source file:org.apache.hadoop.gateway.filter.rewrite.impl.xml.XmlFilterReader.java

/**
 * Handles a character event: mirrors it into the DOM under the node on top of
 * the stack and, unless output is currently being buffered, writes it out.
 * <p>
 * Non-whitespace text is passed through {@code filterText} before it is
 * written. Without scope selectors no rule is given; otherwise the rule of the
 * first matching path is used, provided that path is an apply descriptor.
 * CDATA is written back as a CDATA section, other text is XML-escaped.
 *
 * @param event the character event to process
 * @throws XPathExpressionException declared for the path matching / filtering helpers
 */
private void processCharacters(Characters event) throws XPathExpressionException {
    final Level current = stack.peek();
    final Node parent = stack.peek().node;
    final boolean cdata = event.isCData();
    final String raw = event.getData();

    // The DOM copy is always updated, even while buffering.
    final Node child;
    if (cdata) {
        child = document.createCDATASection(raw);
    } else {
        child = document.createTextNode(raw);
    }
    parent.appendChild(child);

    if (currentlyBuffering()) {
        return;
    }

    String output = raw;
    if (!event.isWhiteSpace()) {
        final boolean unscoped = current.scopeConfig == null || current.scopeConfig.getSelectors().isEmpty();
        if (unscoped) {
            output = filterText(extractQName(parent), output, null);
        } else {
            final UrlRewriteFilterPathDescriptor match = pickFirstMatchingPath(current);
            if (match instanceof UrlRewriteFilterApplyDescriptor) {
                final String rule = ((UrlRewriteFilterApplyDescriptor) match).rule();
                output = filterText(extractQName(parent), output, rule);
            }
        }
    }

    if (cdata) {
        writer.write("<![CDATA[");
        writer.write(output);
        writer.write("]]>");
    } else {
        writer.write(StringEscapeUtils.escapeXml(output));
    }
}

From source file:org.omnaest.utils.xml.XMLNestedMapConverter.java

/**
 * Template method for {@link #newNamespaceAwareMapFromXML(CharSequence)} and {@link #newMapFromXML(CharSequence)} which allows
 * to convert the {@link QName} based key values to other representations.
 * /*from  ww  w  .  j  ava 2s.c o  m*/
 * @param xmlContent
 * @return new (nested) {@link Map} instance
 */
protected <K> Map<K, Object> newMapFromXML(CharSequence xmlContent,
        final ElementConverter<QName, K> keyElementConverter) {
    //
    final Map<K, Object> retmap = new LinkedHashMap<K, Object>();

    //
    Assert.isNotNull(keyElementConverter, "keyElementConverter must not be null");

    //
    final ExceptionHandler exceptionHandler = this.exceptionHandler;

    //    
    try {
        //
        final XMLInputFactory xmlInputFactory = this.xmlInstanceContextFactory.newXmlInputFactory();
        Assert.isNotNull(xmlInputFactory, "xmlInputFactory must not be null");

        //
        final Reader reader = new CharSequenceReader(xmlContent);
        final XMLEventReader xmlEventReader = xmlInputFactory.createXMLEventReader(reader);

        //
        final class Helper {
            /* ********************************************** Variables ********************************************** */
            private List<TupleTwo<QName, Object>> stackList = new ArrayList<TupleTwo<QName, Object>>();

            /* ********************************************** Methods ********************************************** */

            /**
             * Manifests a single tag node recursively
             * 
             * @return
             * @throws XMLStreamException
             */
            @SuppressWarnings("unchecked")
            public TupleTwo<QName, Object> manifest() throws XMLStreamException {
                //
                TupleTwo<QName, Object> retval = null;

                //          
                while (xmlEventReader.hasNext()) {
                    //
                    final XMLEvent xmlEvent = xmlEventReader.nextEvent();

                    //
                    if (xmlEvent.isStartElement()) {
                        //
                        final StartElement startElement = xmlEvent.asStartElement();
                        final QName name = startElement.getName();

                        //
                        this.addNewStackElement().setValueFirst(name);

                        //
                        final Iterator<Attribute> attributeIterator = startElement.getAttributes();
                        if (attributeIterator.hasNext()) {
                            //
                            final Map<QName, Object> map = new LinkedHashMap<QName, Object>();
                            for (Attribute attribute : IterableUtils.valueOf(attributeIterator)) {
                                map.put(attribute.getName(), attribute.getValue());
                            }

                            //
                            this.updateCurrentStackValue(map);
                        }
                    } else if (xmlEvent.isEndElement()) {
                        //
                        retval = this.removeStackElement();

                        //
                        final Object manifestation = retval.getValueSecond();
                        final QName tagname = retval.getValueFirst();

                        //
                        updateCurrentStackValue(manifestation, tagname);
                    } else if (xmlEvent.isCharacters()) {
                        //
                        final Characters characters = xmlEvent.asCharacters();
                        if (!characters.isWhiteSpace()) {
                            //
                            final TupleTwo<QName, Object> currentStackValue = this.getCurrentStackValue();
                            currentStackValue.setValueSecond(
                                    ObjectUtils.defaultIfNull(currentStackValue.getValueSecond(), "")
                                            + characters.getData());

                        }
                    }

                }

                //
                return retval;
            }

            /**
             * Updates the current stack value
             * 
             * @param manifestation
             * @param tagname
             */
            private void updateCurrentStackValue(Object manifestation, QName tagname) {
                //
                final Map<QName, Object> tagNameToManifestationMap = new LinkedHashMap<QName, Object>();
                tagNameToManifestationMap.put(tagname, manifestation);
                this.updateCurrentStackValue(tagNameToManifestationMap);
            }

            @SuppressWarnings("unchecked")
            private void updateCurrentStackValue(Map<QName, Object> tagNameToManifestationMap) {
                //
                final TupleTwo<QName, Object> currentStackValue = this.getCurrentStackValue();

                //
                if (currentStackValue != null) {
                    //
                    Map<K, Object> map = null;
                    {
                        //
                        final Object valueSecond = currentStackValue.getValueSecond();
                        if (valueSecond instanceof Map) {
                            map = (Map<K, Object>) valueSecond;
                        } else {
                            //
                            map = new LinkedHashMap<K, Object>();
                            if (valueSecond instanceof String) {
                                map.put(keyElementConverter.convert(new QName("")), valueSecond);
                            }
                        }
                    }

                    //
                    for (Entry<QName, Object> tagNameToManifestationEntry : tagNameToManifestationMap
                            .entrySet()) {
                        //
                        final K tagname = keyElementConverter.convert(tagNameToManifestationEntry.getKey());
                        final Object manifestation = tagNameToManifestationEntry.getValue();

                        //
                        if (!map.containsKey(tagname)) {
                            map.put(tagname, manifestation);
                        } else {
                            //
                            final Object object = map.get(tagname);
                            if (object instanceof List) {
                                //
                                final List<Object> list = (List<Object>) object;
                                list.add(manifestation);
                            } else {
                                //
                                final List<Object> list = new ArrayList<Object>();
                                list.add(object);
                                list.add(manifestation);
                                map.put(tagname, list);
                            }
                        }
                    }

                    //
                    currentStackValue.setValueSecond(map);
                }
            }

            private TupleTwo<QName, Object> getCurrentStackValue() {
                return ListUtils.firstElement(this.stackList);
            }

            private TupleTwo<QName, Object> removeStackElement() {
                return ListUtils.removeFirst(this.stackList);
            }

            private TupleTwo<QName, Object> addNewStackElement() {
                //
                final TupleTwo<QName, Object> retval = new TupleTwo<QName, Object>();
                this.stackList.add(0, retval);
                return retval;
            }
        }

        //  
        try {
            final Helper helper = new Helper();
            final TupleTwo<QName, Object> result = helper.manifest();
            retmap.put(keyElementConverter.convert(result.getValueFirst()), result.getValueSecond());
        } catch (Exception e) {
            if (exceptionHandler != null) {
                exceptionHandler.handleException(e);
            }
        }

        //
        xmlEventReader.close();
        reader.close();

    } catch (Exception e) {
        if (exceptionHandler != null) {
            exceptionHandler.handleException(e);
        }
    }

    //
    return retmap;
}