Example usage for javax.xml.stream.events Characters getData

List of usage examples for javax.xml.stream.events Characters getData

Introduction

This page collects usage examples for the getData() method of javax.xml.stream.events.Characters.

Prototype

public String getData();

Source Link

Document

Gets the character data of this event.

Usage

From source file:XMLEventReaderDemo.java

/**
 * Reads {@code Source.xml} from the working directory with a StAX event
 * reader and prints every start element (with its attributes), end element
 * and text event to standard output.
 *
 * @param args ignored
 * @throws Exception if the file cannot be opened or the XML cannot be parsed
 */
public static void main(String[] args) throws Exception {
    XMLInputFactory factory = XMLInputFactory.newInstance();
    Reader fileReader = new FileReader("Source.xml");
    try {
        XMLEventReader reader = factory.createXMLEventReader(fileReader);
        try {
            while (reader.hasNext()) {
                XMLEvent event = reader.nextEvent();
                // An event has exactly one type, so the checks are chained.
                if (event.isStartElement()) {
                    StartElement element = event.asStartElement();
                    System.out.println("Start Element: " + element.getName());

                    // Wildcard instead of a raw Iterator: compiles cleanly whether
                    // getAttributes() is declared raw or as Iterator<Attribute>.
                    Iterator<?> attributes = element.getAttributes();
                    while (attributes.hasNext()) {
                        Attribute attribute = (Attribute) attributes.next();
                        QName name = attribute.getName();
                        String value = attribute.getValue();
                        System.out.println("Attribute name/value: " + name + "/" + value);
                    }
                } else if (event.isEndElement()) {
                    EndElement element = event.asEndElement();
                    System.out.println("End element:" + element.getName());
                } else if (event.isCharacters()) {
                    Characters characters = event.asCharacters();
                    System.out.println("Text: " + characters.getData());
                }
            }
        } finally {
            // Closing the event reader does not close the underlying Reader.
            reader.close();
        }
    } finally {
        fileReader.close();
    }
}

From source file:edu.jhu.hlt.concrete.ingesters.bolt.BoltForumPostIngester.java

/**
 * Consumes the leading events of a forum-post document up to and including
 * the whitespace after the headline, and returns a "headline" section whose
 * text span is expressed as character offsets into {@code content}.
 *
 * @param rdr     event reader positioned at the very start of the document
 * @param content raw document text that the reported offsets index into
 * @return a section of kind "headline" with number list {@code [0]}
 * @throws XMLStreamException if the reader fails to produce an event
 * @throws ConcreteException  declared by the original signature
 */
private static Section handleHeadline(final XMLEventReader rdr, final String content)
        throws XMLStreamException, ConcreteException {
    // Discard, in order: the document start event, the document ID block
    // and the whitespace block that follows it.
    for (int skipped = 0; skipped < 3; skipped++) {
        rdr.nextEvent();
    }

    // Headline start tag; its name and offset are only logged.
    final StartElement headlineStart = rdr.nextEvent().asStartElement();
    final String localName = headlineStart.getName().getLocalPart();
    LOGGER.debug("QN: {}", localName);
    final int startTagOffset = headlineStart.getLocation().getCharacterOffset();
    LOGGER.debug("HL part offset: {}", startTagOffset);

    // Headline text event: only its position and length are taken from it;
    // the text itself is sliced out of content when trimming is enabled.
    final Characters headlineChars = rdr.nextEvent().asCharacters();
    final int spanStart = headlineChars.getLocation().getCharacterOffset();
    final int spanEnd = spanStart + headlineChars.getData().length();

    // Consume the headline end tag and the trailing whitespace; per the
    // original author's notes the reader then points at the first post.
    rdr.nextEvent();
    rdr.nextEvent();

    final TextSpan ts;
    if (!STRIP_WHITESPACE_OFF_HEADLINE) {
        ts = new TextSpan(spanStart, spanEnd);
    } else {
        // Shrink the span by the padding trimSpacing reports on each side.
        final SimpleImmutableEntry<Integer, Integer> pads = trimSpacing(content.substring(spanStart, spanEnd));
        ts = new TextSpan(spanStart + pads.getKey(), spanEnd - pads.getValue());
    }
    assert ts.getStart() <= ts.getEnding() : "ts=" + ts;

    final List<Integer> numbers = new ArrayList<>();
    numbers.add(0);

    final Section headline = new Section();
    headline.setKind("headline");
    headline.setTextSpan(ts);
    headline.setNumberList(numbers);
    return headline;
}

From source file:StaxEvent.java

/**
 * Returns a replacement text event holding the current date/time when the
 * incoming text is "Name1" (compared case-insensitively); any other event
 * is passed through untouched.
 *
 * @param event the text event to inspect
 * @return a new characters event with the current time, or {@code event}
 */
private Characters getNewCharactersEvent(Characters event) {
    final boolean isPlaceholder = event.getData().equalsIgnoreCase("Name1");
    if (!isPlaceholder) {
        return event;
    }
    final String now = Calendar.getInstance().getTime().toString();
    return m_eventFactory.createCharacters(now);
}

From source file:eu.peppol.document.PayloadParserTest.java

/**
 * Takes a file holding an SBD/SBDH with an ASiC archive in base64 as payload and extracts the ASiC archive in binary format, while
 * calculating the message digest.
 *
 * <p>Every XML event is also copied to a temporary XML file. All streams are closed (and thereby flushed) before the
 * digest is read, so the logged digest covers everything that was decoded.
 *
 * @throws Exception if the sample cannot be read, parsed or written
 */
@Test
public void parseSampleSbdWithAsic() throws Exception {

    InputStream resourceAsStream = PayloadParserTest.class.getClassLoader()
            .getResourceAsStream("sample-sbd-with-asic.xml");
    assertNotNull(resourceAsStream);

    Path xmlFile = Files.createTempFile("unit-test", ".xml");
    Path asicFile = Files.createTempFile("unit-test", ".asice");
    MessageDigest messageDigest = MessageDigest.getInstance("SHA-256");

    // Resources are closed in reverse order: the base64 decoder first, which pushes any
    // bytes it still holds through the digest stream into the ASiC file.
    try (InputStream xmlInput = resourceAsStream;
            FileOutputStream outputStream = new FileOutputStream(xmlFile.toFile());
            OutputStream asicOutputStream = Files.newOutputStream(asicFile);
            DigestOutputStream digestOutputStream = new DigestOutputStream(asicOutputStream, messageDigest);
            Base64OutputStream base64OutputStream = new Base64OutputStream(digestOutputStream, false)) {

        XMLEventReader xmlEventReader = XMLInputFactory.newInstance().createXMLEventReader(xmlInput, "UTF-8");
        XMLEventWriter xmlEventWriter = XMLOutputFactory.newInstance().createXMLEventWriter(outputStream,
                "UTF-8");
        try {
            boolean insideAsicElement = false;

            while (xmlEventReader.hasNext()) {
                XMLEvent xmlEvent = xmlEventReader.nextEvent();

                switch (xmlEvent.getEventType()) {
                case XMLEvent.START_ELEMENT:
                    if ("asic".equals(xmlEvent.asStartElement().getName().getLocalPart())) {
                        insideAsicElement = true;
                    }
                    break;
                case XMLEvent.END_ELEMENT:
                    if ("asic".equals(xmlEvent.asEndElement().getName().getLocalPart())) {
                        insideAsicElement = false;
                    }
                    break;
                case XMLEvent.CHARACTERS:
                    // Whenever we are inside the ASiC XML element, spit
                    // out the base64 encoded data into the base64 decoding output stream.
                    if (insideAsicElement) {
                        Characters characters = xmlEvent.asCharacters();
                        base64OutputStream.write(characters.getData().getBytes("UTF-8"));
                    }
                    break;
                default:
                    break;
                }
                xmlEventWriter.add(xmlEvent);
            }
            // Push cached events to the file stream before it gets closed.
            xmlEventWriter.flush();
        } finally {
            try {
                xmlEventWriter.close();
            } finally {
                xmlEventReader.close();
            }
        }
    }

    // Only read the digest once every stream feeding it has been closed.
    log.debug("Wrote xml output to: " + xmlFile);
    log.debug("Wrote ASiC to:" + asicFile);
    log.debug("Digest: " + new String(Base64.getEncoder().encode(messageDigest.digest())));
}

From source file:StAXEventTreeViewer.java

/**
 * Parses {@code file} with a StAX event reader and mirrors the document
 * under {@code current}: the document-declaration properties, one node per
 * element (with namespace and attribute children), one node per
 * non-whitespace text run and one node for the DTD declaration. The class
 * name of any other event type is printed to standard output.
 *
 * @param treeModel not used by this method
 * @param current   node that receives the top-level children
 * @param file      XML file to read
 * @throws XMLStreamException    if the XML cannot be parsed
 * @throws FileNotFoundException if {@code file} cannot be opened
 */
public void buildTree(DefaultTreeModel treeModel, DefaultMutableTreeNode current, File file)
        throws XMLStreamException, FileNotFoundException {

    XMLInputFactory inputFactory = XMLInputFactory.newInstance();
    FileInputStream input = new FileInputStream(file);
    try {
        XMLEventReader reader = inputFactory.createXMLEventReader(input);
        try {
            while (reader.hasNext()) {
                XMLEvent event = reader.nextEvent();
                switch (event.getEventType()) {
                case XMLStreamConstants.START_DOCUMENT:
                    StartDocument startDocument = (StartDocument) event;
                    current.add(new DefaultMutableTreeNode(startDocument.getVersion()));
                    current.add(new DefaultMutableTreeNode(startDocument.isStandalone()));
                    current.add(new DefaultMutableTreeNode(startDocument.standaloneSet()));
                    current.add(new DefaultMutableTreeNode(startDocument.encodingSet()));
                    current.add(new DefaultMutableTreeNode(startDocument.getCharacterEncodingScheme()));
                    break;
                case XMLStreamConstants.START_ELEMENT:
                    StartElement startElement = (StartElement) event;
                    QName elementName = startElement.getName();

                    // Descend: following events attach to the new element node.
                    DefaultMutableTreeNode element = new DefaultMutableTreeNode(elementName.getLocalPart());
                    current.add(element);
                    current = element;

                    if (!elementName.getNamespaceURI().equals("")) {
                        String prefix = elementName.getPrefix();
                        if (prefix.equals("")) {
                            prefix = "[None]";
                        }
                        DefaultMutableTreeNode namespace = new DefaultMutableTreeNode(
                                "prefix=" + prefix + ",URI=" + elementName.getNamespaceURI());
                        current.add(namespace);
                    }

                    for (Iterator<?> it = startElement.getAttributes(); it.hasNext();) {
                        Attribute attr = (Attribute) it.next();
                        // NOTE(review): the label ends with an unmatched quote; left
                        // unchanged because the intended format is not clear.
                        DefaultMutableTreeNode attribute = new DefaultMutableTreeNode("Attribute (name="
                                + attr.getName().getLocalPart() + ",value=" + attr.getValue() + "')");
                        String attURI = attr.getName().getNamespaceURI();
                        if (!attURI.equals("")) {
                            String attPrefix = attr.getName().getPrefix();
                            if (attPrefix.equals("")) {
                                attPrefix = "[None]";
                            }
                            attribute.add(new DefaultMutableTreeNode("prefix = " + attPrefix + ", URI = " + attURI));
                        }
                        current.add(attribute);
                    }
                    break;
                case XMLStreamConstants.END_ELEMENT:
                    // Ascend back to the node that owns the element just closed.
                    current = (DefaultMutableTreeNode) current.getParent();
                    break;
                case XMLStreamConstants.CHARACTERS:
                    Characters characters = (Characters) event;
                    if (!characters.isIgnorableWhiteSpace() && !characters.isWhiteSpace()) {
                        String data = characters.getData();
                        if (data.length() != 0) {
                            current.add(new DefaultMutableTreeNode(data));
                        }
                    }
                    break;
                case XMLStreamConstants.DTD:
                    DTD dtde = (DTD) event;
                    current.add(new DefaultMutableTreeNode(dtde.getDocumentTypeDeclaration()));
                    // The DTD is handled here; do not fall through and report it as unhandled.
                    break;
                default:
                    System.out.println(event.getClass().getName());
                }
            }
        } finally {
            // Closing the event reader does not close the underlying stream.
            reader.close();
        }
    } finally {
        try {
            input.close();
        } catch (java.io.IOException ignored) {
            // Read-only stream: a close failure cannot lose data, must not mask a
            // parse error, and IOException is not part of this method's contract.
        }
    }
}

From source file:com.joliciel.frenchTreebank.upload.TreebankXmlReader.java

/**
 * Appends the text carried by the given event to {@code tempVal}.
 *
 * @param charactersEvent the text event whose data is accumulated
 */
public void characters(Characters charactersEvent) {
    final String chunk = charactersEvent.getData();
    tempVal += chunk;
}

From source file:org.lieuofs.geo.territoire.biz.dao.EtatTerritoireFichierXmlDao.java

/**************************************************/

/**
 * Loads every {@code country} entry from the XML resource {@code fichier}
 * and hands each completed entry to {@code stockerEtatTerritoire}.
 *
 * <p>Leaf values are read with {@link XMLEventReader#getElementText()}
 * rather than by casting the next event to {@code Characters}: an empty
 * element therefore yields an empty string instead of a
 * {@code ClassCastException}, and text that the parser delivers in several
 * events is read in full. The reader and the input stream are always closed.
 *
 * @throws IOException               if the resource cannot be opened or closed
 * @throws XMLStreamException        if the XML cannot be parsed
 * @throws FactoryConfigurationError if no StAX implementation is available
 * @throws ParseException            if a date value does not match {@code dateFmt}
 */
@PostConstruct
public void chargerResource()
        throws IOException, XMLStreamException, FactoryConfigurationError, ParseException {
    final java.io.InputStream flux = fichier.getInputStream();
    try {
        XMLEventReader reader = XMLInputFactory.newInstance().createXMLEventReader(flux, this.charsetName);
        try {
            EtatTerritoirePersistant etatTerritoire = null;
            while (reader.hasNext()) {
                // nextEvent() reports parse failures as the declared XMLStreamException.
                XMLEvent event = reader.nextEvent();
                if (event.isStartElement()) {
                    String nomElem = event.asStartElement().getName().toString();
                    if ("country".equals(nomElem)) {
                        etatTerritoire = new EtatTerritoirePersistant();
                    } else if ("id".equals(nomElem)) {
                        etatTerritoire.setNumeroOFS(Integer.parseInt(reader.getElementText()));
                    } else if ("unId".equals(nomElem)) {
                        String texte = reader.getElementText();
                        assurerInfosISO(etatTerritoire);
                        etatTerritoire.getInfosISO().setCodeNumeriqueONU(Integer.parseInt(texte));
                    } else if ("iso2Id".equals(nomElem)) {
                        String texte = reader.getElementText();
                        assurerInfosISO(etatTerritoire);
                        etatTerritoire.getInfosISO().setCodeIsoAlpha2(texte);
                    } else if ("iso3Id".equals(nomElem)) {
                        String texte = reader.getElementText();
                        assurerInfosISO(etatTerritoire);
                        etatTerritoire.getInfosISO().setCodeIsoAlpha3(texte);
                    } else if ("shortNameDe".equals(nomElem)) {
                        etatTerritoire.ajouterFormeCourte("de", reader.getElementText());
                    } else if ("shortNameFr".equals(nomElem)) {
                        etatTerritoire.ajouterFormeCourte("fr", reader.getElementText());
                    } else if ("shortNameIt".equals(nomElem)) {
                        etatTerritoire.ajouterFormeCourte("it", reader.getElementText());
                    } else if ("shortNameEn".equals(nomElem)) {
                        etatTerritoire.ajouterFormeCourte("en", reader.getElementText());
                    } else if ("officialNameDe".equals(nomElem)) {
                        etatTerritoire.ajouterDesignationOfficielle("de", reader.getElementText());
                    } else if ("officialNameFr".equals(nomElem)) {
                        etatTerritoire.ajouterDesignationOfficielle("fr", reader.getElementText());
                    } else if ("officialNameIt".equals(nomElem)) {
                        etatTerritoire.ajouterDesignationOfficielle("it", reader.getElementText());
                    } else if ("continent".equals(nomElem)) {
                        etatTerritoire.setNumContinent(Integer.parseInt(reader.getElementText()));
                    } else if ("region".equals(nomElem)) {
                        etatTerritoire.setNumRegion(Integer.parseInt(reader.getElementText()));
                    } else if ("state".equals(nomElem)) {
                        etatTerritoire.setEtat(Boolean.valueOf(reader.getElementText()));
                    } else if ("areaState".equals(nomElem)) {
                        etatTerritoire.setNumEtatRattachement(Integer.parseInt(reader.getElementText()));
                    } else if ("unMember".equals(nomElem)) {
                        etatTerritoire.setMembreONU(Boolean.valueOf(reader.getElementText()));
                    } else if ("unEntryDate".equals(nomElem)) {
                        etatTerritoire.setDateEntreeONU(dateFmt.parse(reader.getElementText()));
                    } else if ("recognizedCh".equals(nomElem)) {
                        etatTerritoire.setReconnuSuisse(Boolean.valueOf(reader.getElementText()));
                    } else if ("recognizedDate".equals(nomElem)) {
                        etatTerritoire.setDateReconnaissance(dateFmt.parse(reader.getElementText()));
                    } else if ("remarkDe".equals(nomElem)) {
                        etatTerritoire.ajouterRemarque("de", reader.getElementText());
                    } else if ("remarkFr".equals(nomElem)) {
                        etatTerritoire.ajouterRemarque("fr", reader.getElementText());
                    } else if ("remarkIt".equals(nomElem)) {
                        etatTerritoire.ajouterRemarque("it", reader.getElementText());
                    } else if ("entryValid".equals(nomElem)) {
                        etatTerritoire.setValide(Boolean.valueOf(reader.getElementText()));
                    } else if ("dateOfChange".equals(nomElem)) {
                        etatTerritoire.setDateDernierChangement(dateFmt.parse(reader.getElementText()));
                    }
                } else if (event.isEndElement()) {
                    // getElementText() already consumed the end tags of the leaf
                    // elements, so only container end tags are seen here.
                    if ("country".equals(event.asEndElement().getName().toString())) {
                        stockerEtatTerritoire(etatTerritoire);
                    }
                }
            }
        } finally {
            // Closing the event reader does not close the underlying stream.
            reader.close();
        }
    } finally {
        flux.close();
    }
}

/** Creates the ISO/UN information holder of the given entry if it does not exist yet. */
private static void assurerInfosISO(EtatTerritoirePersistant etatTerritoire) {
    if (null == etatTerritoire.getInfosISO()) {
        etatTerritoire.setInfosISO(new InfosONUetISO3166());
    }
}

From source file:com.prowidesoftware.swift.model.mx.XmlEventWriter.java

/**
 * Serializes one StAX event as text to {@code out}, dispatching on the
 * event type. A {@code null} event is ignored.
 *
 * <p>A start tag is written without its closing {@code >} and
 * {@code startTagIncomplete} is set, so that later ATTRIBUTE events can
 * still be appended; the other branches call
 * {@code closeStartTagIfNeeded()} first (presumably that is what finishes
 * the pending tag - confirm against the helper). The first start element
 * whose local name equals {@code rootElement} is not written immediately
 * but held in {@code delayedStart} until a NAMESPACE event arrives.
 *
 * @param event the event to write; may be {@code null}
 * @throws XMLStreamException wrapping any {@code IOException} raised while writing
 */
public void add(final XMLEvent event) throws XMLStreamException {
    if (event != null) {
        log.finest("XmlEventType: " + event.getEventType());
        try {
            final int type = event.getEventType();
            switch (type) {
            case XMLEvent.START_DOCUMENT:
                // The declaration always states version 1.0; only the encoding comes from the event.
                if (this.includeXMLDeclaration) {
                    log.finer(">> START_DOCUMENT");
                    log.finer("START_DOCUMENT XMLEvent " + ToStringBuilder.reflectionToString(event));
                    final String str = "<?xml version=\"1.0\" encoding=\""
                            + ((StartDocument) event).getCharacterEncodingScheme() + "\"?>";
                    out.write(str);
                    logStep(str);
                } else {
                    log.finer("skipping xml declaration");
                }
                break;

            case XMLEvent.START_ELEMENT:
                this.startElementCount++;
                closeStartTagIfNeeded();
                log.finer(">> START_ELEMENT");
                // Indentation grows by one space per open element; END_ELEMENT removes it again.
                indent.append(' ');
                final StartElement se = event.asStartElement();
                // The namespaces declared on the element are only logged here, not written.
                @SuppressWarnings("rawtypes")
                final Iterator it = se.getNamespaces();
                while (it.hasNext()) {
                    log.fine("ns: " + it.next());
                }
                /*---------------------------------------------------------------------------------------
                 * 2015.03 miguel
                 * Careful with this condition! It used to cause the bug where the Document nested inside the xs:any was not opened.
                 * That is because this delayed document is only completed when it receives a namespace, as it was designed for the initial element.
                 * This MUST be fixed when the serialization is changed, if it is changed,
                 * because if the document ends up inside a payload element, then the count is != 1 and the way the first
                 * document and a nested document are told apart has to be reviewed.
                 *
                 */
                if (StringUtils.equals(se.getName().getLocalPart(), this.rootElement)
                        && this.startElementCount == 1) { // 2015.03 miguel: THIS was the esprow bug, where closing tags of nested documents appeared without ever being opened; they got in here without this depth-count condition
                    delayedStart = se;
                    log.finer("local part is Document, initializing delayed start, startElementCount="
                            + this.startElementCount);
                } else {
                    final String s = "\n" + indent + "<" + prefix() + se.getName().getLocalPart() /* + ">" */;
                    out.write(s);

                    logStep(s);

                    /* 2014.11 miguel
                     * To support attributes, instead of closing the tag here we set a flag
                     * indicating that the start tag still has to be closed.
                     */
                    startTagIncomplete = true;
                    if (se.isNamespace()) {
                        log.fine("is ns in start XMLEvent " + ToStringBuilder.reflectionToString(event));
                    }
                }
                break;

            case XMLEvent.NAMESPACE:
                log.finer(">> NAMESPACE");
                final Namespace ne = (Namespace) event;
                if (delayedStart != null) {
                    // Write the held-back root start tag, complete with its namespace declarations.
                    // NOTE(review): the xmlns:xsi value is taken from ne.getName() - confirm this is intended.
                    final String s = "\n" + indent + "<" + prefix() + delayedStart.getName().getLocalPart()
                            + " " + "xmlns" + (this.prefix != null ? ":" + this.prefix : "") + "=\""
                            + ne.getValue() + "\" xmlns:xsi=\"" + ne.getName() + "\"" + ">";
                    out.write(s);
                    logStep(s);
                    delayedStart = null;
                } else {
                    log.fine("NAMESPACE XMLEvent " + ToStringBuilder.reflectionToString(event));
                }
                break;

            case XMLEvent.CHARACTERS:
                log.finer(">> CHARACTERS");
                closeStartTagIfNeeded();
                final Characters ce = event.asCharacters();
                // Text goes through escape() before being written; the unescaped text is what gets logged.
                final char[] arr = ce.getData().toCharArray();
                out.write(escape(arr));
                logStep(ce.getData());
                break;

            case XMLEvent.END_ELEMENT:
                log.finer(">> END_ELEMENT");
                closeStartTagIfNeeded();
                // NOTE(review): deleteCharAt(0) throws if indent is empty, i.e. on an END_ELEMENT without a matching START_ELEMENT.
                indent.deleteCharAt(0);
                final EndElement ee = event.asEndElement();
                final String str2 = "</" + prefix() + ee.getName().getLocalPart() + ">\n" + indent;
                out.write(str2);
                logStep(str2);
                break;

            case XMLEvent.END_DOCUMENT:
                log.finer(">> END_DOCUMENT");
                closeStartTagIfNeeded();
                /*  2014.10 miguel
                 *  No need to do anything while writing to a string
                 */
                log.finer("END_DOCUMENT XMLEvent " + ToStringBuilder.reflectionToString(event));
                break;

            case XMLEvent.ATTRIBUTE:
                log.finer(">> ATTRIBUTE");
                final Attribute a = (Attribute) event;
                // NOTE(review): unlike CHARACTERS, the attribute value is written without going through escape().
                final String str3 = " " + a.getName() + "=\"" + a.getValue() + "\" ";
                out.write(str3);
                log.fine(ToStringBuilder.reflectionToString(a));
                logStep(str3);
                break;

            default:
                // Any other event type is logged and otherwise dropped.
                log.info("getEventType " + event.getEventType());
                log.info("PW Unhandled XMLEvent " + ToStringBuilder.reflectionToString(event));
                break;
            }
        } catch (IOException e) {
            log.log(Level.SEVERE, "PW I/O error: " + e);
            log.log(Level.FINER, "PW I/O error: ", e);
            throw new XMLStreamException(e);
        }
    }
}

From source file:com.google.code.activetemplates.impl.TemplateCompilerImpl.java

/**
 * Drains the event stream of {@code cc}, routing every attribute, start
 * element, end element and text event either to the handler {@code h}
 * (when it declares the name as handled) or to the context's writer.
 * Attribute values and text are first passed through {@code processText};
 * a non-null result replaces the original value. Events of any other type
 * are consumed and dropped.
 *
 * @param name not used by this method
 * @param cc   compile context supplying the events, the event factory and the writer
 * @throws XMLStreamException declared for the stream operations performed on {@code cc}
 */
private void doCompile(String name, CompileContext cc) throws XMLStreamException {

    while (cc.hasNextEvent()) {

        XMLEvent e = cc.nextEvent();

        //Location loc = e.getLocation();

        if (e.isAttribute()) {
            //System.out.println("Adding " + e);

            // attributes added during tag processing and under the same tag
            // get handled here, outcome is always PROCESS_ALL

            Attribute a = (Attribute) e;
            if (h.isAttributeHandled(a.getName())) {
                h.processAttribute(cc, a);
            } else {
                String value = a.getValue();
                String nvalue = processText(cc, value);
                // a null result means the value is kept as it is
                if (nvalue != null) {
                    a = cc.getElementFactory().createAttribute(a.getName(), nvalue);
                }
                //System.out.println("Adding " + e);
                cc.getWriter().add(a);
            }

        } else if (e.isStartElement()) {

            StartElement se = e.asStartElement();

            // DEFAULT: emit the element as read; REPLACE: rebuild it from the
            // collected attributes/namespaces; SKIP: drop it with its children
            Processing processing = Processing.DEFAULT;

            // collect namespaces
            @SuppressWarnings("unchecked")
            Iterator<Namespace> nsit = se.getNamespaces();
            List<Namespace> namespaces = new ArrayList<Namespace>();

            while (nsit.hasNext()) {
                Namespace ns = nsit.next();
                // excluded namespaces are left out, which forces the element to be rebuilt
                if (excludedNamespaces.contains(ns.getNamespaceURI())) {
                    processing = Processing.REPLACE;
                } else {
                    namespaces.add(ns);
                }
            }

            // collect attributes
            @SuppressWarnings("unchecked")
            Iterator<Attribute> it = se.getAttributes();
            List<Attribute> attributes = new LinkedList<Attribute>();
            while (it.hasNext()) {
                attributes.add(it.next());
            }

            // collect any separate attribute and namespace xml events
            while (cc.hasNextEvent()) {
                if (cc.peekEvent().isNamespace()) {
                    namespaces.add((Namespace) cc.nextEvent());
                    processing = Processing.REPLACE;
                } else if (cc.peekEvent().isAttribute()) {
                    attributes.add((Attribute) cc.nextEvent());
                    processing = Processing.REPLACE;
                } else {
                    break;
                }
            }

            // preprocess attributes
            // (iterate over the collected list while refilling a fresh one with
            // the attributes that stay on the element)
            it = attributes.iterator();
            attributes = new ArrayList<Attribute>();

            while (it.hasNext() && processing != Processing.SKIP) {
                Attribute a = it.next();

                if (h.isAttributeHandled(a.getName())) {
                    // handled attributes are consumed by the handler and not kept on the element
                    processing = Processing.REPLACE;

                    AttributeHandler.Outcome o = h.processAttribute(cc, a);
                    if (o == Outcome.PROCESS_NONE) {
                        processing = Processing.SKIP;
                    }

                } else {
                    String value = a.getValue();
                    String nvalue = processText(cc, value);
                    if (nvalue != null) {
                        a = cc.getElementFactory().createAttribute(a.getName(), nvalue);
                        processing = Processing.REPLACE;
                    }

                    attributes.add(a);
                }
            }

            if (processing == Processing.SKIP) {

                skipChildren(cc, false);

            } else {

                if (processing == Processing.REPLACE) {
                    // replace element with new one
                    se = cc.getElementFactory().createStartElement(se.getName(), attributes.iterator(),
                            namespaces.iterator());
                }

                // handle start element
                if (h.isElementHandled(se.getName())) {
                    ElementHandler.Outcome o = h.processStartElement(cc, se);
                    cc.flushEventQueue();
                    // only PROCESS_SIBLINGS needs action here; every other outcome continues normally
                    switch (o) {
                    case PROCESS_SIBLINGS:
                        skipChildren(cc, true);
                        break;
                    }
                } else {
                    //System.out.println("Adding " + se);
                    cc.getWriter().add(se);
                    cc.flushEventQueue(); // flush events added by any attribute handlers
                }
            }

        } else if (e.isEndElement()) {

            // handle end element
            if (h.isElementHandled(e.asEndElement().getName())) {
                h.processEndElement(cc, e.asEndElement());
                cc.flushEventQueue();
            } else {
                //System.out.println("Adding " + e);
                cc.getWriter().add(e);
            }

        } else if (e.isCharacters()) {

            // process text
            Characters ce = e.asCharacters();
            String s = ce.getData();
            String ns = processText(cc, s);
            // a null result means the text is written unchanged
            if (ns != null) {
                ce = cc.getElementFactory().createCharacters(ns);
            }
            //System.out.println("Adding " + e);
            cc.getWriter().add(ce);

        }

    }

}

From source file:edu.jhu.hlt.concrete.ingesters.webposts.WebPostIngester.java

/**
 * Walks the fixed preamble of a web-post document, copying the document id,
 * document type and start time (in seconds) into {@code cptr}, and returns
 * a "headline" section whose text span is expressed as character offsets
 * into {@code content}.
 *
 * @param rdr     event reader positioned at the very start of the document
 * @param content raw document text that the reported offsets index into
 * @param cptr    communication that receives id, type and start time
 * @return a section of kind "headline" with number list {@code [0]}
 * @throws XMLStreamException if the reader fails to produce an event
 * @throws ConcreteException  declared by the original signature
 */
private Section handleBeginning(final XMLEventReader rdr, final String content, final Communication cptr)
        throws XMLStreamException, ConcreteException {
    // Discard, in order: document start, the document block, whitespace
    // and the docid start tag.
    for (int skipped = 0; skipped < 4; skipped++) {
        rdr.nextEvent();
    }

    // Docid text becomes the communication id.
    final Characters docIdChars = rdr.nextEvent().asCharacters();
    cptr.setId(docIdChars.getData().trim());

    // Discard: docid end tag, whitespace, doctype start tag.
    for (int skipped = 0; skipped < 3; skipped++) {
        rdr.nextEvent();
    }

    // Doctype text becomes the communication type.
    final Characters docTypeChars = rdr.nextEvent().asCharacters();
    cptr.setType(docTypeChars.getData().trim());

    // Discard: doctype end tag, whitespace, datetime start tag.
    for (int skipped = 0; skipped < 3; skipped++) {
        rdr.nextEvent();
    }

    // Datetime text is parsed, converted to UTC and stored as whole seconds.
    final Characters dateTimeChars = rdr.nextEvent().asCharacters();
    final String rawDateTime = dateTimeChars.getData().trim();
    final DateTime dt = this.dtf.parseDateTime(rawDateTime).toDateTime(DateTimeZone.UTC);
    LOGGER.debug("Got DateTime: {}", dt.toString());
    final long epochMillis = dt.getMillis();
    cptr.setStartTime(epochMillis / 1000);

    // Discard: datetime end tag, whitespace, body start tag, whitespace.
    for (int skipped = 0; skipped < 4; skipped++) {
        rdr.nextEvent();
    }

    // Headline start tag; its name is only logged.
    final StartElement headlineStart = rdr.nextEvent().asStartElement();
    final String localName = headlineStart.getName().getLocalPart();
    LOGGER.debug("QN: {}", localName);

    // Headline text: position and length come from the event, the text
    // itself is sliced out of content.
    final Characters headlineChars = rdr.nextEvent().asCharacters();
    final int spanStart = headlineChars.getLocation().getCharacterOffset();
    final int spanEnd = spanStart + headlineChars.getData().length();

    // Shrink the span by the padding trimSpacing reports on each side.
    final SimpleImmutableEntry<Integer, Integer> pads = trimSpacing(content.substring(spanStart, spanEnd));
    final TextSpan ts = new TextSpan(spanStart + pads.getKey(), spanEnd - pads.getValue());

    final List<Integer> numbers = new ArrayList<>();
    numbers.add(0);

    final Section headline = new Section();
    headline.setKind("headline");
    headline.setTextSpan(ts);
    headline.setNumberList(numbers);
    return headline;
}