eu.interedition.text.xml.XMLParser.java Source code

Java tutorial

Introduction

Here is the source code for eu.interedition.text.xml.XMLParser.java

Source

/*
 * #%L
 * Text: A text model with range-based markup via standoff annotations.
 * %%
 * Copyright (C) 2010 - 2011 The Interedition Development Group
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */
package eu.interedition.text.xml;

import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.io.Closeables;
import eu.interedition.text.Text;
import eu.interedition.text.TextConsumer;
import eu.interedition.text.TextRepository;

import javax.xml.stream.*;
import java.io.IOException;
import java.io.Reader;
import java.util.Stack;

public class XMLParser {
    private final XMLInputFactory xmlInputFactory = XML.createXMLInputFactory();

    private TextRepository textRepository;

    public void setTextRepository(TextRepository textRepository) {
        this.textRepository = textRepository;
    }

    public Text parse(Text source, final XMLParserConfiguration configuration)
            throws IOException, XMLStreamException {
        Preconditions.checkArgument(source.getType() == Text.Type.XML);
        final Text target = textRepository.create(Text.Type.TXT);
        final XMLParserState state = new XMLParserState(source, target, configuration);
        try {
            textRepository.read(source, new XMLParserTextConsumer(state));
            Reader textReader = null;
            try {
                return textRepository.write(state.getTarget(), textReader = state.readText());
            } finally {
                Closeables.close(textReader, false);
            }
        } catch (Throwable t) {
            Throwables.propagateIfInstanceOf(t, IOException.class);
            Throwables.propagateIfInstanceOf(Throwables.getRootCause(t), XMLStreamException.class);
            throw Throwables.propagate(t);
        }
    }

    private class XMLParserTextConsumer implements TextConsumer {
        private final XMLParserState state;

        public XMLParserTextConsumer(XMLParserState state) {
            this.state = state;
        }

        public void read(Reader content, long contentLength) throws IOException {
            XMLStreamReader reader = null;
            final Stack<XMLEntity> entities = new Stack<XMLEntity>();
            try {
                reader = xmlInputFactory.createXMLStreamReader(content);
                state.start();
                while (reader.hasNext()) {
                    final int event = reader.next();
                    state.mapOffsetDelta(0, reader.getLocation().getCharacterOffset() - state.getSourceOffset());

                    switch (event) {
                    case XMLStreamConstants.START_ELEMENT:
                        state.endText();
                        state.nextSibling();
                        state.start(entities.push(XMLEntity.newElement(reader)));
                        break;
                    case XMLStreamConstants.END_ELEMENT:
                        state.endText();
                        state.end(entities.pop());
                        break;
                    case XMLStreamConstants.COMMENT:
                        state.endText();
                        state.nextSibling();
                        state.emptyEntity(XMLEntity.newComment(reader));
                        break;
                    case XMLStreamConstants.PROCESSING_INSTRUCTION:
                        state.endText();
                        state.nextSibling();
                        state.emptyEntity(XMLEntity.newPI(reader));
                        break;
                    case XMLStreamConstants.CHARACTERS:
                    case XMLStreamConstants.ENTITY_REFERENCE:
                    case XMLStreamConstants.CDATA:
                        state.newText(reader.getText());
                        break;
                    }
                }

                state.end();
            } catch (XMLStreamException e) {
                throw Throwables.propagate(e);
            } finally {
                if (reader != null) {
                    try {
                        reader.close();
                    } catch (XMLStreamException e) {
                    }
                }
            }
        }
    }
}