eu.interedition.text.xml.XMLParserState.java Source code

Java tutorial

Introduction

Here is the source code for eu.interedition.text.xml.XMLParserState.java

Source

/*
 * #%L
 * Text: A text model with range-based markup via standoff annotations.
 * %%
 * Copyright (C) 2010 - 2011 The Interedition Development Group
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */
package eu.interedition.text.xml;

import com.google.common.base.Throwables;
import com.google.common.io.FileBackedOutputStream;
import eu.interedition.text.Range;
import eu.interedition.text.Text;
import eu.interedition.text.TextConstants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Collections;
import java.util.List;
import java.util.Stack;

/**
 * Mutable state of a single XML parsing run, shared with the configured {@link XMLParserModule}s.
 * <p>
 * It keeps track of
 * <ul>
 * <li>the stack of currently open elements and the per-element inclusion and
 * space-preservation flags,</li>
 * <li>the path of sibling indices leading to the current node,</li>
 * <li>the text buffer receiving the extracted text, and</li>
 * <li>the running offsets into the source and into the extracted text, together with the
 * pending offset ranges that are reported to modules via
 * {@code XMLParserModule#offsetMapping}.</li>
 * </ul>
 * Instances hold plain mutable fields without any synchronization.
 *
 * @author <a href="http://gregor.middell.net/" title="Homepage">Gregor Middell</a>
 */
public class XMLParserState {
    private static final Logger LOG = LoggerFactory.getLogger(XMLParserState.class);

    private final Text source;
    private final Text target;
    private final XMLParserConfiguration configuration;

    /** Modules as returned by the configuration at construction time; notified of every event. */
    private final List<XMLParserModule> modules;

    private final Stack<XMLEntity> elementContext = new Stack<XMLEntity>();
    private final Stack<Boolean> inclusionContext = new Stack<Boolean>();
    private final Stack<Boolean> spacePreservationContext = new Stack<Boolean>();
    private final Stack<Integer> nodePath = new Stack<Integer>();

    private final FileBackedOutputStream textBuffer;

    /** Text offset at which the currently open text node started, or {@code -1} if none is open. */
    private long textStartOffset = -1;

    /** Last character written from the source; consulted for whitespace compression. */
    private char lastChar;

    private long sourceOffset = 0;
    private long textOffset = 0;

    /** Pending, not yet emitted range of source offsets. */
    private Range sourceOffsetRange = Range.NULL;

    /** Pending, not yet emitted range of text offsets. */
    private Range textOffsetRange = Range.NULL;

    /**
     * Creates the state for one parsing run.
     *
     * @param source        the text being parsed
     * @param target        the text receiving the parse result
     * @param configuration the parser configuration, which also supplies the modules and the
     *                      size threshold of the text buffer
     */
    XMLParserState(Text source, Text target, XMLParserConfiguration configuration) {
        this.source = source;
        this.target = target;
        this.configuration = configuration;
        this.modules = configuration.getModules();
        this.textBuffer = new FileBackedOutputStream(configuration.getTextBufferSize(), true);
        // Seeding with a blank makes leading whitespace look like the continuation of a
        // whitespace run, so insert() drops it whenever whitespace compression applies.
        this.lastChar = (configuration.isRemoveLeadingWhitespace() ? ' ' : 0);
    }

    /** @return the text being parsed */
    public Text getSource() {
        return source;
    }

    /** @return the text receiving the parse result */
    public Text getTarget() {
        return target;
    }

    /** @return the configuration this state was created with */
    public XMLParserConfiguration getConfiguration() {
        return configuration;
    }

    /** @return an unmodifiable view of the modules notified by this state */
    public List<XMLParserModule> getModules() {
        return Collections.unmodifiableList(modules);
    }

    /** @return the live stack of inclusion flags, one entry per open element */
    public Stack<Boolean> getInclusionContext() {
        return inclusionContext;
    }

    /** @return {@code true} if no element is open or the innermost open element is included */
    public boolean isIncluded() {
        return inclusionContext.isEmpty() || inclusionContext.peek();
    }

    /** @return the live stack of space-preservation flags, one entry per open element */
    public Stack<Boolean> getSpacePreservationContext() {
        return spacePreservationContext;
    }

    /** @return {@code true} if an element is open and its space-preservation flag is set */
    public boolean isSpacePreserved() {
        return !spacePreservationContext.isEmpty() && spacePreservationContext.peek();
    }

    /** @return the live stack of currently open elements, innermost on top */
    public Stack<XMLEntity> getElementContext() {
        return elementContext;
    }

    /** @return {@code true} if the innermost open element is a container element per configuration */
    public boolean isContainerElement() {
        return !elementContext.isEmpty() && configuration.isContainerElement(elementContext.peek());
    }

    /** @return {@code true} if the innermost open element is a line element per configuration */
    public boolean isLineElement() {
        return !elementContext.isEmpty() && configuration.isLineElement(elementContext.peek());
    }

    /** @return {@code true} if the innermost open element is notable per configuration */
    public boolean isNotable() {
        return !elementContext.isEmpty() && configuration.isNotable(elementContext.peek());
    }

    /** @return the live stack of sibling indices leading to the current node */
    public Stack<Integer> getNodePath() {
        return nodePath;
    }

    /** @return the number of characters written to the text buffer so far */
    public long getTextOffset() {
        return textOffset;
    }

    /** @return the number of source characters accounted for so far */
    public long getSourceOffset() {
        return sourceOffset;
    }

    /** @return the text offset at which the open text node started, or {@code -1} if none is open */
    public long getTextStartOffset() {
        return textStartOffset;
    }

    /**
     * Appends text to the text buffer, advances the offsets and notifies all modules.
     * <p>
     * Text coming from the source is processed character by character: if space is not preserved
     * and the configuration compresses whitespace, a whitespace character following another
     * whitespace character is dropped (only the source offset advances); line feeds and carriage
     * returns that are kept are turned into blanks. Generated text is appended verbatim and only
     * advances the text offset.
     * <p>
     * The characters kept from one call are encoded in a single step, so a surrogate pair contained
     * in {@code text} is encoded as one code point. A pair split across two calls is still encoded
     * separately.
     *
     * @param text       the text to insert
     * @param fromSource {@code true} if the text stems from the source, {@code false} if it was
     *                   generated
     * @throws RuntimeException wrapping any {@link IOException} raised while writing to the buffer
     */
    public void insert(String text, boolean fromSource) {
        if (LOG.isTraceEnabled()) {
            LOG.trace("Inserting Text: '" + escapeLineBreaks(text) + "' ("
                    + (fromSource ? "from source" : "generated") + ")");
        }
        try {
            final int textLength = text.length();
            final StringBuilder inserted = new StringBuilder(textLength);
            if (fromSource) {
                // Neither flag changes while the characters of this call are processed.
                final boolean compress = !isSpacePreserved() && configuration.isCompressingWhitespace();
                for (int cc = 0; cc < textLength; cc++) {
                    char currentChar = text.charAt(cc);
                    if (compress && Character.isWhitespace(lastChar) && Character.isWhitespace(currentChar)) {
                        // Dropped character: consumes source, produces no text.
                        mapOffsetDelta(0, 1);
                        continue;
                    }
                    if (currentChar == '\n' || currentChar == '\r') {
                        currentChar = ' ';
                    }
                    lastChar = currentChar;
                    inserted.append(currentChar);
                    mapOffsetDelta(1, 1);
                }
                // Encode the kept characters in one go; encoding each char on its own would turn
                // the two halves of a surrogate pair into replacement bytes.
                if (inserted.length() > 0) {
                    textBuffer.write(inserted.toString().getBytes(Text.CHARSET));
                }
            } else {
                textBuffer.write(text.getBytes(Text.CHARSET));
                inserted.append(text);
                mapOffsetDelta(textLength, 0);
            }

            final String insertedStr = inserted.toString();
            for (XMLParserModule m : modules) {
                m.insertText(text, insertedStr, this);
            }
        } catch (IOException e) {
            throw Throwables.propagate(e);
        }
    }

    /**
     * Flushes the text buffer and opens a reader on everything written to it so far.
     *
     * @return a reader decoding the buffered bytes with {@code Text.CHARSET}
     * @throws IOException if flushing or opening the buffered data fails
     */
    public Reader readText() throws IOException {
        textBuffer.flush();
        return new InputStreamReader(textBuffer.getSupplier().getInput(), Text.CHARSET);
    }

    /** Signals the start of the document: opens the root level of the node path and notifies all modules. */
    void start() {
        if (LOG.isTraceEnabled()) {
            LOG.trace("Start of document");
        }

        this.nodePath.push(0);
        for (XMLParserModule m : modules) {
            m.start(this);
        }
    }

    /**
     * Signals the end of the document: emits the pending offset mapping, notifies all modules and
     * closes the root level of the node path.
     */
    void end() {
        emitOffsetMapping();
        if (LOG.isTraceEnabled()) {
            LOG.trace("End of document");
        }
        for (XMLParserModule m : modules) {
            m.end(this);
        }
        this.nodePath.pop();
    }

    /**
     * Signals the start of an element: pushes its inclusion and space-preservation flags, opens a
     * new node path level, pushes the element and notifies all modules.
     *
     * @param entity the element being opened
     */
    void start(XMLEntity entity) {
        if (LOG.isTraceEnabled()) {
            LOG.trace("Start of " + entity);
        }

        // Below an included parent an element stays included unless explicitly excluded;
        // below an excluded parent it is only included if explicitly included.
        final boolean parentIncluded = (inclusionContext.isEmpty() ? true : inclusionContext.peek());
        inclusionContext.push(parentIncluded ? !configuration.excluded(entity) : configuration.included(entity));

        // The flag is inherited from the parent unless the element carries the xml:space attribute.
        final boolean inheritedSpacePreservation =
                (spacePreservationContext.isEmpty() ? false : spacePreservationContext.peek());
        final String xmlSpace = entity.getAttributes().get(TextConstants.XML_SPACE_ATTR_NAME);
        spacePreservationContext.push(
                xmlSpace == null ? inheritedSpacePreservation : "preserve".equalsIgnoreCase(xmlSpace));

        nodePath.push(0);
        elementContext.push(entity);

        for (XMLParserModule m : modules) {
            m.start(entity, this);
        }
    }

    /**
     * Signals the end of an element: notifies all modules, then pops everything that
     * {@link #start(XMLEntity)} pushed.
     *
     * @param entity the element being closed
     */
    void end(XMLEntity entity) {
        if (LOG.isTraceEnabled()) {
            LOG.trace("End of " + entity);
        }

        for (XMLParserModule m : modules) {
            m.end(entity, this);
        }

        elementContext.pop();
        nodePath.pop();
        spacePreservationContext.pop();
        inclusionContext.pop();
    }

    /**
     * Handles an empty element as a start immediately followed by an end.
     *
     * @param entity the empty element
     */
    void emptyEntity(XMLEntity entity) {
        start(entity);
        end(entity);
    }

    /** Advances the sibling index on top of the node path by one. */
    void nextSibling() {
        if (LOG.isTraceEnabled()) {
            LOG.trace("Next sibling");
        }

        nodePath.push(nodePath.pop() + 1);
    }

    /**
     * Closes the currently open text node, if any. Modules are only notified if a text node is
     * open and the text offset advanced since it was opened.
     */
    void endText() {
        if (textStartOffset >= 0 && textOffset > textStartOffset) {
            if (LOG.isTraceEnabled()) {
                LOG.trace("End of text node");
            }
            for (XMLParserModule m : modules) {
                m.endText(this);
            }
        }
        textStartOffset = -1;
    }

    /**
     * Passes a chunk of text to all modules. If no text node is open yet, one is opened first: the
     * sibling index advances, the current text offset is recorded as the node's start and all
     * modules are notified of the start of the text node.
     *
     * @param text the chunk of text
     * @throws IOException declared for callers; not raised by the code visible in this class
     */
    void newText(String text) throws IOException {
        if (textStartOffset < 0) {
            nextSibling();
            textStartOffset = textOffset;

            if (LOG.isTraceEnabled()) {
                LOG.trace("Start of text node");
            }
            for (XMLParserModule m : modules) {
                m.startText(this);
            }
        }

        if (LOG.isTraceEnabled()) {
            LOG.trace("Text: '" + escapeLineBreaks(text) + "'");
        }
        for (XMLParserModule m : modules) {
            m.text(text, this);
        }
    }

    /**
     * Advances the text and source offsets and maintains the pending offset ranges.
     * <p>
     * The pending ranges are extended as long as the delta fits their shape: source-only growth
     * while the text range is empty, text-only growth while the source range is empty, or equal
     * growth of two ranges of equal length. Any other delta causes the pending mapping to be
     * emitted and new ranges to be started at the ends of the previous ones.
     *
     * @param addToText   number of characters added to the text
     * @param addToSource number of characters consumed from the source
     */
    void mapOffsetDelta(long addToText, long addToSource) {
        if (addToText == 0 && addToSource == 0) {
            return;
        }

        if (LOG.isTraceEnabled()) {
            LOG.trace("Moving offsets: text += " + addToText + "; source += " + addToSource);
        }

        final long textOffsetRangeLength = textOffsetRange.length();
        final long sourceOffsetRangeLength = sourceOffsetRange.length();

        if (addToText == 0 && textOffsetRangeLength == 0) {
            sourceOffsetRange = new Range(sourceOffsetRange.getStart(), sourceOffsetRange.getEnd() + addToSource);
        } else if (addToSource == 0 && sourceOffsetRangeLength == 0) {
            textOffsetRange = new Range(textOffsetRange.getStart(), textOffsetRange.getEnd() + addToText);
        } else if (textOffsetRangeLength == sourceOffsetRangeLength && addToText == addToSource) {
            sourceOffsetRange = new Range(sourceOffsetRange.getStart(), sourceOffsetRange.getEnd() + addToSource);
            textOffsetRange = new Range(textOffsetRange.getStart(), textOffsetRange.getEnd() + addToText);
        } else {
            emitOffsetMapping();
            sourceOffsetRange = new Range(sourceOffsetRange.getEnd(), sourceOffsetRange.getEnd() + addToSource);
            textOffsetRange = new Range(textOffsetRange.getEnd(), textOffsetRange.getEnd() + addToText);
        }

        this.textOffset += addToText;
        this.sourceOffset += addToSource;
    }

    /**
     * Reports the pending text and source ranges to all modules unless both are empty. The ranges
     * themselves are left untouched; {@link #mapOffsetDelta(long, long)} replaces them afterwards.
     */
    void emitOffsetMapping() {
        if (textOffsetRange.length() == 0 && sourceOffsetRange.length() == 0) {
            return;
        }

        if (LOG.isTraceEnabled()) {
            LOG.trace("New offset mapping: text = " + textOffsetRange + " ==> source = " + sourceOffsetRange);
        }
        for (XMLParserModule m : modules) {
            m.offsetMapping(this, textOffsetRange, sourceOffsetRange);
        }
    }

    /** Replaces every run of line breaks with a literal {@code \n} so trace output stays on one line. */
    private static String escapeLineBreaks(String text) {
        return text.replaceAll("[\r\n]+", "\\\\n");
    }
}