eu.interedition.text.Text.java Source code

Java tutorial

Introduction

Here is the source code for eu.interedition.text.Text.java

Source

/*
 * #%L
 * Text: A text model with range-based markup via standoff annotations.
 * %%
 * Copyright (C) 2010 - 2011 The Interedition Development Group
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */
package eu.interedition.text;

import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.Maps;
import com.google.common.io.CharStreams;
import com.google.common.io.Closeables;
import com.google.common.io.FileBackedOutputStream;
import com.google.common.io.InputSupplier;
import eu.interedition.text.util.TextDigestingFilterReader;
import eu.interedition.text.xml.XML;
import org.codehaus.stax2.XMLInputFactory2;
import org.codehaus.stax2.XMLOutputFactory2;
import org.hibernate.Hibernate;
import org.hibernate.Session;

import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.Enumerated;
import javax.persistence.GeneratedValue;
import javax.persistence.Id;
import javax.persistence.JoinColumn;
import javax.persistence.Lob;
import javax.persistence.ManyToOne;
import javax.persistence.Table;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLEventWriter;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import javax.xml.stream.XMLStreamWriter;
import java.io.BufferedReader;
import java.io.FilterReader;
import java.io.FilterWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.Charset;
import java.sql.Clob;
import java.sql.SQLException;
import java.util.SortedMap;
import java.util.SortedSet;

import static eu.interedition.text.util.TextDigestingFilterReader.NULL_DIGEST;

/**
 * Persistent entity holding the character content of a text as a {@link Clob}, together
 * with its type, its length in characters and a digest of the content.
 *
 * <p>Instances are created and updated through a Hibernate {@link Session}; see the static
 * {@code create(...)} factories and the {@code write(...)} methods.</p>
 */
@Entity
@Table(name = "interedition_text")
public class Text {
    /** Character set used whenever text content is buffered as bytes. */
    public static final Charset CHARSET = Charset.forName("UTF-8");

    /** Number of bytes a buffer keeps in memory before it switches to a temporary file. */
    private static final int MEMORY_BUFFER_THRESHOLD = 1001024;

    private static final XMLInputFactory2 XML_INPUT_FACTORY = XML.createXMLInputFactory();
    private static final XMLOutputFactory2 XML_OUTPUT_FACTORY = XML.createXMLOutputFactory();

    /** Kind of content stored in a text: plain text or XML. */
    public enum Type {
        TXT, XML
    }

    protected long id;
    protected Annotation layer;
    protected Type type;
    protected long length;
    protected byte[] digest;
    protected Clob content;

    /**
     * @return the generated identifier; {@code 0} as long as none has been assigned
     */
    @Id
    @GeneratedValue
    public long getId() {
        return id;
    }

    public void setId(long id) {
        this.id = id;
    }

    /**
     * @return the annotation this text is attached to via the {@code layer_id} column, may be {@code null}
     */
    @ManyToOne
    @JoinColumn(name = "layer_id")
    public Annotation getLayer() {
        return layer;
    }

    public void setLayer(Annotation layer) {
        this.layer = layer;
    }

    /**
     * @return the type of the stored content
     */
    @Enumerated
    @Column(name = "text_type", nullable = false)
    public Type getType() {
        return type;
    }

    public void setType(Type type) {
        this.type = type;
    }

    /**
     * @return the content length as recorded by the {@code write(...)} methods, counted in characters
     */
    @Column(name = "content_length", nullable = false)
    public long getLength() {
        return length;
    }

    public void setLength(long length) {
        this.length = length;
    }

    /**
     * @return the digest of the content as computed by {@link TextDigestingFilterReader}
     */
    @Column(name = "content_digest", length = 64)
    public byte[] getDigest() {
        return digest;
    }

    public void setDigest(byte[] digest) {
        this.digest = digest;
    }

    /**
     * @return the large object holding the content
     */
    @Lob
    public Clob getContent() {
        return content;
    }

    public void setContent(Clob content) {
        this.content = content;
    }

    /**
     * Provides access to the complete content.
     *
     * @return a supplier opening a new character stream on the content each time it is asked for input;
     *         an {@link SQLException} raised while opening the stream is rethrown unchecked
     */
    public InputSupplier<Reader> read() {
        return new InputSupplier<Reader>() {
            @Override
            public Reader getInput() throws IOException {
                try {
                    return content.getCharacterStream();
                } catch (SQLException e) {
                    throw Throwables.propagate(e);
                }
            }
        };
    }

    /**
     * Provides access to a segment of the content.
     *
     * @param range the segment to read; characters at offsets from {@code range.getStart()} (inclusive)
     *              to {@code range.getEnd()} (exclusive) are delivered
     * @return a supplier opening a new, range-limited character stream each time it is asked for input;
     *         an {@link SQLException} raised while opening the stream is rethrown unchecked
     */
    public InputSupplier<Reader> read(final TextRange range) {
        return new InputSupplier<Reader>() {
            @Override
            public Reader getInput() throws IOException {
                try {
                    return new RangeFilteringReader(content.getCharacterStream(), range);
                } catch (SQLException e) {
                    throw Throwables.propagate(e);
                }
            }
        };
    }

    /**
     * Extracts several segments of the content at once.
     *
     * @param ranges the segments to extract
     * @return the extracted substrings keyed by their range
     * @throws IOException declared for callers; an {@link SQLException} raised by the large object is
     *                     rethrown unchecked
     */
    public SortedMap<TextRange, String> read(final SortedSet<TextRange> ranges) throws IOException {
        try {
            final SortedMap<TextRange, String> results = Maps.newTreeMap();
            for (TextRange range : ranges) {
                // Clob positions are 1-based whereas range offsets start at 0
                results.put(range, content.getSubString(range.getStart() + 1, (int) range.length()));
            }
            return results;
        } catch (SQLException e) {
            throw Throwables.propagate(e);
        }
    }

    /**
     * Copies the content of an XML text as a sequence of XML events to the given writer.
     *
     * @param xml the target of the XML events
     * @throws IllegalArgumentException if this text is not of type {@link Type#XML}
     * @throws IOException              if the content stream cannot be closed
     * @throws XMLStreamException       declared for callers; NOTE that stream exceptions raised while copying
     *                                  are currently rethrown unchecked
     */
    public void read(final XMLStreamWriter xml) throws IOException, XMLStreamException {
        Preconditions.checkArgument(getType() == Text.Type.XML);
        Reader xmlStream = null;
        XMLEventReader xmlReader = null;
        XMLEventWriter xmlWriter = null;
        try {
            xmlStream = read().getInput();
            xmlReader = XML_INPUT_FACTORY.createXMLEventReader(xmlStream);
            xmlWriter = XML_OUTPUT_FACTORY.createXMLEventWriter(xml);
            xmlWriter.add(xmlReader);
        } catch (XMLStreamException e) {
            throw Throwables.propagate(e);
        } finally {
            XML.closeQuietly(xmlWriter);
            XML.closeQuietly(xmlReader);
            Closeables.close(xmlStream, false);
        }
    }

    /**
     * Replaces the content of this text with the characters of the given reader.
     *
     * <p>The content is buffered first in order to determine its length, then handed to
     * {@link #write(Session, Reader, long)}.</p>
     *
     * @param session the session used for persisting the content
     * @param content the new content; it is read to its end but not closed
     * @return the updated, session-managed text
     * @throws IOException if reading the content or buffering it fails
     */
    public Text write(Session session, Reader content) throws IOException {
        final FileBackedOutputStream buf = createBuffer();
        final CountingWriter tempWriter = new CountingWriter(new OutputStreamWriter(buf, Text.CHARSET));
        try {
            CharStreams.copy(content, tempWriter);
        } finally {
            // closing flushes the encoder, so that the buffer is complete before it is read back
            Closeables.close(tempWriter, false);
        }

        Reader bufReader = null;
        try {
            bufReader = new InputStreamReader(buf.getSupplier().getInput(), Text.CHARSET);
            return write(session, bufReader, tempWriter.length);
        } finally {
            Closeables.close(bufReader, false);
        }
    }

    /**
     * Replaces the content of this text with the characters of the given reader, whose length is known.
     *
     * <p>This text is merged into the session, the session is flushed and the merged instance is
     * refreshed. The digest is assigned to the returned instance after the flush, so this method does
     * not itself flush the digest.</p>
     *
     * @param session       the session used for persisting the content
     * @param contents      the new content
     * @param contentLength the number of characters provided by {@code contents}
     * @return the updated, session-managed text
     * @throws IOException declared for callers
     */
    public Text write(Session session, Reader contents, long contentLength) throws IOException {
        Text text = (Text) session.merge(this);

        final TextDigestingFilterReader digestingFilterReader = new TextDigestingFilterReader(
                new BufferedReader(contents));
        text.setLength(contentLength);
        text.setContent(Hibernate.getLobCreator(session).createClob(digestingFilterReader, contentLength));

        session.flush();
        session.refresh(text);

        text.setDigest(digestingFilterReader.digest());

        return text;
    }

    @Override
    public String toString() {
        return Objects.toStringHelper(this).addValue(layer).add("type", type).add("length", length)
                .add("id", Long.toString(id)).toString();
    }

    /**
     * Texts with an assigned identifier are equal to other texts with the same identifier; a text
     * without an identifier ({@code id == 0}) is only equal to itself.
     */
    @Override
    public boolean equals(Object obj) {
        // instanceof is false for null, no separate null check required
        if (id != 0 && obj instanceof Text) {
            return id == ((Text) obj).id;
        }
        return super.equals(obj);
    }

    @Override
    public int hashCode() {
        return (id == 0 ? super.hashCode() : Objects.hashCode(id));
    }

    /**
     * Creates and persists an empty text.
     *
     * @param session the session used for persisting the text
     * @param layer   the annotation the text is attached to
     * @param type    the type of the text
     * @return the session-managed text with empty content, a length of {@code 0} and
     *         {@link TextDigestingFilterReader#NULL_DIGEST} as its digest
     */
    public static Text create(Session session, Annotation layer, Text.Type type) {
        Text text = new Text();
        text.setLayer(layer);
        text.setType(type);
        text.setContent(Hibernate.getLobCreator(session).createClob(""));
        text.setLength(0);

        text = (Text) session.merge(text);
        session.flush();
        session.refresh(text);

        text.setDigest(NULL_DIGEST);
        return text;
    }

    /**
     * Creates and persists an XML text from the events of the given stream reader.
     *
     * @param session the session used for persisting the text
     * @param layer   the annotation the text is attached to
     * @param xml     the source of the XML content
     * @return the session-managed text of type {@link Type#XML}
     * @throws IOException        if buffering the XML content fails
     * @throws XMLStreamException if reading or serializing the XML events fails
     */
    public static Text create(Session session, Annotation layer, XMLStreamReader xml)
            throws IOException, XMLStreamException {
        final FileBackedOutputStream xmlBuf = createBuffer();
        final Writer xmlBufWriter = new OutputStreamWriter(xmlBuf, Text.CHARSET);
        XMLEventReader xmlEventReader = null;
        XMLEventWriter xmlEventWriter = null;
        try {
            xmlEventReader = XML_INPUT_FACTORY.createXMLEventReader(xml);
            xmlEventWriter = XML_OUTPUT_FACTORY.createXMLEventWriter(xmlBufWriter);
            xmlEventWriter.add(xmlEventReader);
        } finally {
            XML.closeQuietly(xmlEventWriter);
            XML.closeQuietly(xmlEventReader);
            // Closing the event writer is not specified to flush or close the underlying writer, whose
            // encoder may still hold characters; closing the writer itself flushes them and closes xmlBuf.
            Closeables.close(xmlBufWriter, false);
        }

        Reader xmlBufReader = null;
        try {
            xmlBufReader = new InputStreamReader(xmlBuf.getSupplier().getInput(), Text.CHARSET);
            return create(session, layer, Text.Type.XML).write(session, xmlBufReader);
        } finally {
            Closeables.close(xmlBufReader, false);
        }
    }

    /**
     * Creates and persists a plain text with the characters of the given reader as its content.
     *
     * @param session the session used for persisting the text
     * @param layer   the annotation the text is attached to
     * @param content the content of the text
     * @return the session-managed text of type {@link Type#TXT}
     * @throws IOException if reading the content or buffering it fails
     */
    public static Text create(Session session, Annotation layer, Reader content) throws IOException {
        return create(session, layer, Text.Type.TXT).write(session, content);
    }

    /**
     * @return a new buffer, which switches from memory to a temporary file once
     *         {@link #MEMORY_BUFFER_THRESHOLD} bytes have been written and which is reset on finalization
     */
    private static FileBackedOutputStream createBuffer() {
        return new FileBackedOutputStream(MEMORY_BUFFER_THRESHOLD, true);
    }

    /**
     * Writer counting the characters successfully passed on to the wrapped writer.
     */
    private static class CountingWriter extends FilterWriter {

        /** Number of characters written so far. */
        public long length = 0;

        public CountingWriter(Writer out) {
            super(out);
        }

        @Override
        public void write(int c) throws IOException {
            super.write(c);
            length++;
        }

        @Override
        public void write(char[] cbuf, int off, int len) throws IOException {
            super.write(cbuf, off, len);
            length += len;
        }

        @Override
        public void write(String str, int off, int len) throws IOException {
            super.write(str, off, len);
            length += len;
        }

    }

    /**
     * Reader delivering only those characters of the wrapped reader, whose offsets lie between
     * the start (inclusive) and the end (exclusive) of a range.
     *
     * <p>All operations moving the read position go through {@link #doRead()}, so that the tracked
     * offset stays in sync with the wrapped reader; marking is not supported for the same reason.</p>
     */
    private static class RangeFilteringReader extends FilterReader {

        private final TextRange range;

        /** Number of characters consumed from the wrapped reader so far. */
        private long offset = 0;

        public RangeFilteringReader(Reader in, TextRange range) {
            super(in);
            this.range = range;
        }

        @Override
        public int read() throws IOException {
            // consume everything in front of the range
            while (offset < range.getStart()) {
                final int read = doRead();
                if (read < 0) {
                    return read;
                }
            }
            if (offset >= range.getEnd()) {
                return -1;
            }

            return doRead();
        }

        /**
         * Reads a single character from the wrapped reader and advances the offset, if one was available.
         */
        protected int doRead() throws IOException {
            final int read = super.read();
            if (read >= 0) {
                ++offset;
            }
            return read;
        }

        @Override
        public int read(char[] cbuf, int off, int len) throws IOException {
            int read = 0;
            int last;
            while ((read < len) && ((last = read()) >= 0)) {
                cbuf[off + read++] = (char) last;
            }
            return ((len > 0 && read == 0) ? -1 : read);
        }

        /**
         * Skips characters within the range. The inherited implementation would skip on the wrapped
         * reader directly, bypassing the offset and thereby shifting the delivered segment.
         */
        @Override
        public long skip(long n) throws IOException {
            if (n < 0) {
                throw new IllegalArgumentException("skip value is negative");
            }
            long skipped = 0;
            while ((skipped < n) && (read() >= 0)) {
                skipped++;
            }
            return skipped;
        }

        @Override
        public boolean markSupported() {
            // resetting the wrapped reader would not rewind the offset
            return false;
        }

        @Override
        public void mark(int readAheadLimit) throws IOException {
            throw new IOException("mark() not supported");
        }

        @Override
        public void reset() throws IOException {
            throw new IOException("reset() not supported");
        }
    }
}