RecordingOutputStream.java :  » Web-Crawler » heritrix » org » archive » io » Java Open Source

Java Open Source » Web Crawler » heritrix 
heritrix » org » archive » io » RecordingOutputStream.java
/* ReplayableOutputStream
 *
 * $Id: RecordingOutputStream.java 5080 2007-04-13 20:30:49Z gojomo $
 *
 * Created on Sep 23, 2003
 *
 * Copyright (C) 2003 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
package org.archive.io;

import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.archive.util.IoUtils;


/**
 * An output stream that records all writes to wrapped output
 * stream.
 *
 * A RecordingOutputStream can be wrapped around any other
 * OutputStream to record all bytes written to it.  You can
 * then request a ReplayInputStream to read those bytes.
 *
 * <p>The RecordingOutputStream uses an in-memory buffer and
 * backing disk file to allow it to record streams of
 * arbitrary length limited only by available disk space.
 *
 * <p>As long as the stream recorded is smaller than the
 * in-memory buffer, no disk access will occur.
 *
 * <p>Recorded content can be recovered as a ReplayInputStream
 * (via getReplayInputStream() or, for only the content after
 * the content-begin-mark is set, getContentReplayInputStream() )
 * or as a ReplayCharSequence (via getReplayCharSequence()).
 *
 * <p>This class is also used as a straight output stream
 * by {@link RecordingInputStream} to which it records all reads.
 * {@link RecordingInputStream} is exploiting the file backed buffer
 * facility of this class passing <code>null</code> for the stream
 * to wrap.  TODO: Make a FileBackedOutputStream class that is
 * subclassed by RecordingInputStream.
 *
 * @author gojomo
 *
 */
public class RecordingOutputStream extends OutputStream {
    protected static Logger logger =
        Logger.getLogger(RecordingOutputStream.class.getName());
    
    /**
     * Size of recording.
     *
     * Later passed to ReplayInputStream on creation.  It uses it to know when
     * EOS.
     */
    private long size = 0;

    private String backingFilename;
    private OutputStream diskStream = null;

    /**
     * Buffer we write recordings to.
     *
     * We write all recordings here first till its full.  Thereafter we
     * write the backing file.
     */
    private byte[] buffer;

    /** current virtual position in the recording */
    private long position;
    
    /** flag to disable recording */
    private boolean recording;
    
    /**
     * Reusable buffer for FastBufferedOutputStream
     */
    protected byte[] bufStreamBuf = 
        new byte [ FastBufferedOutputStream.DEFAULT_BUFFER_SIZE ];
    
    /**
     * True if we're to digest content.
     */
    private boolean shouldDigest = false;
 
    /**
     * Digest instance.
     */
    private MessageDigest digest = null;

    /**
     * Define for SHA1 alogarithm.
     */
    private static final String SHA1 = "SHA1";

    /**
     * Maximum amount of header material to accept without the content
     * body beginning -- if more, throw a RecorderTooMuchHeaderException.
     * TODO: make configurable? make smaller?
     */
    protected static final long MAX_HEADER_MATERIAL = 1024*1024; // 1MB
    
    // configurable max length, max time limits
    /** maximum length of material to record before throwing exception */ 
    protected long maxLength = Long.MAX_VALUE;
    /** maximum time to record before throwing exception */ 
    protected long timeoutMs = Long.MAX_VALUE;
    /** maximum rate to record (adds delays to hit target rate) */ 
    protected long maxRateBytesPerMs = Long.MAX_VALUE;
    /** time recording begins for timeout, rate calculations */ 
    protected long startTime = Long.MAX_VALUE;
    
    /**
     * When recording HTTP, where the content-body starts.
     */
    private long contentBeginMark;

    /**
     * Stream to record.
     */
    private OutputStream out = null;

    // mark/reset support 
    /** furthest position reached before any reset()s */
    private long maxPosition = 0;
    /** remembered position to reset() to */ 
    private long markPosition = 0; 

    /**
     * Create a new RecordingOutputStream.
     *
     * @param bufferSize Buffer size to use.
     * @param backingFilename Name of backing file to use.
     */
    public RecordingOutputStream(int bufferSize, String backingFilename) {
        this.buffer = new byte[bufferSize];
        this.backingFilename = backingFilename;
        recording = true;
    }

    /**
     * Wrap the given stream, both recording and passing along any data written
     * to this RecordingOutputStream.
     *
     * @throws IOException If failed creation of backing file.
     */
    public void open() throws IOException {
        this.open(null);
    }

    /**
     * Wrap the given stream, both recording and passing along any data written
     * to this RecordingOutputStream.
     *
     * @param wrappedStream Stream to wrap.  May be null for case where we
     * want to write to a file backed stream only.
     *
     * @throws IOException If failed creation of backing file.
     */
    public void open(OutputStream wrappedStream) throws IOException {
        if(isOpen()) {
            // error; should not be opening/wrapping in an unclosed 
            // stream remains open
            throw new IOException("ROS already open for "
                    +Thread.currentThread().getName());
        }
        this.out = wrappedStream;
        this.position = 0;
        this.markPosition = 0;
        this.maxPosition = 0; 
        this.size = 0;
        this.contentBeginMark = -1;
        // ensure recording turned on
        this.recording = true;
        // Always begins false; must use startDigest() to begin
        this.shouldDigest = false;
        if (this.diskStream != null) {
            closeDiskStream();
        }
        if (this.diskStream == null) {
            // TODO: Fix so we only make file when its actually needed.
            FileOutputStream fis = new FileOutputStream(this.backingFilename);
            
            this.diskStream = new RecyclingFastBufferedOutputStream(fis, bufStreamBuf);
        }
        startTime = System.currentTimeMillis();
    }

    public void write(int b) throws IOException {
        if(position<maxPosition) {
            // revisiting previous content; do nothing but advance position
            position++;
            return; 
        }
        if(recording) {
            record(b);
        }
        if (this.out != null) {
            this.out.write(b);
        }
        checkLimits();
    }

    public void write(byte[] b, int off, int len) throws IOException {
        if(position < maxPosition) {
            if(position+len<=maxPosition) {
                // revisiting; do nothing but advance position
                position += len;
                return;
            }
            // consume part of the array doing nothing but advancing position
            long consumeRange = maxPosition - position; 
            position += consumeRange;
            off += consumeRange;
            len -= consumeRange; 
        }
        if(recording) {
            record(b, off, len);
        }
        if (this.out != null) {
            this.out.write(b, off, len);
        }
        checkLimits();
    }
    
    /**
     * Check any enforced limits. 
     */
    protected void checkLimits() throws RecorderIOException {
        // too much material before finding end of headers? 
        if (contentBeginMark<0) {
            // no mark yet
            if(position>MAX_HEADER_MATERIAL) {
                throw new RecorderTooMuchHeaderException();
            }
        }
        // overlong?
        if(position>maxLength) {
            throw new RecorderLengthExceededException(); 
        }
        // taking too long? 
        long duration = System.currentTimeMillis() - startTime + 1; // !divzero
        if(duration>timeoutMs) {
            throw new RecorderTimeoutException(); 
        }
        // need to throttle reading to hit max configured rate? 
        if(position/duration > maxRateBytesPerMs) {
            long desiredDuration = position / maxRateBytesPerMs;
            try {
                Thread.sleep(desiredDuration-duration);
            } catch (InterruptedException e) {
                logger.log(Level.WARNING,
                        "bandwidth throttling sleep interrupted", e);
            } 
        }
    }

    /**
     * Record the given byte for later recovery
     *
     * @param b Int to record.
     *
     * @exception IOException Failed write to backing file.
     */
    private void record(int b) throws IOException {
        if (this.shouldDigest) {
            this.digest.update((byte)b);
        }
        if (this.position >= this.buffer.length) {
            // TODO: Its possible to call write w/o having first opened a
            // stream.  Protect ourselves against this.
            assert this.diskStream != null: "Diskstream is null";
            this.diskStream.write(b);
        } else {
            this.buffer[(int) this.position] = (byte) b;
        }
        this.position++;
    }

    /**
     * Record the given byte-array range for recovery later
     *
     * @param b Buffer to record.
     * @param off Offset into buffer at which to start recording.
     * @param len Length of buffer to record.
     *
     * @exception IOException Failed write to backing file.
     */
    private void record(byte[] b, int off, int len) throws IOException {
        if(this.shouldDigest) {
            assert this.digest != null: "Digest is null.";
            this.digest.update(b, off, len);
        }
        tailRecord(b, off, len);
    }

    /**
     * Record without digesting.
     * 
     * @param b Buffer to record.
     * @param off Offset into buffer at which to start recording.
     * @param len Length of buffer to record.
     *
     * @exception IOException Failed write to backing file.
     */
    private void tailRecord(byte[] b, int off, int len) throws IOException {
        if(this.position >= this.buffer.length){
            // TODO: Its possible to call write w/o having first opened a
            // stream.  Lets protect ourselves against this.
            if (this.diskStream == null) {
                throw new IOException("diskstream is null");
            }
            this.diskStream.write(b, off, len);
            this.position += len;
        } else {
            assert this.buffer != null: "Buffer is null";
            int toCopy = (int)Math.min(this.buffer.length - this.position, len);
            assert b != null: "Passed buffer is null";
            System.arraycopy(b, off, this.buffer, (int)this.position, toCopy);
            this.position += toCopy;
            // TODO verify these are +1 -1 right
            if (toCopy < len) {
                tailRecord(b, off + toCopy, len - toCopy);
            }
        }
    }

    public void close() throws IOException {
        if(contentBeginMark<0) {
            // if unset, consider 0 posn as content-start
            // (so that a -1 never survives to replay step)
            contentBeginMark = 0;
        }
        if (this.out != null) {
            this.out.close();
            this.out = null;
        }
        closeRecorder();
    }
    
    protected synchronized void closeDiskStream()
    throws IOException {
        if (this.diskStream != null) {
            this.diskStream.close();
            this.diskStream = null;
        }
    }

    public void closeRecorder() throws IOException {
        recording = false;
        closeDiskStream(); // if any
        // This setting of size is important.  Its passed to ReplayInputStream
        // on creation.  It uses it to know EOS.
        if (this.size == 0) {
            this.size = this.position;
        }
    }

    /* (non-Javadoc)
     * @see java.io.OutputStream#flush()
     */
    public void flush() throws IOException {
        if (this.out != null) {
            this.out.flush();
        }
        if (this.diskStream != null) {
            this.diskStream.flush();
        }
    }

    public ReplayInputStream getReplayInputStream() throws IOException {
        return getReplayInputStream(0);
    }
    
    public ReplayInputStream getReplayInputStream(long skip) throws IOException {
        // If this method is being called, then assumption must be that the
        // stream is closed. If it ain't, then the stream gotten won't work
        // -- the size will zero so any attempt at a read will get back EOF.
        assert this.out == null: "Stream is still open.";
        ReplayInputStream replay = new ReplayInputStream(this.buffer, 
                this.size, this.contentBeginMark, this.backingFilename);
        replay.skip(skip);
        return replay; 
    }

    /**
     * Return a replay stream, cued up to begining of content
     *
     * @throws IOException
     * @return An RIS.
     */
    public ReplayInputStream getContentReplayInputStream() throws IOException {
        return getReplayInputStream(this.contentBeginMark);
    }

    public long getSize() {
        return this.size;
    }

    /**
     * Remember the current position as the start of the "response
     * body". Useful when recording HTTP traffic as a way to start
     * replays after the headers.
     */
    public void markContentBegin() {
        this.contentBeginMark = this.position;
        startDigest();
    }

    /**
     * Return stored content-begin-mark (which is also end-of-headers)
     */
    public long getContentBegin() {
        return this.contentBeginMark;
    }
    
    /**
     * Starts digesting recorded data, if a MessageDigest has been
     * set.
     */
    public void startDigest() {
        if (this.digest != null) {
            this.digest.reset();
            this.shouldDigest = true;
        }
    }

    /**
     * Convenience method for setting SHA1 digest.
     * @see #setDigest(String)
     */
    public void setSha1Digest() {
        setDigest(SHA1);
    }
    

    /**
     * Sets a digest function which may be applied to recorded data.
     * The difference between calling this method and {@link #setDigest(MessageDigest)}
     * is that this method tries to reuse MethodDigest instance if already allocated
     * and of appropriate algorithm.
     * @param algorithm Message digest algorithm to use.
     * @see #setDigest(MessageDigest)
     */
    public void setDigest(String algorithm) {
        try {
            // Reuse extant digest if its sha1 algorithm.
            if (this.digest == null ||
                    !this.digest.getAlgorithm().equals(algorithm)) {
                setDigest(MessageDigest.getInstance(algorithm));
            }
        } catch (NoSuchAlgorithmException e) {
            e.printStackTrace();
        }
    }

    /**
     * Sets a digest function which may be applied to recorded data.
     *
     * As usually only a subset of the recorded data should
     * be fed to the digest, you must also call startDigest()
     * to begin digesting.
     *
     * @param md Message digest function to use.
     */
    public void setDigest(MessageDigest md) {
        this.digest = md;
    }

    /**
     * Return the digest value for any recorded, digested data. Call
     * only after all data has been recorded; otherwise, the running
     * digest state is ruined.
     *
     * @return the digest final value
     */
    public byte[] getDigestValue() {
        if(this.digest == null) {
            return null;
        }
        return this.digest.digest();
    }

    public ReplayCharSequence getReplayCharSequence() throws IOException {
        return getReplayCharSequence(null);
    }

    public ReplayCharSequence getReplayCharSequence(String characterEncoding) 
    throws IOException {
        return getReplayCharSequence(characterEncoding, this.contentBeginMark);
    }
    
    /**
     * @param characterEncoding Encoding of recorded stream.
     * @return A ReplayCharSequence  Will return null if an IOException.  Call
     * close on returned RCS when done.
     * @throws IOException
     */
    public ReplayCharSequence getReplayCharSequence(String characterEncoding, 
            long startOffset) throws IOException {
        // TODO: handled transfer-encoding: chunked content-bodies properly
        float maxBytesPerChar = IoUtils.encodingMaxBytesPerChar(characterEncoding);
        if(maxBytesPerChar<=1) {
            // single
            // TODO: take into account single-byte encoding may be non-default
            return new ByteReplayCharSequence(
                    this.buffer, 
                    this.size, 
                    startOffset,
                    this.backingFilename);
        } else {
            // multibyte 
            if(this.size <= this.buffer.length) {
                // raw data is all in memory; do in memory
                return new MultiByteReplayCharSequence(
                        this.buffer, 
                        this.size, 
                        startOffset,
                        characterEncoding);
                
            } else {
                // raw data overflows to disk; use temp file
                ReplayInputStream ris = getReplayInputStream(startOffset);
                ReplayCharSequence rcs = new MultiByteReplayCharSequence(
                        ris, 
                        this.backingFilename,
                        characterEncoding);
                ris.close(); 
                return rcs;
            }
            
        }
        
    }

    public long getResponseContentLength() {
        return this.size - this.contentBeginMark;
    }

    /**
     * @return True if this ROS is open.
     */
    public boolean isOpen() {
        return this.out != null;
    }
    
    /**
     * When used alongside a mark-supporting RecordingInputStream, remember
     * a position reachable by a future reset().
     */
    public void mark() {
        // remember this position for subsequent reset()
        this.markPosition = position; 
    }
    
    /**
     * When used alongside a mark-supporting RecordingInputStream, reset 
     * the position to that saved by previous mark(). Until the position 
     * again reached "new" material, none of the bytes pushed to this 
     * stream will be digested or recorded. 
     */
    public void reset() {
        // take note of furthest-position-reached to avoid double-recording
        maxPosition = Math.max(maxPosition, position); 
        // reset to previous position
        position = markPosition;
    }
    
    /**
     * Set limits on length, time, and rate to enforce.
     * 
     * @param length
     * @param milliseconds
     * @param rateKBps
     */
    public void setLimits(long length, long milliseconds, long rateKBps) {
        maxLength = (length>0) ? length : Long.MAX_VALUE;
        timeoutMs = (milliseconds>0) ? milliseconds : Long.MAX_VALUE;
        maxRateBytesPerMs = (rateKBps>0) ? rateKBps*1024/1000 : Long.MAX_VALUE;
    }
    
    /**
     * Reset limits to effectively-unlimited defaults
     */
    public void resetLimits() {
        maxLength = Long.MAX_VALUE;
        timeoutMs = Long.MAX_VALUE;
        maxRateBytesPerMs = Long.MAX_VALUE;
    }
    
    /**
     * Return number of bytes that could be recorded without hitting 
     * length limit
     * 
     * @return long byte count
     */
    public long getRemainingLength() {
        return maxLength - position; 
    }
}
java2s.com  | Contact Us | Privacy Policy
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.