mitm.common.extractor.impl.DefaultTextExtractor.java Source code

Java tutorial

Introduction

Here is the source code for mitm.common.extractor.impl.DefaultTextExtractor.java

Source

/*
 * Copyright (c) 2010-2011, Martijn Brinkers, Djigzo.
 * 
 * This file is part of Djigzo email encryption.
 *
 * Djigzo is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License 
 * version 3, 19 November 2007 as published by the Free Software 
 * Foundation.
 *
 * Djigzo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public 
 * License along with Djigzo. If not, see <http://www.gnu.org/licenses/>
 *
 * Additional permission under GNU AGPL version 3 section 7
 * 
 * If you modify this Program, or any covered work, by linking or 
 * combining it with aspectjrt.jar, aspectjweaver.jar, tyrex-1.0.3.jar, 
 * freemarker.jar, dom4j.jar, mx4j-jmx.jar, mx4j-tools.jar, 
 * spice-classman-1.0.jar, spice-loggerstore-0.5.jar, spice-salt-0.8.jar, 
 * spice-xmlpolicy-1.0.jar, saaj-api-1.3.jar, saaj-impl-1.3.jar, 
 * wsdl4j-1.6.1.jar (or modified versions of these libraries), 
 * containing parts covered by the terms of Eclipse Public License, 
 * tyrex license, freemarker license, dom4j license, mx4j license,
 * Spice Software License, Common Development and Distribution License
 * (CDDL), Common Public License (CPL) the licensors of this Program grant 
 * you additional permission to convey the resulting work.
 */
package mitm.common.extractor.impl;

import java.io.IOException;
import java.io.Writer;

import mitm.common.extractor.TextExtractor;
import mitm.common.extractor.TextExtractorContext;
import mitm.common.extractor.TextExtractorEventHandler;
import mitm.common.extractor.impl.TextExtractorUtils.TextExtractorWriterHandler;
import mitm.common.util.Check;
import mitm.common.util.RewindableInputStream;

import org.apache.commons.lang.CharUtils;

/**
 * TextExtractor implementation that tries to extract text from the input and assumes that the text is ASCII.
 * 
 * This TextExtractor should only be used as a 'last resort' extractor when there is no other extractor that
 * can handle the data.
 * 
 * @author Martijn Brinkers
 *
 */
public class DefaultTextExtractor implements TextExtractor {
    /**
     * The threshold at which a temp file will be used. Passed unchanged to
     * {@code TextExtractorUtils.fireTextEvent}.
     */
    private final int threshold;

    /**
     * The maximum size a part can get (a safeguard against 'zip bombs'). Passed unchanged to
     * {@code TextExtractorUtils.fireTextEvent}.
     */
    private final long maxPartSize;

    /**
     * Creates an extractor with the given limits.
     *
     * @param threshold   the threshold at which a temp file will be used; must be &gt;= 0
     * @param maxPartSize the maximum size a part can get; must be &gt;= 0
     * @throws IllegalArgumentException if either argument is negative (threshold is checked first)
     */
    public DefaultTextExtractor(int threshold, long maxPartSize) {
        requireNonNegative(threshold, "threshold must be >= 0");
        requireNonNegative(maxPartSize, "maxPartSize must be >= 0");

        this.threshold = threshold;
        this.maxPartSize = maxPartSize;
    }

    /**
     * @return the threshold supplied at construction time
     */
    public int getThreshold() {
        return threshold;
    }

    /**
     * @return the maximum part size supplied at construction time
     */
    public long getMaxPartSize() {
        return maxPartSize;
    }

    /**
     * Checks that none of the arguments is null, wraps the input in a writer handler that emits only
     * the printable ASCII bytes of the input, and hands that handler together with the configured
     * threshold and maximum part size to {@code TextExtractorUtils.fireTextEvent}.
     *
     * @param input   the data to extract text from
     * @param context the extraction context, forwarded as is
     * @param handler the event handler, forwarded as is
     * @throws IOException if reading the input or writing the text fails
     */
    @Override
    public void extract(final RewindableInputStream input, final TextExtractorContext context,
            TextExtractorEventHandler handler) throws IOException {
        Check.notNull(input, "input");
        Check.notNull(context, "context");
        Check.notNull(handler, "handler");

        // The callback copies the filtered bytes of the input to whatever writer it is given.
        TextExtractorWriterHandler printableAsciiSource = new TextExtractorWriterHandler() {
            @Override
            public void write(Writer target) throws IOException {
                copyPrintableAscii(input, target);
            }
        };

        TextExtractorUtils.fireTextEvent(handler, context, printableAsciiSource, threshold, maxPartSize);
    }

    /**
     * Reads the input one byte at a time until end of stream and writes every byte that
     * {@code CharUtils.isAsciiPrintable} accepts to the writer. All other bytes are silently skipped.
     */
    private void copyPrintableAscii(RewindableInputStream source, Writer target) throws IOException {
        for (int octet = source.read(); octet != -1; octet = source.read()) {
            if (!CharUtils.isAsciiPrintable((char) octet)) {
                continue;
            }

            target.write(octet);
        }
    }

    /**
     * Throws an IllegalArgumentException carrying the given message when the value is negative.
     */
    private static void requireNonNegative(long value, String message) {
        if (value < 0) {
            throw new IllegalArgumentException(message);
        }
    }
}