com.cloudera.flume.handlers.hdfs.TestEscapedCustomOutputDfs.java Source code

Introduction

Here is the source code for com.cloudera.flume.handlers.hdfs.TestEscapedCustomOutputDfs.java, a JUnit test that verifies the output format and compression codec used by Flume's EscapedCustomDfsSink can be controlled through FlumeConfiguration.

Source

/**
 * Licensed to Cloudera, Inc. under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  Cloudera, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.cloudera.flume.handlers.hdfs;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.io.compress.BZip2Codec;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.log4j.Logger;
import org.junit.Test;

import com.cloudera.flume.conf.Context;
import com.cloudera.flume.conf.FlumeBuilder;
import com.cloudera.flume.conf.FlumeConfiguration;
import com.cloudera.flume.conf.FlumeSpecException;
import com.cloudera.flume.conf.SinkFactory.SinkBuilder;
import com.cloudera.flume.core.Attributes;
import com.cloudera.flume.core.Event;
import com.cloudera.flume.core.EventImpl;
import com.cloudera.flume.core.EventSink;
import com.cloudera.flume.handlers.avro.AvroJsonOutputFormat;
import com.cloudera.flume.handlers.text.SyslogEntryFormat;
import com.cloudera.flume.handlers.text.output.Log4jOutputFormat;
import com.cloudera.flume.handlers.text.output.OutputFormat;
import com.cloudera.util.FileUtil;

/**
 * Checks that the output format used when writing to DFS can actually be
 * controlled from FlumeConfiguration (a configuration file).
 */
public class TestEscapedCustomOutputDfs {

    public static final Logger LOG = Logger.getLogger(TestEscapedCustomOutputDfs.class);

    void checkOutputFormat(String format, OutputFormat of) throws IOException, InterruptedException {
        checkOutputFormat(format, of, "None", null);
    }

    void checkOutputFormat(String format, OutputFormat of, String codecName, CompressionCodec codec)
            throws IOException, InterruptedException {
        // set the output format.
        FlumeConfiguration conf = FlumeConfiguration.get();
        conf.set(FlumeConfiguration.COLLECTOR_OUTPUT_FORMAT, format);
        conf.set(FlumeConfiguration.COLLECTOR_DFS_COMPRESS_CODEC, codecName);

        // build a sink that outputs to that format.
        File f = FileUtil.mktempdir();
        SinkBuilder builder = EscapedCustomDfsSink.builder();
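        // %{service} in the path is an escape sequence; it is replaced with the
        // event's "service" attribute, so this event lands in file "sub-foo".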
        EventSink snk = builder.create(new Context(), "file:///" + f.getPath() + "/sub-%{service}");
        Event e = new EventImpl("this is a test message".getBytes());
        Attributes.setString(e, "service", "foo");
        snk.open();
        snk.append(e);
        snk.close();

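        // render the same event through the expected OutputFormat to compute
        // the expected file contents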
        ByteArrayOutputStream exWriter = new ByteArrayOutputStream();
        of.format(exWriter, e);
        exWriter.close();
        String expected = new String(exWriter.toByteArray());

        // check the output to make sure it is what we expected.

        // handle compression codec / extensions when checking.
        String ext = ""; // file extension
        if (codec != null) {
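            // e.g. ".gz" for GzipCodec, ".bz2" for BZip2Codec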
            ext = codec.getDefaultExtension();
        }
        InputStream in = new FileInputStream(f.getPath() + "/sub-foo" + ext);
        if (codec != null) {
            in = codec.createInputStream(in);
        }
        byte[] buf = new byte[512];
        StringBuilder output = new StringBuilder();
        int len;
        // read the file back, decompressing through the codec if one was given
        while ((len = in.read(buf)) > 0) {
            output.append(new String(buf, 0, len));
        }
        in.close(); // must close the stream so Windows can delete the file
        assertEquals(expected, output.toString());

        // On Windows the directory is not always removed, but the core
        // assertions above still pass.
        assertTrue("temp folder should have been deleted", FileUtil.rmr(f));
    }

    @Test
    public void testAvroOutputFormat() throws IOException, InterruptedException {
        checkOutputFormat("avrojson", new AvroJsonOutputFormat());
    }

    @Test
    public void testSyslogOutputFormat() throws IOException, InterruptedException {
        checkOutputFormat("syslog", new SyslogEntryFormat());
    }

    @Test
    public void testLog4jOutputFormat() throws IOException, InterruptedException {
        checkOutputFormat("log4j", new Log4jOutputFormat());
    }

    /**
     * Writes a few log lines, compresses them with gzip, writes them to disk,
     * reads the compressed file back, and verifies the written lines. On its
     * own this does not exercise GzipCodec with its native libraries;
     * java.library.path must include the Hadoop native libs for that to
     * happen.
     *
     * @throws IOException
     * @throws InterruptedException
     */
    @Test
    public void testGZipCodec() throws IOException, InterruptedException {
        checkOutputFormat("syslog", new SyslogEntryFormat(), "GzipCodec", new GzipCodec());
    }

    /**
     * Writes a few log lines, compresses them with bzip2, writes them to disk,
     * reads the compressed file back, and verifies the written lines.
     *
     * @throws IOException
     * @throws InterruptedException
     */
    @Test
    public void testBZip2Codec() throws IOException, InterruptedException {
        checkOutputFormat("syslog", new SyslogEntryFormat(), "BZip2Codec", new BZip2Codec());
    }

    /**
     * Same as {@link #testBZip2Codec()}, but uses the wrong case for the
     * codec name to check that codec lookup tolerates case differences.
     *
     * @throws IOException
     * @throws InterruptedException
     */
    @Test
    public void testBZip2CodecWrongCase() throws IOException, InterruptedException {
        checkOutputFormat("syslog", new SyslogEntryFormat(), "bzip2Codec", new BZip2Codec());
    }

    /**
     * Writes a few log lines, compresses them with the default codec, writes
     * them to disk, reads the compressed file back, and verifies the written
     * lines.
     *
     * @throws IOException
     * @throws InterruptedException
     */
    @Test
    public void testDefaultCodec() throws IOException, InterruptedException {
        DefaultCodec codec = new DefaultCodec();
        codec.setConf(FlumeConfiguration.get()); // default needs conf
        checkOutputFormat("syslog", new SyslogEntryFormat(), "DefaultCodec", codec);
    }

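    /**
     * Diagnostic helper (note: not annotated with @Test, so it does not run
     * as part of the suite): logs the compression codecs that Hadoop's
     * CompressionCodecFactory can see under the current configuration.
     */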
    public void testCodecs() {
        LOG.info(CompressionCodecFactory.getCodecClasses(FlumeConfiguration.get()));
    }

    @Test
    public void testOutputFormats() throws FlumeSpecException {
        // avro output format
        String src = "escapedFormatDfs(\"file:///tmp/test/testfilename\",\"\", avro)";
        FlumeBuilder.buildSink(new Context(), src);

        // sequence file output format
        src = "escapedFormatDfs(\"file:///tmp/test/testfilename\",\"\", seqfile)";
        FlumeBuilder.buildSink(new Context(), src);

        // sequence file output format with bzip2 compression
        src = "escapedFormatDfs(\"file:///tmp/test/testfilename\",\"\", seqfile(\"bzip2\"))";
        FlumeBuilder.buildSink(new Context(), src);
    }

}
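
The read-back logic in checkOutputFormat generalizes to any Hadoop CompressionCodec: write through codec.createOutputStream, then read back through codec.createInputStream from a file carrying the codec's default extension. Below is a minimal, self-contained sketch of that round trip using GzipCodec; it assumes only hadoop-common on the classpath, and the class name CodecRoundTrip is invented for illustration, not part of Flume.

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.GzipCodec;

public class CodecRoundTrip {
    public static void main(String[] args) throws IOException {
        GzipCodec codec = new GzipCodec();
        codec.setConf(new Configuration()); // codecs are Configurable and need a conf

        // getDefaultExtension() yields ".gz" for gzip
        File f = File.createTempFile("roundtrip", codec.getDefaultExtension());
        byte[] payload = "this is a test message\n".getBytes();

        // write through the codec's compressing stream
        OutputStream out = codec.createOutputStream(new FileOutputStream(f));
        out.write(payload);
        out.close();

        // read back through the codec's decompressing stream
        InputStream in = codec.createInputStream(new FileInputStream(f));
        ByteArrayOutputStream read = new ByteArrayOutputStream();
        byte[] buf = new byte[4096];
        int n;
        while ((n = in.read(buf)) > 0) {
            read.write(buf, 0, n);
        }
        in.close();

        // the decompressed bytes should match the original payload
        System.out.println(new String(read.toByteArray()).equals(new String(payload)));
        f.delete();
    }
}

Swapping in BZip2Codec, or DefaultCodec with setConf as in testDefaultCodec above, exercises the other codecs the same way.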