com.cloudera.flume.handlers.hdfs.TestDFSWrite.java Source code

Introduction

Here is the source code for com.cloudera.flume.handlers.hdfs.TestDFSWrite.java, a JUnit test class from Cloudera's Flume that exercises writing events to a Hadoop FileSystem: direct stream writes, SequenceFile round trips, the DFSEventSink open/append/close lifecycle, and tag-based path substitution.

Source

/**
 * Licensed to Cloudera, Inc. under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  Cloudera, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.cloudera.flume.handlers.hdfs;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.junit.Assume.assumeTrue;

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.junit.Test;

import com.cloudera.flume.conf.Context;
import com.cloudera.flume.conf.FlumeConfiguration;
import com.cloudera.flume.conf.SinkFactory.SinkBuilder;
import com.cloudera.flume.core.Event;
import com.cloudera.flume.core.EventImpl;
import com.cloudera.flume.core.EventSink;
import com.cloudera.util.OSUtils;

/**
 * Tests writing to a distributed HDFS. It assumes the namenode is
 * localhost; as written, however, the tests use file:/// paths and therefore
 * run against the local file system.
 */
public class TestDFSWrite {
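    /**
     * Round-trips a UTF string through FileSystem.create() and open(). The
     * file:/// URI resolves to the local file system, so no running HDFS
     * cluster is required.
     */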
    @Test
    public void testDirectWrite() throws IOException {
        FlumeConfiguration conf = FlumeConfiguration.get();

        Path path = new Path("file:///tmp/testfile");
        FileSystem hdfs = path.getFileSystem(conf);
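        // Marks the file for deletion when this FileSystem is closed or the
        // JVM exits.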
        hdfs.deleteOnExit(path);

        String STRING = "Hello World";

        // writing
        FSDataOutputStream dos = hdfs.create(path);
        dos.writeUTF(STRING);
        dos.close();

        // reading
        FSDataInputStream dis = hdfs.open(path);
        String s = dis.readUTF();
        System.out.println(s);
        assertEquals(STRING, s);

        dis.close();

        hdfs.close();
    }

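    /**
     * Writes a single event to a SequenceFile and reads it back, checking
     * that the timestamp, nanos, priority, and body survive the round trip.
     */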
    @Test
    public void testHDFSSequenceFileWrite() throws IOException {
        FlumeConfiguration conf = FlumeConfiguration.get();

        Path path = new Path("file:///tmp/testfile");
        FileSystem hdfs = path.getFileSystem(conf);
        hdfs.deleteOnExit(path);

        Event e = new EventImpl("EVENT".getBytes());

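        // WriteableEventKey and WriteableEvent wrap a Flume event in Hadoop's
        // Writable interface so it can be stored as a SequenceFile
        // key/value pair.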
        Writer w = SequenceFile.createWriter(hdfs, conf, path, WriteableEventKey.class, WriteableEvent.class);

        // writing
        w.append(new WriteableEventKey(e), new WriteableEvent(e));
        w.close();

        FileStatus stats = hdfs.getFileStatus(path);
        assertTrue(stats.getLen() > 0);

        // reading
        SequenceFile.Reader r = new SequenceFile.Reader(hdfs, path, conf);
        WriteableEventKey k = new WriteableEventKey();
        WriteableEvent evt = new WriteableEvent();
        r.next(k, evt);

        // JUnit's assertEquals takes (expected, actual) in that order.
        assertEquals(e.getTimestamp(), evt.getTimestamp());
        assertEquals(e.getNanos(), evt.getNanos());
        assertEquals(e.getPriority(), evt.getPriority());
        assertTrue(Arrays.equals(e.getBody(), evt.getBody()));

        r.close();
        hdfs.close();
    }

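    /**
     * Exercises the DFSEventSink open/append/close lifecycle, then verifies
     * that the resulting SequenceFile contains the appended event.
     */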
    @Test
    public void testHDFSEventSink() throws IOException, InterruptedException {
        FlumeConfiguration conf = FlumeConfiguration.get();
        String str = "file:///tmp/testfile";
        Path path = new Path(str);
        DFSEventSink sink = new DFSEventSink(str);

        FileSystem hdfs = path.getFileSystem(conf);

        sink.open();
        Event e = new EventImpl("EVENT".getBytes());
        sink.append(e);
        sink.close();

        FileStatus stats = hdfs.getFileStatus(path);
        assertTrue(stats.getLen() > 0);

        // reading
        SequenceFile.Reader r = new SequenceFile.Reader(hdfs, path, conf);
        WriteableEventKey k = new WriteableEventKey();
        WriteableEvent evt = new WriteableEvent();
        r.next(k, evt);

        assertEquals(e.getTimestamp(), evt.getTimestamp());
        assertEquals(e.getNanos(), evt.getNanos());
        assertEquals(e.getPriority(), evt.getPriority());
        assertTrue(Arrays.equals(e.getBody(), evt.getBody()));

        r.close();
        hdfs.close();
    }

    /**
     * Test that pathnames are being correctly substituted.
     * 
     * @throws InterruptedException
     */
    @Test
    public void testTaggedWrites() throws IOException, InterruptedException {
        FlumeConfiguration conf = FlumeConfiguration.get();
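        // The %{mytag} escape in the path template is replaced with each
        // event's "mytag" attribute value, so the two events below land in
        // two different files.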
        String template = "file:///tmp/testfile-%{mytag}";
        String real1 = "file:///tmp/testfile-one";
        String real2 = "file:///tmp/testfile-two";

        DFSEventSink sink = new DFSEventSink(template);

        Event e1 = new EventImpl("EVENT1".getBytes());
        e1.set("mytag", "one".getBytes());
        Event e2 = new EventImpl("EVENT2".getBytes());
        e2.set("mytag", "two".getBytes());
        sink.open();
        sink.append(e1);
        sink.append(e2);
        sink.close();

        Path path1 = new Path(real1);
        Path path2 = new Path(real2);

        FileSystem hdfs = path1.getFileSystem(conf);
        FileStatus stats = hdfs.getFileStatus(path1);
        assertTrue("File " + real1 + " not found but expected", stats.getLen() > 0);
        stats = hdfs.getFileStatus(new Path(real2));
        assertTrue("File " + real2 + " not found but expected", stats.getLen() > 0);

        SequenceFile.Reader r = new SequenceFile.Reader(hdfs, path1, conf);
        WriteableEventKey k = new WriteableEventKey();
        WriteableEvent evt = new WriteableEvent();
        r.next(k, evt);

        assertEquals(e1.getTimestamp(), evt.getTimestamp());
        assertEquals(e1.getNanos(), evt.getNanos());
        assertEquals(e1.getPriority(), evt.getPriority());
        assertTrue(Arrays.equals(e1.getBody(), evt.getBody()));
        r.close();

        r = new SequenceFile.Reader(hdfs, path2, conf);
        k = new WriteableEventKey();
        evt = new WriteableEvent();
        r.next(k, evt);

        assertEquals(e2.getTimestamp(), evt.getTimestamp());
        assertEquals(e2.getNanos(), evt.getNanos());
        assertEquals(e2.getPriority(), evt.getPriority());
        assertTrue(Arrays.equals(e2.getBody(), evt.getBody()));
        r.close();

        hdfs.close();
    }

    // Why does this test fail? An old version of Flume used code similar to
    // this and failed; it is preserved here to see whether it exposes a bug.
    // TODO (jon) Why does this fail to write data?
    @Test
    public void testWhyFail() throws IOException {

        // There was a failure case using:
        FlumeConfiguration conf = FlumeConfiguration.get();
        Path path = new Path("file:///tmp/testfile");
        FileSystem hdfs = path.getFileSystem(conf);

        // writing
        FSDataOutputStream dos = hdfs.create(path);
        hdfs.deleteOnExit(path);

        // This overload creates a Writer with ownOutputStream=false, so
        // closing the writer will not close the stream passed in.
        Writer writer = SequenceFile.createWriter(conf, dos, WriteableEventKey.class, WriteableEvent.class,
                SequenceFile.CompressionType.NONE, new DefaultCodec());

        Event e = new EventImpl("EVENT".getBytes());

        writer.append(new WriteableEventKey(e), new WriteableEvent(e));
        writer.sync();
        writer.close();

        // Because ownOutputStream is false, closing the writer does not close
        // the underlying FSDataOutputStream; it must be closed explicitly.
        dos.close();

        // Historically this writer produced an empty file, so the assertion
        // below failed even though something should have been written.
        FileStatus stats = hdfs.getFileStatus(path);
        assertTrue(stats.getLen() > 0);
    }

    /**
     * No exception expected: the builder accepts a single path argument.
     */
    @Test
    public void testBadArgsParseOK() {
        SinkBuilder sb = DFSEventSink.builder();
        sb.build(new Context(), "/foo/msgs");
    }

    /**
     * Wrong number of arguments; an exception is expected.
     */
    @Test(expected = IllegalArgumentException.class)
    public void testBadArgsParseOkInstanceFail() {
        SinkBuilder sb = DFSEventSink.builder();
        sb.build(new Context());
    }

    /**
     * Opening the sink should fail due to insufficient permissions on the
     * target path.
     * 
     * @throws InterruptedException
     */
    @Test
    public void testBadArgsOpenFail() throws IOException, InterruptedException {
        assumeTrue(!OSUtils.isWindowsOS());

        SinkBuilder sb = DFSEventSink.builder();
        EventSink snk = sb.build(new Context(), "/foo/msgs");
        try {
            snk.open();
        } catch (IOException e) {
            return;
        }
        fail("expected exception due to perms");

    }
}
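
Usage

The tests above double as usage examples. As a minimal sketch (assuming the same Flume 0.9.x API imported in this file; the path and the "host" tag value are illustrative placeholders), appending an event through a tag-substituted sink looks like this:

    DFSEventSink sink = new DFSEventSink("file:///tmp/events-%{host}");
    sink.open();
    Event e = new EventImpl("payload".getBytes());
    e.set("host", "node01".getBytes()); // fills in the %{host} escape
    sink.append(e);                     // written to file:///tmp/events-node01
    sink.close();

As in testHDFSEventSink above, the written data can then be read back with a SequenceFile.Reader using WriteableEventKey and WriteableEvent.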