Example usage for org.apache.hadoop.io.compress DefaultCodec setConf

List of usage examples for org.apache.hadoop.io.compress DefaultCodec setConf

Introduction

On this page you can find example usages of org.apache.hadoop.io.compress.DefaultCodec.setConf.

Prototype

@Override
public void setConf(Configuration conf)
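
A minimal sketch of the call, before the full examples below; the Configuration and stream names here are illustrative only and not taken from any of the source files on this page:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.DefaultCodec;

Configuration conf = new Configuration();
DefaultCodec codec = new DefaultCodec();
// DefaultCodec is Configurable: setConf must be called (or injected) before the codec is used,
// since it reads buffer-size and compression settings from the Configuration.
codec.setConf(conf);
CompressionOutputStream out = codec.createOutputStream(new DataOutputBuffer());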

Usage

From source file: TestCodec.java

License: Open Source License

public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    DefaultCodec codec = new DefaultCodec();
    codec.setConf(conf);
    DataOutputBuffer chunksWriteBuffer = new DataOutputBuffer();
    CompressionOutputStream compressionOutputStream = codec.createOutputStream(chunksWriteBuffer);

    DataInputBuffer chunkReadBuffer = new DataInputBuffer();
    CompressionInputStream compressionInputStream = codec.createInputStream(chunkReadBuffer);
    String str = "laksjldfkjalskdjfl;aksjdflkajsldkfjalksjdflkajlsdkfjlaksjdflka";
    compressionOutputStream.write(str.getBytes());
    compressionOutputStream.finish();
    byte[] data = chunksWriteBuffer.getData();
    System.out.println(str.length());
    System.out.println(chunksWriteBuffer.getLength());

    chunkReadBuffer.reset(data, chunksWriteBuffer.getLength());

    DataOutputBuffer dob = new DataOutputBuffer();
    IOUtils.copyBytes(compressionInputStream, dob, conf);
    System.out.println(new String(dob.getData(), 0, dob.getLength())); // decode the decompressed bytes before printing

}
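
As an aside (not part of the example above), Hadoop code often obtains an already-configured codec through ReflectionUtils, which invokes setConf for Configurable classes; a minimal sketch using the same kind of Configuration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.util.ReflectionUtils;

Configuration conf = new Configuration();
// ReflectionUtils.newInstance instantiates the codec and, because DefaultCodec implements
// Configurable, calls setConf(conf) on it automatically.
DefaultCodec codec = ReflectionUtils.newInstance(DefaultCodec.class, conf);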

From source file: com.asakusafw.runtime.io.sequencefile.SequenceFileUtilTest.java

License: Apache License

/**
 * Creates a compressed sequence file.
 * @throws Exception if failed
 */
@Test
public void write_compressed() throws Exception {
    DefaultCodec codec = new DefaultCodec();
    codec.setConf(conf);

    Path path = new Path("testing");

    LongWritable key = new LongWritable();
    LongWritable value = new LongWritable();
    try (OutputStream out = new FileOutputStream(fs.pathToFile(path));
            SequenceFile.Writer writer = SequenceFileUtil.openWriter(new BufferedOutputStream(out), conf,
                    key.getClass(), value.getClass(), codec);) {
        for (long i = 0; i < 300000; i++) {
            key.set(i);
            value.set(i + 1);
            writer.append(key, value);
        }
    }

    try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf)) {
        for (long i = 0; i < 300000; i++) {
            assertThat(reader.next(key, value), is(true));
            assertThat(key.get(), is(i));
            assertThat(value.get(), is(i + 1));
        }
        assertThat(reader.next(key, value), is(false));
    }
}

From source file: com.cloudera.flume.handlers.hdfs.TestEscapedCustomOutputDfs.java

License: Apache License

/**
 * Test that writes a few log lines, compresses them with the default codec, writes them to disk,
 * reads back the compressed file, and verifies the written lines.
 *
 * @throws InterruptedException
 */
@Test
public void testDefaultCodec() throws IOException, InterruptedException {
    DefaultCodec codec = new DefaultCodec();
    codec.setConf(FlumeConfiguration.get()); // default needs conf
    checkOutputFormat("syslog", new SyslogEntryFormat(), "DefaultCodec", codec);
}

From source file: com.hdfs.concat.crush.CrushReducerParameterizedTest.java

License: Apache License

/**
 * Every file in this unit test is named "file" followed by a number. This method will create a sequence file with as many lines
 * as the number in the file name. The keys in the file will count from one to the number. The values in the file will count
 * from 100n + 1 to 100n + n. This way each file will have distinct contents so long as no two files have the same name.
 */
private Text writeFile(File srcDir, String fileName, Format format) throws IOException {

    int fileNum = Integer.parseInt(fileName.substring(4));

    File file = new File(srcDir, fileName);

    if (Format.TEXT == format) {
        PrintWriter writer = new PrintWriter(new FileOutputStream(file));

        for (int k = 1, v = 100 * fileNum + 1; k <= fileNum; k++, v++) {
            writer.printf("%d\t%d\n", k, v);
        }

        writer.close();
    } else {
        CustomWritable key = new CustomWritable();
        CustomWritable value = new CustomWritable();

        DefaultCodec codec = new DefaultCodec();
        codec.setConf(job);

        Writer writer = SequenceFile.createWriter(fs, job, new Path(file.getAbsolutePath()),
                CustomWritable.class, CustomWritable.class, compressionType, codec);

        for (int k = 1, v = 100 * fileNum + 1; k <= fileNum; k++, v++) {
            key.set(k);
            value.set(v);

            writer.append(key, value);
        }

        writer.close();
    }

    return new Text(file.getAbsolutePath());
}

From source file: com.streamsets.pipeline.stage.destination.hdfs.writer.TestActiveRecordWriters.java

License: Apache License

@Test
public void testWritersLifecycle() throws Exception {
    URI uri = new URI("file:///");
    Configuration conf = new HdfsConfiguration();
    String prefix = "prefix";
    String template = getTestDir().toString()
            + "/${YYYY()}/${MM()}/${DD()}/${hh()}/${mm()}/${ss()}/${record:value('/')}";
    TimeZone timeZone = TimeZone.getTimeZone("UTC");
    long cutOffSecs = 2;
    long cutOffSize = 10000;
    long cutOffRecords = 2;
    HdfsFileType fileType = HdfsFileType.SEQUENCE_FILE;
    DefaultCodec compressionCodec = new DefaultCodec();
    compressionCodec.setConf(conf);
    SequenceFile.CompressionType compressionType = SequenceFile.CompressionType.BLOCK;
    String keyEL = "uuid()";
    DataGeneratorFactory generatorFactory = new DummyDataGeneratorFactory(null);
    RecordWriterManager mgr = new RecordWriterManager(uri, conf, prefix, template, timeZone, cutOffSecs,
            cutOffSize, cutOffRecords, fileType, compressionCodec, compressionType, keyEL, generatorFactory,
            ContextInfoCreator.createTargetContext(HdfsDTarget.class, "testWritersLifecycle", false,
                    OnRecordError.TO_ERROR, null),
            "dirPathTemplate");
    Assert.assertTrue(mgr.validateDirTemplate("g", "dirPathTemplate", new ArrayList<Stage.ConfigIssue>()));
    ActiveRecordWriters writers = new ActiveRecordWriters(mgr);

    Date now = new Date();

    // record older than cut off
    Date recordDate = new Date(now.getTime() - 3 * 1000 - 1);
    Record record = RecordCreator.create();
    record.set(Field.create("a"));
    Assert.assertNull(writers.get(now, recordDate, record));

    recordDate = new Date(now.getTime());
    RecordWriter writer = writers.get(now, recordDate, record);
    Assert.assertNotNull(writer);
    Path tempPath = writer.getPath();
    writer.write(record);
    writers.release(writer);
    //writer should still be open
    Assert.assertFalse(writer.isClosed());

    writer = writers.get(now, recordDate, record);
    writer.write(record);
    writers.release(writer);
    //writer should be closed because it went over the record count threshold
    Assert.assertTrue(writer.isClosed());

    //we should be able to get a new writer as the cutoff didn't kick in yet
    writer = writers.get(now, recordDate, record);
    Assert.assertNotNull(writer);
    writers.purge();
    //purging should not close the writer as the cutoff didn't kick in yet
    Assert.assertFalse(writer.isClosed());

    Thread.sleep(3001);
    writers.purge();
    //purging should close the writer as the cutoff has now kicked in
    Assert.assertTrue(writer.isClosed());

    //verifying closeAll() closes writers
    writer = writers.get(new Date(), new Date(), record);
    Assert.assertNotNull(writer);
    writers.closeAll();
    Assert.assertTrue(writer.isClosed());
}

From source file: com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java

License: Apache License

@Test
public void testTextFileCompression() throws Exception {
    DefaultCodec codec = new DefaultCodec();
    codec.setConf(new Configuration());
    testTextFile(codec);
}

From source file: com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java

License: Apache License

@Test
public void testSeqFileCompression() throws Exception {
    DefaultCodec codec = new DefaultCodec();
    codec.setConf(new Configuration());
    testSeqFile(codec, SequenceFile.CompressionType.RECORD);
    testSeqFile(codec, SequenceFile.CompressionType.BLOCK);
}

From source file: com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java

License: Apache License

@Test
public void testGetWriter() throws Exception {
    URI uri = new URI("file:///");
    Configuration conf = new HdfsConfiguration();
    final String prefix = "prefix";
    String template = getTestDir().toString()
            + "/${YYYY()}/${MM()}/${DD()}/${hh()}/${mm()}/${ss()}/${record:value('/')}";
    TimeZone timeZone = TimeZone.getTimeZone("UTC");
    long cutOffSecs = 10;
    long cutOffSize = 5;
    long cutOffRecords = 2;
    HdfsFileType fileType = HdfsFileType.TEXT;
    DefaultCodec compressionCodec = new DefaultCodec();
    compressionCodec.setConf(conf);
    SequenceFile.CompressionType compressionType = null;
    String keyEL = null;
    DataGeneratorFactory generatorFactory = new DummyDataGeneratorFactory(null);
    RecordWriterManager mgr = new RecordWriterManager(uri, conf, prefix, template, timeZone, cutOffSecs,
            cutOffSize, cutOffRecords, fileType, compressionCodec, compressionType, keyEL, generatorFactory,
            targetContext, "dirPathTemplate");
    Assert.assertTrue(mgr.validateDirTemplate("g", "dirPathTemplate", new ArrayList<Stage.ConfigIssue>()));

    FileSystem fs = FileSystem.get(uri, conf);
    Date now = getFixedDate();

    // record older than cut off
    Date recordDate = new Date(now.getTime() - 10 * 1000 - 1);
    Record record = RecordCreator.create();
    record.set(Field.create("a"));
    Assert.assertNull(mgr.getWriter(now, recordDate, record));

    // record qualifies, first file
    recordDate = new Date(now.getTime() - 10 * 1000 + 1);
    RecordWriter writer = mgr.getWriter(now, recordDate, record);
    Assert.assertNotNull(writer);
    Path tempPath = writer.getPath();
    Assert.assertEquals(mgr.getPath(recordDate, record), tempPath);
    Path finalPath = mgr.commitWriter(writer);
    //committing a closed writer is a NOP
    Assert.assertNull(mgr.commitWriter(writer));

    Assert.assertEquals(1, getFinalFileNameCount(fs, tempPath.getParent(), prefix));

    // record qualifies, second file
    writer = mgr.getWriter(now, recordDate, record);
    finalPath = mgr.commitWriter(writer);

    Assert.assertEquals(2, getFinalFileNameCount(fs, tempPath.getParent(), prefix));

    // record qualifies, leaving temp file
    writer = mgr.getWriter(now, recordDate, record);
    writer.close();

    // record qualifies, it should roll temp file and create 4th file
    writer = mgr.getWriter(now, recordDate, record);
    finalPath = mgr.commitWriter(writer);
    Assert.assertFalse(fs.exists(tempPath));
    Assert.assertEquals(4, getFinalFileNameCount(fs, tempPath.getParent(), prefix));

    // verifying thresholds because of record count
    writer = mgr.getWriter(now, recordDate, record);
    Assert.assertFalse(mgr.isOverThresholds(writer));
    writer.write(record);
    writer.flush();
    Assert.assertFalse(mgr.isOverThresholds(writer));
    writer.write(record);
    writer.flush();
    Assert.assertTrue(mgr.isOverThresholds(writer));
    writer.write(record);
    mgr.commitWriter(writer);

    // verifying thresholds because of file size
    writer = mgr.getWriter(now, recordDate, record);
    Assert.assertFalse(mgr.isOverThresholds(writer));
    record.set(Field.create("0123456789012345678901234567890123456789012345678901234567890123456789"));
    writer.write(record);
    writer.flush();
    Assert.assertTrue(mgr.isOverThresholds(writer));
    mgr.commitWriter(writer);
}

From source file: com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java

License: Apache License

@Test
public void testThresholdRecords() throws Exception {
    URI uri = new URI("file:///");
    Configuration conf = new HdfsConfiguration();
    String prefix = "prefix";
    String template = getTestDir().toString()
            + "/${YYYY()}/${MM()}/${DD()}/${hh()}/${mm()}/${ss()}/${record:value('/')}";
    TimeZone timeZone = TimeZone.getTimeZone("UTC");
    long cutOffSecs = 10;
    long cutOffSize = 50000;
    long cutOffRecords = 2;
    HdfsFileType fileType = HdfsFileType.TEXT;
    DefaultCodec compressionCodec = new DefaultCodec();
    compressionCodec.setConf(conf);
    SequenceFile.CompressionType compressionType = null;
    String keyEL = null;
    DataGeneratorFactory generatorFactory = new DummyDataGeneratorFactory(null);
    RecordWriterManager mgr = new RecordWriterManager(uri, conf, prefix, template, timeZone, cutOffSecs,
            cutOffSize, cutOffRecords, fileType, compressionCodec, compressionType, keyEL, generatorFactory,
            targetContext, "dirPathTemplate");
    Assert.assertTrue(mgr.validateDirTemplate("g", "dirPathTemplate", new ArrayList<Stage.ConfigIssue>()));

    Date now = getFixedDate();

    Date recordDate = now;
    Record record = RecordCreator.create();
    record.set(Field.create("a"));
    RecordWriter writer = mgr.getWriter(now, recordDate, record);
    Assert.assertNotNull(writer);
    for (int i = 0; i < 2; i++) {
        Assert.assertFalse(mgr.isOverThresholds(writer));
        writer.write(record);
        writer.flush();
    }
    Assert.assertTrue(mgr.isOverThresholds(writer));
    mgr.commitWriter(writer);
}

From source file: com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java

License: Apache License

@Test
public void testThresholdSize() throws Exception {
    URI uri = new URI("file:///");
    Configuration conf = new HdfsConfiguration();
    String prefix = "prefix";
    String template = getTestDir().toString()
            + "/${YYYY()}/${MM()}/${DD()}/${hh()}/${mm()}/${ss()}/${record:value('/')}";
    TimeZone timeZone = TimeZone.getTimeZone("UTC");
    long cutOffSecs = 10;
    long cutOffSize = 4;
    long cutOffRecords = 20;
    HdfsFileType fileType = HdfsFileType.TEXT;
    DefaultCodec compressionCodec = new DefaultCodec();
    compressionCodec.setConf(conf);
    SequenceFile.CompressionType compressionType = null;
    String keyEL = null;
    DataGeneratorFactory generatorFactory = new DummyDataGeneratorFactory(null);
    RecordWriterManager mgr = new RecordWriterManager(uri, conf, prefix, template, timeZone, cutOffSecs,
            cutOffSize, cutOffRecords, fileType, compressionCodec, compressionType, keyEL, generatorFactory,
            targetContext, "dirPathTemplate");
    Assert.assertTrue(mgr.validateDirTemplate("g", "dirPathTemplate", new ArrayList<Stage.ConfigIssue>()));
    Date now = getFixedDate();

    Date recordDate = now;
    Record record = RecordCreator.create();
    record.set(Field.create("a"));
    RecordWriter writer = mgr.getWriter(now, recordDate, record);
    Assert.assertNotNull(writer);
    for (int i = 0; i < 2; i++) {
        Assert.assertFalse(mgr.isOverThresholds(writer));
        writer.write(record);
        writer.flush();
    }
    Assert.assertTrue(mgr.isOverThresholds(writer));
    mgr.commitWriter(writer);
}