Example usage for org.apache.hadoop.io.compress.DefaultCodec: the DefaultCodec() constructor

Introduction

On this page you can find example usage of the org.apache.hadoop.io.compress.DefaultCodec constructor, DefaultCodec().

Prototype

DefaultCodec()
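
Before the examples, here is a minimal sketch of the typical pattern (not taken from any of the sources below; the class name DefaultCodecSketch and the raw output stream argument are illustrative): construct the codec with the no-argument constructor, pass it a Configuration via setConf(), then query getDefaultExtension() or wrap a stream with createOutputStream().

import java.io.IOException;
import java.io.OutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.DefaultCodec;

public class DefaultCodecSketch {
    // Wraps a raw output stream with DEFLATE compression using DefaultCodec.
    public static CompressionOutputStream compress(OutputStream raw) throws IOException {
        DefaultCodec codec = new DefaultCodec();
        // DefaultCodec is Configurable; give it a Configuration before use,
        // just as the examples below call setConf(conf) or setConf(job).
        codec.setConf(new Configuration());
        // getDefaultExtension() returns ".deflate"; the Presto examples below
        // use it to pick the file extension for compressed text output.
        String extension = codec.getDefaultExtension();
        System.out.println("default extension: " + extension);
        return codec.createOutputStream(raw);
    }
}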

Usage

From source file: com.facebook.presto.hive.HivePageSink.java

License: Apache License

private String generateRandomFileName(String outputFormat) {
    // text format files must have the correct extension when compressed
    String extension = "";
    if (HiveConf.getBoolVar(conf, COMPRESSRESULT)
            && HiveIgnoreKeyTextOutputFormat.class.getName().equals(outputFormat)) {
        extension = new DefaultCodec().getDefaultExtension();

        String compressionCodecClass = conf.get("mapred.output.compression.codec");
        if (compressionCodecClass != null) {
            try {
                Class<? extends CompressionCodec> codecClass = conf.getClassByName(compressionCodecClass)
                        .asSubclass(CompressionCodec.class);
                extension = ReflectionUtil.newInstance(codecClass, conf).getDefaultExtension();
            } catch (ClassNotFoundException e) {
                throw new PrestoException(HIVE_UNSUPPORTED_FORMAT,
                        "Compression codec not found: " + compressionCodecClass, e);
            } catch (RuntimeException e) {
                throw new PrestoException(HIVE_UNSUPPORTED_FORMAT,
                        "Failed to load compression codec: " + compressionCodecClass, e);
            }
        }
    }
    return filePrefix + "_" + randomUUID() + extension;
}

From source file: com.facebook.presto.hive.HiveWriterFactory.java

License: Apache License

public static String getFileExtension(JobConf conf, StorageFormat storageFormat) {
    // text format files must have the correct extension when compressed
    if (!HiveConf.getBoolVar(conf, COMPRESSRESULT)
            || !HiveIgnoreKeyTextOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return "";
    }

    String compressionCodecClass = conf.get("mapred.output.compression.codec");
    if (compressionCodecClass == null) {
        return new DefaultCodec().getDefaultExtension();
    }

    try {
        Class<? extends CompressionCodec> codecClass = conf.getClassByName(compressionCodecClass)
                .asSubclass(CompressionCodec.class);
        return ReflectionUtil.newInstance(codecClass, conf).getDefaultExtension();
    } catch (ClassNotFoundException e) {
        throw new PrestoException(HIVE_UNSUPPORTED_FORMAT,
                "Compression codec not found: " + compressionCodecClass, e);
    } catch (RuntimeException e) {
        throw new PrestoException(HIVE_UNSUPPORTED_FORMAT,
                "Failed to load compression codec: " + compressionCodecClass, e);
    }
}

From source file: com.hdfs.concat.crush.CrushReducerParameterizedTest.java

License: Apache License

/**
 * Every file in this unit test is named "file" followed by a number. This method will create a sequence file with as many lines
 * as the number in the file name. The keys in the file will count from one to the number. The values in the file will count
 * from 100n + 1 to 100n + n. This way each file will have distinct contents so long as no two files have the same name.
 */
private Text writeFile(File srcDir, String fileName, Format format) throws IOException {

    int fileNum = Integer.parseInt(fileName.substring(4));

    File file = new File(srcDir, fileName);

    if (Format.TEXT == format) {
        PrintWriter writer = new PrintWriter(new FileOutputStream(file));

        for (int k = 1, v = 100 * fileNum + 1; k <= fileNum; k++, v++) {
            writer.printf("%d\t%d\n", k, v);
        }

        writer.close();
    } else {
        CustomWritable key = new CustomWritable();
        CustomWritable value = new CustomWritable();

        DefaultCodec codec = new DefaultCodec();
        codec.setConf(job);

        Writer writer = SequenceFile.createWriter(fs, job, new Path(file.getAbsolutePath()),
                CustomWritable.class, CustomWritable.class, compressionType, codec);

        for (int k = 1, v = 100 * fileNum + 1; k <= fileNum; k++, v++) {
            key.set(k);
            value.set(v);

            writer.append(key, value);
        }

        writer.close();
    }

    return new Text(file.getAbsolutePath());
}

From source file: com.hdfs.concat.crush.integration.CrushMapReduceTest.java

License: Apache License

@Before
@Override
public void setUp() throws Exception {
    super.setUp();

    job = createJobConf();

    job.setBoolean("mapred.output.compress", true);
    job.set("mapred.output.compression.type", CompressionType.BLOCK.name());
    job.set("mapred.output.compression.codec", CustomCompressionCodec.class.getName());

    FileSystem fs = getFileSystem();

    Path homeDirPath = fs.makeQualified(new Path("."));

    homeDir = homeDirPath.toUri().getPath();

    fs.delete(homeDirPath, true);

    defaultCodec = new DefaultCodec();
    defaultCodec.setConf(job);

    customCodec = new CustomCompressionCodec();
    customCodec.setConf(job);
}

From source file: com.m6d.filecrush.crush.integration.CrushMapReduceTest.java

License: Apache License

@Before
@Override
public void setUp() throws Exception {
    super.setUp();

    job = createJobConf();

    job.setBoolean("mapreduce.output.fileoutputformat.compress", true);
    job.set("mapreduce.output.fileoutputformat.compress.type", CompressionType.BLOCK.name());
    job.set("mapreduce.output.fileoutputformat.compress.codec", CustomCompressionCodec.class.getName());

    FileSystem fs = getFileSystem();

    Path homeDirPath = fs.makeQualified(new Path("."));

    homeDir = homeDirPath.toUri().getPath();

    fs.delete(homeDirPath, true);

    defaultCodec = new DefaultCodec();
    defaultCodec.setConf(job);

    customCodec = new CustomCompressionCodec();
    customCodec.setConf(job);
}

From source file: com.myhp.hive.rcfile.RCFileGenerator.java

License: Apache License

private static void genData(String format, int numRows, String output, String plainOutput) throws Exception {
    int numFields = 0;
    if (format.equals("student")) {
        rand = new Random(numRows);
        numFields = 3;
    } else if (format.equals("voter")) {
        rand = new Random(1000000000 + numRows);
        numFields = 4;
    } else if (format.equals("alltypes")) {
        rand = new Random(2000000000L + numRows);
        numFields = 10;
    }

    RCFileOutputFormat.setColumnNumber(conf, numFields);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, getFile(output), null, new DefaultCodec());

    PrintWriter pw = new PrintWriter(new FileWriter(plainOutput));

    for (int j = 0; j < numRows; j++) {
        BytesRefArrayWritable row = new BytesRefArrayWritable(numFields);

        byte[][] fields = null;

        if (format.equals("student")) {
            byte[][] f = { randomName().getBytes("UTF-8"),
                    Integer.valueOf(randomAge()).toString().getBytes("UTF-8"),
                    Double.valueOf(randomGpa()).toString().getBytes("UTF-8") };
            fields = f;
        } else if (format.equals("voter")) {
            byte[][] f = { randomName().getBytes("UTF-8"),
                    Integer.valueOf(randomAge()).toString().getBytes("UTF-8"),
                    randomRegistration().getBytes("UTF-8"),
                    Double.valueOf(randomContribution()).toString().getBytes("UTF-8") };
            fields = f;
        } else if (format.equals("alltypes")) {
            byte[][] f = { Integer.valueOf(rand.nextInt(Byte.MAX_VALUE)).toString().getBytes("UTF-8"),
                    Integer.valueOf(rand.nextInt(Short.MAX_VALUE)).toString().getBytes("UTF-8"),
                    Integer.valueOf(rand.nextInt()).toString().getBytes("UTF-8"),
                    Long.valueOf(rand.nextLong()).toString().getBytes("UTF-8"),
                    Float.valueOf(rand.nextFloat() * 1000).toString().getBytes("UTF-8"),
                    Double.valueOf(rand.nextDouble() * 1000000).toString().getBytes("UTF-8"),
                    randomName().getBytes("UTF-8"), randomMap(), randomArray() };
            fields = f;
        }

        for (int i = 0; i < fields.length; i++) {
            BytesRefWritable field = new BytesRefWritable(fields[i], 0, fields[i].length);
            row.set(i, field);
            pw.print(new String(fields[i]));
            if (i != fields.length - 1)
                pw.print("\t");
            else
                pw.println();
        }

        writer.append(row);
    }

    writer.close();
    pw.close();
}

From source file: com.streamsets.pipeline.stage.destination.hdfs.writer.TestActiveRecordWriters.java

License: Apache License

@Test
public void testWritersLifecycle() throws Exception {
    URI uri = new URI("file:///");
    Configuration conf = new HdfsConfiguration();
    String prefix = "prefix";
    String template = getTestDir().toString()
            + "/${YYYY()}/${MM()}/${DD()}/${hh()}/${mm()}/${ss()}/${record:value('/')}";
    TimeZone timeZone = TimeZone.getTimeZone("UTC");
    long cutOffSecs = 2;
    long cutOffSize = 10000;
    long cutOffRecords = 2;
    HdfsFileType fileType = HdfsFileType.SEQUENCE_FILE;
    DefaultCodec compressionCodec = new DefaultCodec();
    compressionCodec.setConf(conf);
    SequenceFile.CompressionType compressionType = SequenceFile.CompressionType.BLOCK;
    String keyEL = "uuid()";
    DataGeneratorFactory generatorFactory = new DummyDataGeneratorFactory(null);
    RecordWriterManager mgr = new RecordWriterManager(uri, conf, prefix, template, timeZone, cutOffSecs,
            cutOffSize, cutOffRecords, fileType, compressionCodec, compressionType, keyEL, generatorFactory,
            ContextInfoCreator.createTargetContext(HdfsDTarget.class, "testWritersLifecycle", false,
                    OnRecordError.TO_ERROR, null),
            "dirPathTemplate");
    Assert.assertTrue(mgr.validateDirTemplate("g", "dirPathTemplate", new ArrayList<Stage.ConfigIssue>()));
    ActiveRecordWriters writers = new ActiveRecordWriters(mgr);

    Date now = new Date();

    // record older than cut off
    Date recordDate = new Date(now.getTime() - 3 * 1000 - 1);
    Record record = RecordCreator.create();
    record.set(Field.create("a"));
    Assert.assertNull(writers.get(now, recordDate, record));

    recordDate = new Date(now.getTime());
    RecordWriter writer = writers.get(now, recordDate, record);
    Assert.assertNotNull(writer);
    Path tempPath = writer.getPath();
    writer.write(record);
    writers.release(writer);
    //writer should still be open
    Assert.assertFalse(writer.isClosed());

    writer = writers.get(now, recordDate, record);
    writer.write(record);
    writers.release(writer);
    //writer should be closed because it went over the record count threshold
    Assert.assertTrue(writer.isClosed());

    //we should be able to get a new writer as the cutoff didn't kick in yet
    writer = writers.get(now, recordDate, record);
    Assert.assertNotNull(writer);
    writers.purge();
    //purging should not close the writer as the cutoff didn't kick in yet
    Assert.assertFalse(writer.isClosed());

    Thread.sleep(3001);
    writers.purge();
    //purging should close the writer now that the cutoff has kicked in
    Assert.assertTrue(writer.isClosed());

    //verifying closeAll() closes writers
    writer = writers.get(new Date(), new Date(), record);
    Assert.assertNotNull(writer);
    writers.closeAll();
    Assert.assertTrue(writer.isClosed());
}

From source file: com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java

License: Apache License

@Test
public void testCompressionCodec() throws Exception {
    testPath(new DefaultCodec());
}

From source file: com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java

License: Apache License

@Test
public void testTextFileCompression() throws Exception {
    DefaultCodec codec = new DefaultCodec();
    codec.setConf(new Configuration());
    testTextFile(codec);
}

From source file: com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java

License: Apache License

@Test
public void testSeqFileCompression() throws Exception {
    DefaultCodec codec = new DefaultCodec();
    codec.setConf(new Configuration());
    testSeqFile(codec, SequenceFile.CompressionType.RECORD);
    testSeqFile(codec, SequenceFile.CompressionType.BLOCK);
}