Example usage for org.apache.hadoop.mapred JobConf JobConf

Introduction

On this page you can find example usage of the org.apache.hadoop.mapred JobConf JobConf() constructor.

Prototype

public JobConf() 

Document

Construct a map/reduce job configuration.
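
Before the full examples, here is a minimal sketch of the pattern they all share: call the no-argument constructor to get an empty job configuration, then set whatever properties the job needs. The methods used below are standard JobConf/Configuration API; the job name and the property key "example.custom.key" are placeholders chosen for illustration.

import org.apache.hadoop.mapred.JobConf;

public class JobConfExample {
    public static JobConf createExampleJobConf() {
        // Construct an empty map/reduce job configuration.
        JobConf jobConf = new JobConf();
        // Name the job (standard JobConf API).
        jobConf.setJobName("example-job");
        // Set an arbitrary configuration property; the key is a placeholder.
        jobConf.set("example.custom.key", "value");
        return jobConf;
    }
}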

Usage

From source file: com.facebook.presto.hive.BenchmarkHiveFileFormats.java

License: Apache License

public static RecordWriter createRecordWriter(List<? extends TpchColumn<?>> columns, File outputFile,
        HiveOutputFormat<?, ?> outputFormat, CompressionType compressionCodec) throws Exception {
    JobConf jobConf = new JobConf();
    ReaderWriterProfiler.setProfilerOptions(jobConf);
    if (compressionCodec != CompressionType.none) {
        CompressionCodec codec = new CompressionCodecFactory(new Configuration())
                .getCodecByName(compressionCodec.toString());
        jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
        jobConf.set(COMPRESS_TYPE, org.apache.hadoop.io.SequenceFile.CompressionType.BLOCK.toString());
        jobConf.set("parquet.compression", compressionCodec.toString());
        jobConf.set("parquet.enable.dictionary", "true");
        switch (compressionCodec) {
        case gzip:
            jobConf.set("hive.exec.orc.default.compress", "ZLIB");
            jobConf.set("hive.exec.orc.compress", "ZLIB");
            break;
        case snappy:
            jobConf.set("hive.exec.orc.default.compress", "SNAPPY");
            jobConf.set("hive.exec.orc.compress", "SNAPPY");
            break;
        default:
            throw new IllegalArgumentException("Unsupported compression codec: " + compressionCodec);
        }
    } else {
        jobConf.set("parquet.enable.dictionary", "true");
        jobConf.set("hive.exec.orc.default.compress", "NONE");
        jobConf.set("hive.exec.orc.compress", "NONE");
    }

    RecordWriter recordWriter = outputFormat.getHiveRecordWriter(jobConf, new Path(outputFile.toURI()),
            Text.class, compressionCodec != CompressionType.none, createTableProperties(columns),
            new Progressable() {
                @Override
                public void progress() {
                }
            });

    return recordWriter;
}

From source file: com.facebook.presto.hive.parquet.ParquetTester.java

License: Apache License

public void assertRoundTrip(ObjectInspector objectInspector, Iterable<?> writeValues, Iterable<?> readValues,
        Type type) throws Exception {
    for (WriterVersion version : versions) {
        for (CompressionCodecName compressionCodecName : compressions) {
            try (TempFile tempFile = new TempFile("test", "parquet")) {
                JobConf jobConf = new JobConf();
                jobConf.setEnum(ParquetOutputFormat.COMPRESSION, compressionCodecName);
                jobConf.setBoolean(ParquetOutputFormat.ENABLE_DICTIONARY, true);
                jobConf.setEnum(ParquetOutputFormat.WRITER_VERSION, version);
                writeParquetColumn(jobConf, tempFile.getFile(), compressionCodecName, objectInspector,
                        writeValues.iterator());
                assertFileContents(jobConf, objectInspector, tempFile, readValues, type);
            }
        }
    }
}

From source file: com.facebook.presto.hive.TestHiveFileFormats.java

License: Apache License

@Test
public void testRCText() throws Exception {
    JobConf jobConf = new JobConf();
    RCFileOutputFormat outputFormat = new RCFileOutputFormat();
    @SuppressWarnings("rawtypes")
    RCFileInputFormat inputFormat = new RCFileInputFormat();
    @SuppressWarnings("deprecation")
    SerDe serde = new ColumnarSerDe();
    File file = File.createTempFile("presto_test", "rc-text");
    try {
        FileSplit split = createTestFile(file.getAbsolutePath(), outputFormat, serde, null);
        @SuppressWarnings("unchecked")
        RecordReader<?, BytesRefArrayWritable> recordReader = (RecordReader<?, BytesRefArrayWritable>) inputFormat
                .getRecordReader(split, jobConf, Reporter.NULL);
        Properties splitProperties = new Properties();
        splitProperties.setProperty("serialization.lib",
                "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe");
        splitProperties.setProperty("columns", COLUMN_NAMES_STRING);
        splitProperties.setProperty("columns.types", COLUMN_TYPES);
        RecordCursor cursor = new ColumnarTextHiveRecordCursor<>(recordReader, split.getLength(),
                splitProperties, new ArrayList<HivePartitionKey>(), getColumns(), DateTimeZone.getDefault(),
                DateTimeZone.getDefault());

        checkCursor(cursor);
    } finally {
        //noinspection ResultOfMethodCallIgnored
        file.delete();
    }
}

From source file: com.facebook.presto.hive.TestHiveFileFormats.java

License: Apache License

@Test
public void testRCBinary() throws Exception {
    JobConf jobConf = new JobConf();
    RCFileOutputFormat outputFormat = new RCFileOutputFormat();
    @SuppressWarnings("rawtypes")
    RCFileInputFormat inputFormat = new RCFileInputFormat();
    @SuppressWarnings("deprecation")
    SerDe serde = new LazyBinaryColumnarSerDe();
    File file = File.createTempFile("presto_test", "rc-binary");
    try {
        FileSplit split = createTestFile(file.getAbsolutePath(), outputFormat, serde, null);
        @SuppressWarnings("unchecked")
        RecordReader<?, BytesRefArrayWritable> recordReader = (RecordReader<?, BytesRefArrayWritable>) inputFormat
                .getRecordReader(split, jobConf, Reporter.NULL);
        Properties splitProperties = new Properties();
        splitProperties.setProperty("serialization.lib",
                "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe");
        splitProperties.setProperty("columns", COLUMN_NAMES_STRING);
        splitProperties.setProperty("columns.types", COLUMN_TYPES);
        RecordCursor cursor = new ColumnarBinaryHiveRecordCursor<>(recordReader, split.getLength(),
                splitProperties, new ArrayList<HivePartitionKey>(), getColumns(), DateTimeZone.getDefault());

        checkCursor(cursor);
    } finally {
        //noinspection ResultOfMethodCallIgnored
        file.delete();
    }
}

From source file: com.facebook.presto.hive.TestOrcPageSourceMemoryTracking.java

License: Apache License

public static FileSplit createTestFile(String filePath, HiveOutputFormat<?, ?> outputFormat,
        @SuppressWarnings("deprecation") SerDe serDe, String compressionCodec, List<TestColumn> testColumns,
        int numRows) throws Exception {
    // filter out partition keys, which are not written to the file
    testColumns = ImmutableList.copyOf(filter(testColumns, not(TestColumn::isPartitionKey)));

    JobConf jobConf = new JobConf();
    ReaderWriterProfiler.setProfilerOptions(jobConf);

    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", Joiner.on(',').join(transform(testColumns, TestColumn::getName)));
    tableProperties.setProperty("columns.types",
            Joiner.on(',').join(transform(testColumns, TestColumn::getType)));
    serDe.initialize(CONFIGURATION, tableProperties);

    if (compressionCodec != null) {
        CompressionCodec codec = new CompressionCodecFactory(CONFIGURATION).getCodecByName(compressionCodec);
        jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
        jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
    }

    RecordWriter recordWriter = createRecordWriter(new Path(filePath), CONFIGURATION);

    try {
        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(
                ImmutableList.copyOf(transform(testColumns, TestColumn::getName)),
                ImmutableList.copyOf(transform(testColumns, TestColumn::getObjectInspector)));

        Object row = objectInspector.create();

        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

        for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
            for (int i = 0; i < testColumns.size(); i++) {
                Object writeValue = testColumns.get(i).getWriteValue();
                if (writeValue instanceof Slice) {
                    writeValue = ((Slice) writeValue).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(i), writeValue);
            }

            Writable record = serDe.serialize(row, objectInspector);
            recordWriter.write(record);
            if (rowNumber % STRIPE_ROWS == STRIPE_ROWS - 1) {
                flushStripe(recordWriter);
            }
        }
    } finally {
        recordWriter.close(false);
    }

    Path path = new Path(filePath);
    path.getFileSystem(CONFIGURATION).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
}

From source file: com.facebook.presto.hive.TestS3SelectPushdown.java

License: Apache License

@BeforeClass
public void setUp() {
    inputFormat = new TextInputFormat();
    inputFormat.configure(new JobConf());
}

From source file: com.facebook.presto.orc.OrcTester.java

License: Apache License

static RecordWriter createOrcRecordWriter(File outputFile, Format format, Compression compression,
        ObjectInspector columnObjectInspector) throws IOException {
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.write.format", format == ORC_12 ? "0.12" : "0.11");
    jobConf.set("hive.exec.orc.default.compress", compression.name());
    ReaderWriterProfiler.setProfilerOptions(jobConf);

    return new OrcOutputFormat().getHiveRecordWriter(jobConf, new Path(outputFile.toURI()), Text.class,
            compression != NONE, createTableProperties("test", columnObjectInspector.getTypeName()), () -> {
            });
}

From source file: com.facebook.presto.orc.OrcTester.java

License: Apache License

private static RecordWriter createDwrfRecordWriter(File outputFile, Compression compressionCodec,
        ObjectInspector columnObjectInspector) throws IOException {
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.default.compress", compressionCodec.name());
    jobConf.set("hive.exec.orc.compress", compressionCodec.name());
    OrcConf.setIntVar(jobConf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
    OrcConf.setIntVar(jobConf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 2);
    OrcConf.setBoolVar(jobConf, OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY, true);
    ReaderWriterProfiler.setProfilerOptions(jobConf);

    return new com.facebook.hive.orc.OrcOutputFormat().getHiveRecordWriter(jobConf,
            new Path(outputFile.toURI()), Text.class, compressionCodec != NONE,
            createTableProperties("test", columnObjectInspector.getTypeName()), () -> {
            });
}

From source file: com.facebook.presto.orc.TestCachingOrcDataSource.java

License: Apache License

private static FileSinkOperator.RecordWriter createOrcRecordWriter(File outputFile, Format format,
        Compression compression, ObjectInspector columnObjectInspector) throws IOException {
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.write.format", format == ORC_12 ? "0.12" : "0.11");
    jobConf.set("hive.exec.orc.default.compress", compression.name());
    ReaderWriterProfiler.setProfilerOptions(jobConf);

    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", "test");
    tableProperties.setProperty("columns.types", columnObjectInspector.getTypeName());
    tableProperties.setProperty("orc.stripe.size", "1200000");

    return new OrcOutputFormat().getHiveRecordWriter(jobConf, new Path(outputFile.toURI()), Text.class,
            compression != NONE, tableProperties, () -> {
            });
}

From source file: com.facebook.presto.raptor.storage.OrcRowSink.java

License: Apache License

private static JobConf createJobConf() {
    JobConf jobConf = new JobConf();
    jobConf.setClassLoader(JobConf.class.getClassLoader());
    // Return the JobConf that was just configured, not a fresh instance.
    return jobConf;
}