List of usage examples for the org.apache.hadoop.mapred.JobConf constructor
public JobConf()
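All of the examples below follow the same pattern: construct a JobConf with the no-argument constructor, set configuration properties on it, and hand it to a Hadoop or Hive component. A minimal sketch of that pattern (the key example.property and its value are illustrative placeholders, not settings used by any example below):

import org.apache.hadoop.mapred.JobConf;

public class JobConfExample
{
    public static void main(String[] args)
    {
        // Build an empty job configuration; default Hadoop configuration
        // resources found on the classpath are loaded automatically.
        JobConf jobConf = new JobConf();

        // Set a property (illustrative key/value, not required by the examples below).
        jobConf.set("example.property", "example-value");

        // Read it back, supplying a default for the case where the key is unset.
        System.out.println(jobConf.get("example.property", "unset"));
    }
}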
From source file: com.facebook.presto.hive.BenchmarkHiveFileFormats.java
License: Apache License

public static RecordWriter createRecordWriter(List<? extends TpchColumn<?>> columns, File outputFile,
        HiveOutputFormat<?, ?> outputFormat, CompressionType compressionCodec)
        throws Exception
{
    JobConf jobConf = new JobConf();
    ReaderWriterProfiler.setProfilerOptions(jobConf);

    if (compressionCodec != CompressionType.none) {
        CompressionCodec codec = new CompressionCodecFactory(new Configuration())
                .getCodecByName(compressionCodec.toString());
        jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
        jobConf.set(COMPRESS_TYPE, org.apache.hadoop.io.SequenceFile.CompressionType.BLOCK.toString());
        jobConf.set("parquet.compression", compressionCodec.toString());
        jobConf.set("parquet.enable.dictionary", "true");
        switch (compressionCodec) {
            case gzip:
                jobConf.set("hive.exec.orc.default.compress", "ZLIB");
                jobConf.set("hive.exec.orc.compress", "ZLIB");
                break;
            case snappy:
                jobConf.set("hive.exec.orc.default.compress", "SNAPPY");
                jobConf.set("hive.exec.orc.compress", "SNAPPY");
                break;
            default:
                throw new IllegalArgumentException("Unsupported compression codec: " + compressionCodec);
        }
    }
    else {
        jobConf.set("parquet.enable.dictionary", "true");
        jobConf.set("hive.exec.orc.default.compress", "NONE");
        jobConf.set("hive.exec.orc.compress", "NONE");
    }

    RecordWriter recordWriter = outputFormat.getHiveRecordWriter(jobConf, new Path(outputFile.toURI()),
            Text.class, compressionCodec != CompressionType.none, createTableProperties(columns),
            new Progressable()
            {
                @Override
                public void progress()
                {
                }
            });

    return recordWriter;
}
From source file: com.facebook.presto.hive.parquet.ParquetTester.java
License: Apache License

public void assertRoundTrip(ObjectInspector objectInspector, Iterable<?> writeValues, Iterable<?> readValues,
        Type type)
        throws Exception
{
    for (WriterVersion version : versions) {
        for (CompressionCodecName compressionCodecName : compressions) {
            try (TempFile tempFile = new TempFile("test", "parquet")) {
                JobConf jobConf = new JobConf();
                jobConf.setEnum(ParquetOutputFormat.COMPRESSION, compressionCodecName);
                jobConf.setBoolean(ParquetOutputFormat.ENABLE_DICTIONARY, true);
                jobConf.setEnum(ParquetOutputFormat.WRITER_VERSION, version);
                writeParquetColumn(jobConf, tempFile.getFile(), compressionCodecName, objectInspector,
                        writeValues.iterator());
                assertFileContents(jobConf, objectInspector, tempFile, readValues, type);
            }
        }
    }
}
From source file: com.facebook.presto.hive.TestHiveFileFormats.java
License: Apache License

@Test
public void testRCText()
        throws Exception
{
    JobConf jobConf = new JobConf();
    RCFileOutputFormat outputFormat = new RCFileOutputFormat();
    @SuppressWarnings("rawtypes")
    RCFileInputFormat inputFormat = new RCFileInputFormat();
    @SuppressWarnings("deprecation")
    SerDe serde = new ColumnarSerDe();
    File file = File.createTempFile("presto_test", "rc-text");
    try {
        FileSplit split = createTestFile(file.getAbsolutePath(), outputFormat, serde, null);

        @SuppressWarnings("unchecked")
        RecordReader<?, BytesRefArrayWritable> recordReader = (RecordReader<?, BytesRefArrayWritable>) inputFormat
                .getRecordReader(split, jobConf, Reporter.NULL);

        Properties splitProperties = new Properties();
        splitProperties.setProperty("serialization.lib", "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe");
        splitProperties.setProperty("columns", COLUMN_NAMES_STRING);
        splitProperties.setProperty("columns.types", COLUMN_TYPES);

        RecordCursor cursor = new ColumnarTextHiveRecordCursor<>(recordReader, split.getLength(),
                splitProperties, new ArrayList<HivePartitionKey>(), getColumns(), DateTimeZone.getDefault(),
                DateTimeZone.getDefault());

        checkCursor(cursor);
    }
    finally {
        //noinspection ResultOfMethodCallIgnored
        file.delete();
    }
}
From source file: com.facebook.presto.hive.TestHiveFileFormats.java
License: Apache License

@Test
public void testRCBinary()
        throws Exception
{
    JobConf jobConf = new JobConf();
    RCFileOutputFormat outputFormat = new RCFileOutputFormat();
    @SuppressWarnings("rawtypes")
    RCFileInputFormat inputFormat = new RCFileInputFormat();
    @SuppressWarnings("deprecation")
    SerDe serde = new LazyBinaryColumnarSerDe();
    File file = File.createTempFile("presto_test", "rc-binary");
    try {
        FileSplit split = createTestFile(file.getAbsolutePath(), outputFormat, serde, null);

        @SuppressWarnings("unchecked")
        RecordReader<?, BytesRefArrayWritable> recordReader = (RecordReader<?, BytesRefArrayWritable>) inputFormat
                .getRecordReader(split, jobConf, Reporter.NULL);

        Properties splitProperties = new Properties();
        splitProperties.setProperty("serialization.lib", "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe");
        splitProperties.setProperty("columns", COLUMN_NAMES_STRING);
        splitProperties.setProperty("columns.types", COLUMN_TYPES);

        RecordCursor cursor = new ColumnarBinaryHiveRecordCursor<>(recordReader, split.getLength(),
                splitProperties, new ArrayList<HivePartitionKey>(), getColumns(), DateTimeZone.getDefault());

        checkCursor(cursor);
    }
    finally {
        //noinspection ResultOfMethodCallIgnored
        file.delete();
    }
}
From source file: com.facebook.presto.hive.TestOrcPageSourceMemoryTracking.java
License: Apache License

public static FileSplit createTestFile(String filePath, HiveOutputFormat<?, ?> outputFormat,
        @SuppressWarnings("deprecation") SerDe serDe, String compressionCodec, List<TestColumn> testColumns,
        int numRows)
        throws Exception
{
    // filter out partition keys, which are not written to the file
    testColumns = ImmutableList.copyOf(filter(testColumns, not(TestColumn::isPartitionKey)));

    JobConf jobConf = new JobConf();
    ReaderWriterProfiler.setProfilerOptions(jobConf);

    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", Joiner.on(',').join(transform(testColumns, TestColumn::getName)));
    tableProperties.setProperty("columns.types", Joiner.on(',').join(transform(testColumns, TestColumn::getType)));
    serDe.initialize(CONFIGURATION, tableProperties);

    if (compressionCodec != null) {
        CompressionCodec codec = new CompressionCodecFactory(CONFIGURATION).getCodecByName(compressionCodec);
        jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
        jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
    }

    RecordWriter recordWriter = createRecordWriter(new Path(filePath), CONFIGURATION);

    try {
        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(
                ImmutableList.copyOf(transform(testColumns, TestColumn::getName)),
                ImmutableList.copyOf(transform(testColumns, TestColumn::getObjectInspector)));

        Object row = objectInspector.create();

        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

        for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
            for (int i = 0; i < testColumns.size(); i++) {
                Object writeValue = testColumns.get(i).getWriteValue();
                if (writeValue instanceof Slice) {
                    writeValue = ((Slice) writeValue).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(i), writeValue);
            }

            Writable record = serDe.serialize(row, objectInspector);
            recordWriter.write(record);
            if (rowNumber % STRIPE_ROWS == STRIPE_ROWS - 1) {
                flushStripe(recordWriter);
            }
        }
    }
    finally {
        recordWriter.close(false);
    }

    Path path = new Path(filePath);
    path.getFileSystem(CONFIGURATION).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
}
From source file: com.facebook.presto.hive.TestS3SelectPushdown.java
License: Apache License

@BeforeClass
public void setUp()
{
    inputFormat = new TextInputFormat();
    inputFormat.configure(new JobConf());
}
From source file: com.facebook.presto.orc.OrcTester.java
License: Apache License

static RecordWriter createOrcRecordWriter(File outputFile, Format format, Compression compression,
        ObjectInspector columnObjectInspector)
        throws IOException
{
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.write.format", format == ORC_12 ? "0.12" : "0.11");
    jobConf.set("hive.exec.orc.default.compress", compression.name());
    ReaderWriterProfiler.setProfilerOptions(jobConf);
    return new OrcOutputFormat().getHiveRecordWriter(jobConf, new Path(outputFile.toURI()), Text.class,
            compression != NONE, createTableProperties("test", columnObjectInspector.getTypeName()), () -> {
            });
}
From source file: com.facebook.presto.orc.OrcTester.java
License: Apache License

private static RecordWriter createDwrfRecordWriter(File outputFile, Compression compressionCodec,
        ObjectInspector columnObjectInspector)
        throws IOException
{
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.default.compress", compressionCodec.name());
    jobConf.set("hive.exec.orc.compress", compressionCodec.name());
    OrcConf.setIntVar(jobConf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
    OrcConf.setIntVar(jobConf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 2);
    OrcConf.setBoolVar(jobConf, OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY, true);
    ReaderWriterProfiler.setProfilerOptions(jobConf);
    return new com.facebook.hive.orc.OrcOutputFormat().getHiveRecordWriter(jobConf,
            new Path(outputFile.toURI()), Text.class, compressionCodec != NONE,
            createTableProperties("test", columnObjectInspector.getTypeName()), () -> {
            });
}
From source file: com.facebook.presto.orc.TestCachingOrcDataSource.java
License: Apache License

private static FileSinkOperator.RecordWriter createOrcRecordWriter(File outputFile, Format format,
        Compression compression, ObjectInspector columnObjectInspector)
        throws IOException
{
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.write.format", format == ORC_12 ? "0.12" : "0.11");
    jobConf.set("hive.exec.orc.default.compress", compression.name());
    ReaderWriterProfiler.setProfilerOptions(jobConf);

    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", "test");
    tableProperties.setProperty("columns.types", columnObjectInspector.getTypeName());
    tableProperties.setProperty("orc.stripe.size", "1200000");

    return new OrcOutputFormat().getHiveRecordWriter(jobConf, new Path(outputFile.toURI()), Text.class,
            compression != NONE, tableProperties, () -> {
            });
}
From source file: com.facebook.presto.raptor.storage.OrcRowSink.java
License: Apache License

private static JobConf createJobConf()
{
    JobConf jobConf = new JobConf();
    jobConf.setClassLoader(JobConf.class.getClassLoader());
    // return the configured instance so the class loader setting is kept
    return jobConf;
}