Example usage for org.apache.hadoop.mapred JobConf set

List of usage examples for org.apache.hadoop.mapred JobConf set

Introduction

On this page you can find usage examples for org.apache.hadoop.mapred.JobConf.set.

Prototype

public void set(String name, String value) 

Document

Set the value of the name property.
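
As a minimal, self-contained sketch of the call (the class name, property name, and value below are illustrative only, not taken from the examples that follow):

import org.apache.hadoop.mapred.JobConf;

public class JobConfSetExample {
    public static void main(String[] args) {
        // Create an empty job configuration; JobConf extends Configuration,
        // so set(String, String) stores an arbitrary name/value pair.
        JobConf jobConf = new JobConf();
        jobConf.set("mapreduce.job.name", "example-job");

        // The value can be read back with get(String name).
        System.out.println(jobConf.get("mapreduce.job.name"));
    }
}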

Usage

From source file: com.facebook.presto.hive.TestOrcPageSourceMemoryTracking.java

License: Apache License

public static FileSplit createTestFile(String filePath, HiveOutputFormat<?, ?> outputFormat,
        @SuppressWarnings("deprecation") SerDe serDe, String compressionCodec, List<TestColumn> testColumns,
        int numRows) throws Exception {
    // filter out partition keys, which are not written to the file
    testColumns = ImmutableList.copyOf(filter(testColumns, not(TestColumn::isPartitionKey)));

    JobConf jobConf = new JobConf();
    ReaderWriterProfiler.setProfilerOptions(jobConf);

    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", Joiner.on(',').join(transform(testColumns, TestColumn::getName)));
    tableProperties.setProperty("columns.types",
            Joiner.on(',').join(transform(testColumns, TestColumn::getType)));
    serDe.initialize(CONFIGURATION, tableProperties);

    if (compressionCodec != null) {
        CompressionCodec codec = new CompressionCodecFactory(CONFIGURATION).getCodecByName(compressionCodec);
        jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
        jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
    }

    RecordWriter recordWriter = createRecordWriter(new Path(filePath), CONFIGURATION);

    try {
        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(
                ImmutableList.copyOf(transform(testColumns, TestColumn::getName)),
                ImmutableList.copyOf(transform(testColumns, TestColumn::getObjectInspector)));

        Object row = objectInspector.create();

        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

        for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
            for (int i = 0; i < testColumns.size(); i++) {
                Object writeValue = testColumns.get(i).getWriteValue();
                if (writeValue instanceof Slice) {
                    writeValue = ((Slice) writeValue).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(i), writeValue);
            }

            Writable record = serDe.serialize(row, objectInspector);
            recordWriter.write(record);
            if (rowNumber % STRIPE_ROWS == STRIPE_ROWS - 1) {
                flushStripe(recordWriter);
            }
        }
    } finally {
        recordWriter.close(false);
    }

    Path path = new Path(filePath);
    path.getFileSystem(CONFIGURATION).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
}

From source file: com.facebook.presto.orc.OrcTester.java

License: Apache License

static RecordWriter createOrcRecordWriter(File outputFile, Format format, Compression compression,
        ObjectInspector columnObjectInspector) throws IOException {
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.write.format", format == ORC_12 ? "0.12" : "0.11");
    jobConf.set("hive.exec.orc.default.compress", compression.name());
    ReaderWriterProfiler.setProfilerOptions(jobConf);

    return new OrcOutputFormat().getHiveRecordWriter(jobConf, new Path(outputFile.toURI()), Text.class,
            compression != NONE, createTableProperties("test", columnObjectInspector.getTypeName()), () -> {
            });
}

From source file: com.facebook.presto.orc.OrcTester.java

License: Apache License

private static RecordWriter createDwrfRecordWriter(File outputFile, Compression compressionCodec,
        ObjectInspector columnObjectInspector) throws IOException {
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.default.compress", compressionCodec.name());
    jobConf.set("hive.exec.orc.compress", compressionCodec.name());
    OrcConf.setIntVar(jobConf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
    OrcConf.setIntVar(jobConf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 2);
    OrcConf.setBoolVar(jobConf, OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY, true);
    ReaderWriterProfiler.setProfilerOptions(jobConf);

    return new com.facebook.hive.orc.OrcOutputFormat().getHiveRecordWriter(jobConf,
            new Path(outputFile.toURI()), Text.class, compressionCodec != NONE,
            createTableProperties("test", columnObjectInspector.getTypeName()), () -> {
            });
}

From source file: com.facebook.presto.orc.TestCachingOrcDataSource.java

License: Apache License

private static FileSinkOperator.RecordWriter createOrcRecordWriter(File outputFile, Format format,
        Compression compression, ObjectInspector columnObjectInspector) throws IOException {
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.write.format", format == ORC_12 ? "0.12" : "0.11");
    jobConf.set("hive.exec.orc.default.compress", compression.name());
    ReaderWriterProfiler.setProfilerOptions(jobConf);

    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", "test");
    tableProperties.setProperty("columns.types", columnObjectInspector.getTypeName());
    tableProperties.setProperty("orc.stripe.size", "1200000");

    return new OrcOutputFormat().getHiveRecordWriter(jobConf, new Path(outputFile.toURI()), Text.class,
            compression != NONE, tableProperties, () -> {
            });
}

From source file: com.facebook.presto.rcfile.RcFileTester.java

License: Apache License

private static RecordWriter createRcFileWriterOld(File outputFile, Compression compression,
        ObjectInspector columnObjectInspector) throws IOException {
    JobConf jobConf = new JobConf(false);
    Optional<String> codecName = compression.getCodecName();
    if (codecName.isPresent()) {
        jobConf.set(COMPRESS_CODEC, codecName.get());
    }

    return new RCFileOutputFormat().getHiveRecordWriter(jobConf, new Path(outputFile.toURI()), Text.class,
            codecName.isPresent(), createTableProperties("test", columnObjectInspector.getTypeName()), () -> {
            });
}

From source file: com.firewallid.io.HBaseWrite.java

public void save(String tableName, JavaPairRDD<String, String> savePairRDD, String destColumn)
        throws IOException {
    /* Check hbase table */
    if (!HBaseTableUtils.istableExists(tableName)) {
        throw new TableNotFoundException();
    }

    /* Check column family */
    if (!HBaseTableUtils.isFamilyExists(tableName, destColumn.split(":")[0])) {
        throw new NoSuchColumnFamilyException();
    }

    /* Save to HBase */
    JobConf jobConf = new JobConf();
    jobConf.setOutputFormat(TableOutputFormat.class);
    jobConf.set(TableOutputFormat.OUTPUT_TABLE, tableName);

    savePairRDD.mapToPair((Tuple2<String, String> t) -> convertRowToPut(t._1, destColumn, t._2))
            .filter((Tuple2<ImmutableBytesWritable, Put> t1) -> t1 != null).saveAsHadoopDataset(jobConf);
}

From source file: com.firewallid.io.HBaseWrite.java

public void save(String tableName, JavaPairRDD<String, Map<String, String>> savePairRDD,
        List<String> destFamilys) throws IOException {
    /* Check hbase table */
    if (!HBaseTableUtils.istableExists(tableName)) {
        throw new TableNotFoundException();
    }

    /* Check column family */
    if (!HBaseTableUtils.isFamilyExists(tableName, destFamilys)) {
        throw new NoSuchColumnFamilyException();
    }

    /* Save to HBase */
    JobConf jobConf = new JobConf();
    jobConf.setOutputFormat(TableOutputFormat.class);
    jobConf.set(TableOutputFormat.OUTPUT_TABLE, tableName);

    savePairRDD.mapToPair((Tuple2<String, Map<String, String>> t) -> convertRowToPut(t))
            .filter((Tuple2<ImmutableBytesWritable, Put> t1) -> t1 != null).saveAsHadoopDataset(jobConf);
}

From source file: com.github.dryangkun.hbase.tidx.hive.HBaseStorageHandler.java

License: Apache License

@Override
public void configureJobConf(TableDesc tableDesc, JobConf jobConf) {
    try {
        HBaseSerDe.configureJobConf(tableDesc, jobConf);
        /*
         * HIVE-6356
         * The following code change is only needed for hbase-0.96.0 due to HBASE-9165, and
         * will not be required once Hive bumps up its hbase version. At that time, we will
         * only need TableMapReduceUtil.addDependencyJars(jobConf) here.
         */
        if (counterClass != null) {
            TableMapReduceUtil.addDependencyJars(jobConf, HBaseStorageHandler.class, TableInputFormatBase.class,
                    counterClass);
        } else {
            TableMapReduceUtil.addDependencyJars(jobConf, HBaseStorageHandler.class,
                    TableInputFormatBase.class);
        }
        if (HiveConf.getVar(jobConf, HiveConf.ConfVars.HIVE_HBASE_SNAPSHOT_NAME) != null) {
            // There is an extra dependency on MetricsRegistry for snapshot IF.
            TableMapReduceUtil.addDependencyJars(jobConf, MetricsRegistry.class);
        }
        Set<String> merged = new LinkedHashSet<String>(jobConf.getStringCollection("tmpjars"));

        Job copy = new Job(jobConf);
        TableMapReduceUtil.addDependencyJars(copy);
        merged.addAll(copy.getConfiguration().getStringCollection("tmpjars"));
        jobConf.set("tmpjars", StringUtils.arrayToString(merged.toArray(new String[0])));

        // Get credentials using the configuration instance which has HBase properties
        JobConf hbaseJobConf = new JobConf(getConf());
        org.apache.hadoop.hbase.mapred.TableMapReduceUtil.initCredentials(hbaseJobConf);
        ShimLoader.getHadoopShims().mergeCredentials(jobConf, hbaseJobConf);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

From source file: com.github.dryangkun.hbase.tidx.hive.HiveHBaseTableOutputFormat.java

License: Apache License

/**
 * Update the out table, and output an empty key as the key.
 *
 * @param jc the job configuration file
 * @param finalOutPath the final output table name
 * @param valueClass the value class
 * @param isCompressed whether the content is compressed or not
 * @param tableProperties the table info of the corresponding table
 * @param progress progress used for status report
 * @return the RecordWriter for the output file
 */

@Override
public void checkOutputSpecs(FileSystem fs, JobConf jc) throws IOException {

    //obtain delegation tokens for the job
    if (UserGroupInformation.getCurrentUser().hasKerberosCredentials()) {
        TableMapReduceUtil.initCredentials(jc);
    }

    String hbaseTableName = jc.get(HBaseSerDe.HBASE_TABLE_NAME);
    jc.set(TableOutputFormat.OUTPUT_TABLE, hbaseTableName);
    Job job = new Job(jc);
    JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);

    try {
        checkOutputSpecs(jobContext);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
}

From source file: com.github.dryangkun.hbase.tidx.hive.HiveHBaseTableOutputFormat.java

License: Apache License

@Override
public org.apache.hadoop.mapred.RecordWriter<ImmutableBytesWritable, Object> getRecordWriter(
        FileSystem fileSystem, JobConf jobConf, String name, Progressable progressable) throws IOException {

    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    jobConf.set(TableOutputFormat.OUTPUT_TABLE, hbaseTableName);
    final boolean walEnabled = HiveConf.getBoolVar(jobConf, HiveConf.ConfVars.HIVE_HBASE_WAL_ENABLED);
    final HTable table = new HTable(HBaseConfiguration.create(jobConf), hbaseTableName);
    table.setAutoFlush(false);
    return new MyRecordWriter(table, walEnabled);
}