List of usage examples for org.apache.hadoop.mapred.JobConf (constructor)
public JobConf(Configuration conf)
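Every example below follows the same pattern: an existing Configuration (or HiveConf) is wrapped in a JobConf so that the mapred-era APIs (input/output formats, committers) can consume it. A minimal, self-contained sketch of that copying constructor; the property key is illustrative and not taken from any example:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;

public class JobConfExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("example.key", "example-value"); // illustrative property

        // Copying constructor: jobConf starts with every entry of conf,
        // and later mutations of jobConf do not write back into conf.
        JobConf jobConf = new JobConf(conf);
        System.out.println(jobConf.get("example.key")); // prints "example-value"
    }
}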
From source file:com.facebook.hiveio.input.HiveApiInputFormat.java
License:Apache License
@Override
public RecordReaderImpl createRecordReader(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    JobConf jobConf = new JobConf(conf);

    HInputSplit split = (HInputSplit) inputSplit;
    split.setConf(jobConf);

    int[] columnIds = split.getColumnIds();
    HiveUtils.setReadColumnIds(jobConf, columnIds);

    // CHECKSTYLE: stop LineLength
    org.apache.hadoop.mapred.RecordReader<WritableComparable, Writable> baseRecordReader =
            split.getBaseRecordReader(jobConf, context);
    // CHECKSTYLE: resume LineLength

    RecordParser<Writable> recordParser = getParser(baseRecordReader.createValue(), split, columnIds, conf);

    RecordReaderImpl reader = new RecordReaderImpl(baseRecordReader, recordParser);
    reader.setObserver(observer);
    return reader;
}
From source file:com.facebook.hiveio.output.CheckOutputSpecsTest.java
License:Apache License
@Test(expectedExceptions = IOException.class)
public void testTableDoesntExist() throws Exception {
    Configuration conf = new Configuration();

    HiveOutputDescription outputDesc = new HiveOutputDescription();
    outputDesc.getTableDesc().setTableName("doesnt-exist");

    OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
    outputConf.writeOutputDescription(outputDesc);

    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(PROFILE_ID);

    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());

    outputFormat.checkOutputSpecs(jobContext);
    fail();
}
From source file:com.facebook.hiveio.output.FaultyCheckOutputSpecsTest.java
License:Apache License
@Test(expectedExceptions = IOException.class)
public void testExceptionAfterTooManyRetriesWhenTableDoesNotExist() throws Exception {
    HiveMetastores.setTestClient(new FaultyThriftHiveMetastore(BackoffRetryTask.NUM_TRIES.getDefaultValue()));

    Configuration conf = new Configuration();
    conf.setLong(BackoffRetryTask.INITIAL_RETRY_DELAY_MSEC.getKey(), 100);

    HiveOutputDescription outputDesc = new HiveOutputDescription();
    outputDesc.getTableDesc().setTableName("doesnt-exist");

    OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
    outputConf.writeOutputDescription(outputDesc);

    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(PROFILE_ID);

    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());

    outputFormat.checkOutputSpecs(jobContext);
    fail();
}
From source file:com.facebook.hiveio.output.FaultyCheckOutputSpecsTest.java
License:Apache License
@Test
public void testRecoveredFromFailuresAfterRetries() throws Exception {
    FaultyThriftHiveMetastore metastore =
            new FaultyThriftHiveMetastore(BackoffRetryTask.NUM_TRIES.getDefaultValue() - 1);

    Configuration conf = new Configuration();
    conf.setLong(BackoffRetryTask.INITIAL_RETRY_DELAY_MSEC.getKey(), 100);

    HiveOutputDescription outputDesc = new HiveOutputDescription();
    outputDesc.getTableDesc().setTableName("foo");

    OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
    outputConf.writeOutputDescription(outputDesc);

    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(PROFILE_ID);

    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());

    HiveMetastores.setTestClient(metastore);
    outputFormat.checkOutputSpecs(jobContext);
    assertEquals(BackoffRetryTask.NUM_TRIES.getDefaultValue(), metastore.getNumCalls());
}
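All three tests above build a JobContext the same way; a hedged consolidation of that shared setup, assuming the same imports as the tests (the helper name is ours, not from the source files):

// Hypothetical helper capturing the context setup repeated in the tests above.
private static JobContext makeJobContext(Configuration conf) {
    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    return new HackJobContext(jobConf, taskContext.getJobID());
}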
From source file:com.facebook.hiveio.output.HiveApiOutputFormat.java
License:Apache License
/**
 * Get the base Hadoop RecordWriter.
 *
 * @param taskAttemptContext TaskAttemptContext
 * @param baseOutputFormat Hadoop OutputFormat
 * @return RecordWriter
 * @throws IOException Hadoop issues
 */
// CHECKSTYLE: stop LineLengthCheck
protected static org.apache.hadoop.mapred.RecordWriter<WritableComparable, Writable> getBaseRecordWriter(
        TaskAttemptContext taskAttemptContext,
        org.apache.hadoop.mapred.OutputFormat baseOutputFormat) throws IOException {
    // CHECKSTYLE: resume LineLengthCheck
    HadoopUtils.setWorkOutputDir(taskAttemptContext);

    JobConf jobConf = new JobConf(taskAttemptContext.getConfiguration());
    int fileId = CREATED_FILES_COUNTER.incrementAndGet();
    String name = FileOutputFormat.getUniqueName(jobConf, "part-" + fileId);

    Reporter reporter = new ProgressReporter(taskAttemptContext);
    org.apache.hadoop.mapred.RecordWriter<WritableComparable, Writable> baseWriter =
            baseOutputFormat.getRecordWriter(null, jobConf, name, reporter);

    LOG.info("getBaseRecordWriter: Created new {} with file {}", baseWriter, name);
    return baseWriter;
}
From source file:com.facebook.hiveio.output.HiveApiOutputFormat.java
License:Apache License
@Override
public HiveApiOutputCommitter getOutputCommitter(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    HadoopUtils.setWorkOutputDir(taskAttemptContext);

    Configuration conf = taskAttemptContext.getConfiguration();
    JobConf jobConf = new JobConf(conf);
    OutputCommitter baseCommitter = jobConf.getOutputCommitter();
    LOG.info("Getting output committer with base output committer {}",
            baseCommitter.getClass().getSimpleName());

    return new HiveApiOutputCommitter(new HackOutputCommitter(baseCommitter, jobConf), myProfileId);
}
From source file:com.facebook.hiveio.output.HiveOutput.java
License:Apache License
/**
 * Write records to a Hive table
 *
 * @param outputDesc description of Hive table
 * @param records the records to write
 * @throws TException
 * @throws IOException
 * @throws InterruptedException
 */
public static void writeTable(HiveOutputDescription outputDesc, Iterable<HiveWritableRecord> records)
        throws TException, IOException, InterruptedException {
    long uniqueId = System.nanoTime();
    String taskAttemptIdStr = "attempt_200707121733_" + (int) uniqueId + "_m_000005_0";
    String profile = Long.toString(uniqueId);

    HiveConf conf = new HiveConf(HiveOutput.class);
    conf.setInt("mapred.task.partition", 1);
    conf.set("mapred.task.id", taskAttemptIdStr);

    HiveApiOutputFormat.initProfile(conf, outputDesc, profile);

    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(profile);

    TaskAttemptID taskAttemptID = TaskAttemptID.forName(taskAttemptIdStr);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(new JobConf(conf), taskAttemptID);
    JobContext jobContext = new HackJobContext(new JobConf(conf), taskAttemptID.getJobID());

    RecordWriterImpl recordWriter = outputFormat.getRecordWriter(taskContext);

    HiveApiOutputCommitter committer = outputFormat.getOutputCommitter(taskContext);
    committer.setupJob(jobContext);

    committer.setupTask(taskContext);
    for (HiveWritableRecord record : records) {
        recordWriter.write(NullWritable.get(), record);
    }
    recordWriter.close(taskContext);
    committer.commitTask(taskContext);

    committer.commitJob(jobContext);
}
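A hedged usage sketch for writeTable; the table name and the record-building helper are hypothetical, since this listing does not show how HiveWritableRecord instances are created:

HiveOutputDescription outputDesc = new HiveOutputDescription();
outputDesc.getTableDesc().setTableName("my_table"); // hypothetical table name

// buildRecords() is a hypothetical helper; record construction is not shown here.
Iterable<HiveWritableRecord> records = buildRecords();
HiveOutput.writeTable(outputDesc, records);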
From source file:com.facebook.hiveio.output.PerThread.java
License:Apache License
/**
 * Get task context
 *
 * @return task context
 */
public TaskAttemptContext taskContext() {
    return new HackTaskAttemptContext(new JobConf(conf), taskID);
}
From source file:com.facebook.hiveio.output.PerThread.java
License:Apache License
/**
 * Get JobConf
 *
 * @return JobConf
 */
public JobConf jobConf() {
    return new JobConf(conf);
}
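Both PerThread accessors hand out a fresh copy rather than the shared conf; a small sketch of why that matters for thread isolation (the key is illustrative):

JobConf a = new JobConf(conf);
JobConf b = new JobConf(conf);
a.set("example.key", "thread-a-value"); // illustrative key
// b.get("example.key") still reflects conf: each JobConf copies conf at
// construction time, so per-thread mutation does not leak across threads.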
From source file:com.facebook.presto.hive.BackgroundHiveSplitLoader.java
License:Apache License
private void loadPartition(HivePartitionMetadata partition) throws IOException {
    String partitionName = partition.getHivePartition().getPartitionId();
    Properties schema = getPartitionSchema(table, partition.getPartition());
    List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition.getPartition());
    TupleDomain<HiveColumnHandle> effectivePredicate = partition.getHivePartition().getEffectivePredicate();

    Path path = new Path(getPartitionLocation(table, partition.getPartition()));
    Configuration configuration = hdfsEnvironment.getConfiguration(path);
    InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);
    FileSystem fs = hdfsEnvironment.getFileSystem(session.getUser(), path);

    if (inputFormat instanceof SymlinkTextInputFormat) {
        if (bucketHandle.isPresent()) {
            throw new PrestoException(StandardErrorCode.NOT_SUPPORTED,
                    "Bucketed table in SymlinkTextInputFormat is not yet supported");
        }

        // TODO: This should use an iterator like the HiveFileIterator
        for (Path targetPath : getTargetPathsFromSymlink(fs, path)) {
            // The input should be in TextInputFormat.
            TextInputFormat targetInputFormat = new TextInputFormat();
            // get the configuration for the target path -- it may be a different hdfs instance
            Configuration targetConfiguration = hdfsEnvironment.getConfiguration(targetPath);
            JobConf targetJob = new JobConf(targetConfiguration);
            targetJob.setInputFormat(TextInputFormat.class);
            targetInputFormat.configure(targetJob);
            FileInputFormat.setInputPaths(targetJob, targetPath);
            InputSplit[] targetSplits = targetInputFormat.getSplits(targetJob, 0);

            for (InputSplit inputSplit : targetSplits) {
                FileSplit split = (FileSplit) inputSplit;
                FileSystem targetFilesystem = hdfsEnvironment.getFileSystem(session.getUser(), split.getPath());
                FileStatus file = targetFilesystem.getFileStatus(split.getPath());
                hiveSplitSource.addToQueue(createHiveSplits(partitionName, file.getPath().toString(),
                        targetFilesystem.getFileBlockLocations(file, split.getStart(), split.getLength()),
                        split.getStart(), split.getLength(), schema, partitionKeys, false, session,
                        OptionalInt.empty(), effectivePredicate, partition.getColumnCoercions()));
                if (stopped) {
                    return;
                }
            }
        }
        return;
    }

    // If only one bucket could match: load that one file
    HiveFileIterator iterator = new HiveFileIterator(path, fs, directoryLister, namenodeStats, partitionName,
            inputFormat, schema, partitionKeys, effectivePredicate, partition.getColumnCoercions());
    if (!buckets.isEmpty()) {
        int bucketCount = buckets.get(0).getBucketCount();
        List<LocatedFileStatus> list = listAndSortBucketFiles(iterator, bucketCount);

        for (HiveBucket bucket : buckets) {
            int bucketNumber = bucket.getBucketNumber();
            LocatedFileStatus file = list.get(bucketNumber);
            boolean splittable = isSplittable(iterator.getInputFormat(),
                    hdfsEnvironment.getFileSystem(session.getUser(), file.getPath()), file.getPath());

            hiveSplitSource.addToQueue(createHiveSplits(iterator.getPartitionName(), file.getPath().toString(),
                    file.getBlockLocations(), 0, file.getLen(), iterator.getSchema(),
                    iterator.getPartitionKeys(), splittable, session, OptionalInt.of(bucketNumber),
                    effectivePredicate, partition.getColumnCoercions()));
        }
        return;
    }

    // If table is bucketed: list the directory, sort, tag with bucket id
    if (bucketHandle.isPresent()) {
        // HiveFileIterator skips hidden files automatically.
        int bucketCount = bucketHandle.get().getBucketCount();
        List<LocatedFileStatus> list = listAndSortBucketFiles(iterator, bucketCount);

        for (int bucketIndex = 0; bucketIndex < bucketCount; bucketIndex++) {
            LocatedFileStatus file = list.get(bucketIndex);
            boolean splittable = isSplittable(iterator.getInputFormat(),
                    hdfsEnvironment.getFileSystem(session.getUser(), file.getPath()), file.getPath());

            hiveSplitSource.addToQueue(createHiveSplits(iterator.getPartitionName(), file.getPath().toString(),
                    file.getBlockLocations(), 0, file.getLen(), iterator.getSchema(),
                    iterator.getPartitionKeys(), splittable, session, OptionalInt.of(bucketIndex),
                    iterator.getEffectivePredicate(), partition.getColumnCoercions()));
        }
        return;
    }

    fileIterators.addLast(iterator);
}
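The symlink branch above also doubles as the general old-API split-computation recipe: build a JobConf, register the input format and paths, then ask the format for splits. A stripped-down, self-contained sketch, with an illustrative input path:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

public class SplitSketch {
    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf(new Configuration());
        job.setInputFormat(TextInputFormat.class);
        FileInputFormat.setInputPaths(job, new Path("/tmp/example-input")); // illustrative path

        TextInputFormat inputFormat = new TextInputFormat();
        inputFormat.configure(job); // the format reads its settings from the JobConf
        InputSplit[] splits = inputFormat.getSplits(job, 0); // 0 = no split-count hint, as above
        System.out.println("splits: " + splits.length);
    }
}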