List of usage examples for the org.apache.hadoop.mapreduce.task.JobContextImpl constructor
public JobContextImpl(Configuration conf, JobID jobId)
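The constructor takes a Configuration and a JobID. Before the per-project examples below, here is a minimal sketch of the recurring pattern they share: build the context from a configuration and a (usually placeholder) JobID, then hand it to an InputFormat to compute splits. The input path, JobID identifier, and class name here are illustrative assumptions, not taken from any of the listed sources.

    import java.util.List;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.InputSplit;
    import org.apache.hadoop.mapreduce.JobContext;
    import org.apache.hadoop.mapreduce.JobID;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.mapreduce.task.JobContextImpl;

    public class JobContextImplSketch {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            // hypothetical input directory, set the same way the SSTableInputFormatTest example does
            conf.set(FileInputFormat.INPUT_DIR, "/tmp/input");

            // the JobID is typically a dummy when the context is built outside a running job
            JobContext context = new JobContextImpl(conf, new JobID("local", 0));

            // any mapreduce InputFormat can consume the context; TextInputFormat is just an example
            List<InputSplit> splits = new TextInputFormat().getSplits(context);
            System.out.println("splits: " + splits.size());
        }
    }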
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java
License:Apache License
static JobContext getNamedJobContext(JobContext context, String namedOutput) throws IOException {
    Job job = getNamedJob(context, namedOutput);
    return new JobContextImpl(job.getConfiguration(), job.getJobID());
}
From source file:com.knewton.mapreduce.io.SSTableInputFormatTest.java
License:Apache License
/**
 * Helper method for setting up all the mock files in the FS. Calls list status in the input
 * format and returns the result.
 */
private List<FileStatus> testListStatus(Configuration conf, String dirs) throws IOException {
    JobID jobId = new JobID();
    conf.set(FileInputFormat.INPUT_DIR, dirs);
    JobContextImpl job = new JobContextImpl(conf, jobId);
    SSTableColumnInputFormat inputFormat = new SSTableColumnInputFormat();
    return inputFormat.listStatus(job);
}
From source file:com.netflix.bdp.s3.TestS3MultipartOutputCommitter.java
License:Apache License
@Before
public void setupCommitter() throws Exception {
    getConfiguration().set("s3.multipart.committer.num-threads", String.valueOf(numThreads));
    getConfiguration().set(UPLOAD_UUID, UUID.randomUUID().toString());
    this.job = new JobContextImpl(getConfiguration(), JOB_ID);
    this.jobCommitter = new MockedS3Committer(S3_OUTPUT_PATH, job);
    jobCommitter.setupJob(job);
    this.uuid = job.getConfiguration().get(UPLOAD_UUID);
    this.tac = new TaskAttemptContextImpl(new Configuration(job.getConfiguration()), AID);
    // get the task's configuration copy so modifications take effect
    this.conf = tac.getConfiguration();
    conf.set("mapred.local.dir", "/tmp/local-0,/tmp/local-1");
    conf.setInt(UPLOAD_SIZE, 100);
    this.committer = new MockedS3Committer(S3_OUTPUT_PATH, tac);
}
From source file:com.splicemachine.derby.impl.io.WholeTextInputFormatTest.java
License:Apache License
@Test
public void testGetsStreamForDirectory() throws Exception {
    /*
     * This test failed before changes to WholeTextInputFormat (hooray for test-driven development!),
     * so this constitutes an effective regression test for SPLICE-739. Of course, we'll be certain
     * about it by ALSO writing an IT, but this is a nice little Unit test of the same thing.
     */
    Configuration configuration = HConfiguration.unwrapDelegate();
    String dirPath = SpliceUnitTest.getResourceDirectory() + "multiLineDirectory";
    configuration.set("mapred.input.dir", dirPath);
    WholeTextInputFormat wtif = new WholeTextInputFormat();
    wtif.setConf(configuration);

    JobContext ctx = new JobContextImpl(configuration, new JobID("test", 1));
    List<InputSplit> splits = wtif.getSplits(ctx);

    int i = 0;
    Set<String> files = readFileNames(dirPath);
    Assert.assertEquals("We didn't get a split per file", files.size(), splits.size());

    Set<String> readFiles = new HashSet<>();
    long totalRecords = 0;
    for (InputSplit is : splits) {
        TaskAttemptContext tac = new TaskAttemptContextImpl(configuration,
                new TaskAttemptID("test", 1, true, i, 1));
        RecordReader<String, InputStream> recordReader = wtif.createRecordReader(is, tac);
        CombineFileSplit cfs = (CombineFileSplit) is;
        System.out.println(cfs);
        totalRecords += collectRecords(readFiles, recordReader);
        i++;
    }
    Assert.assertEquals("did not read all data!", 28, totalRecords);
    Assert.assertEquals("Did not read all files!", files.size(), readFiles.size());
    for (String expectedFile : files) {
        Assert.assertTrue("Did not read file <" + expectedFile + "> read =" + readFiles + " exp",
                readFiles.contains(expectedFile));
    }
}
From source file:cz.seznam.euphoria.hadoop.HadoopUtils.java
License:Apache License
public static JobContext createJobContext(Configuration conf) {
    // TODO jobId uses some default hard-coded value
    return new JobContextImpl(conf, new JobID("", 0));
}
From source file:edu.uci.ics.hyracks.hdfs.ContextFactory.java
License:Apache License
public JobContext createJobContext(Configuration conf) {
    return new JobContextImpl(conf, new JobID("0", 0));
}
From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java
License:Apache License
private Map<String, AbstractIndex> getSegmentAbstractIndexs(JobContext job,
        AbsoluteTableIdentifier absoluteTableIdentifier, String segmentId)
        throws IOException, IndexBuilderException {
    Map<String, AbstractIndex> segmentIndexMap = SegmentTaskIndexStore.getInstance()
            .getSegmentBTreeIfExists(absoluteTableIdentifier, segmentId);
    // if segment tree is not loaded, load the segment tree
    if (segmentIndexMap == null) {
        // List<FileStatus> fileStatusList = new LinkedList<FileStatus>();
        List<TableBlockInfo> tableBlockInfoList = new LinkedList<TableBlockInfo>();
        // getFileStatusOfSegments(job, new int[]{ segmentId }, fileStatusList);

        // get file location of all files of given segment
        JobContext newJob = new JobContextImpl(new Configuration(job.getConfiguration()), job.getJobID());
        newJob.getConfiguration().set(CarbonInputFormat.INPUT_SEGMENT_NUMBERS, segmentId + "");

        // identify table blocks
        for (InputSplit inputSplit : getSplitsInternal(newJob)) {
            CarbonInputSplit carbonInputSplit = (CarbonInputSplit) inputSplit;
            tableBlockInfoList.add(new TableBlockInfo(carbonInputSplit.getPath().toString(),
                    carbonInputSplit.getStart(), segmentId, carbonInputSplit.getLocations(),
                    carbonInputSplit.getLength()));
        }

        Map<String, List<TableBlockInfo>> segmentToTableBlocksInfos = new HashMap<>();
        segmentToTableBlocksInfos.put(segmentId, tableBlockInfoList);

        // get Btree blocks for given segment
        segmentIndexMap = SegmentTaskIndexStore.getInstance()
                .loadAndGetTaskIdToSegmentsMap(segmentToTableBlocksInfos, absoluteTableIdentifier);
    }
    return segmentIndexMap;
}
From source file:org.apache.carbondata.sdk.file.CarbonReaderBuilder.java
License:Apache License
/**
 * Build CarbonReader
 *
 * @param <T>
 * @return CarbonReader
 * @throws IOException
 * @throws InterruptedException
 */
public <T> CarbonReader<T> build() throws IOException, InterruptedException {
    if (hadoopConf == null) {
        hadoopConf = FileFactory.getConfiguration();
    }
    CarbonTable table;
    // now always infer schema. TODO: Refactor in next version.
    table = CarbonTable.buildTable(tablePath, tableName, hadoopConf);
    final CarbonFileInputFormat format = new CarbonFileInputFormat();
    final Job job = new Job(hadoopConf);
    format.setTableInfo(job.getConfiguration(), table.getTableInfo());
    format.setTablePath(job.getConfiguration(), table.getTablePath());
    format.setTableName(job.getConfiguration(), table.getTableName());
    format.setDatabaseName(job.getConfiguration(), table.getDatabaseName());
    if (filterExpression != null) {
        format.setFilterPredicates(job.getConfiguration(), filterExpression);
    }

    if (projectionColumns != null) {
        // set the user projection
        int len = projectionColumns.length;
        // TODO : Handle projection of complex child columns
        for (int i = 0; i < len; i++) {
            if (projectionColumns[i].contains(".")) {
                throw new UnsupportedOperationException(
                        "Complex child columns projection NOT supported through CarbonReader");
            }
        }
        format.setColumnProjection(job.getConfiguration(), projectionColumns);
    }

    try {
        if (filterExpression == null) {
            job.getConfiguration().set("filter_blocks", "false");
        }
        List<InputSplit> splits = format.getSplits(new JobContextImpl(job.getConfiguration(), new JobID()));
        List<RecordReader<Void, T>> readers = new ArrayList<>(splits.size());
        for (InputSplit split : splits) {
            TaskAttemptContextImpl attempt = new TaskAttemptContextImpl(job.getConfiguration(),
                    new TaskAttemptID());
            RecordReader reader;
            QueryModel queryModel = format.createQueryModel(split, attempt);
            boolean hasComplex = false;
            for (ProjectionDimension projectionDimension : queryModel.getProjectionDimensions()) {
                if (projectionDimension.getDimension().isComplex()) {
                    hasComplex = true;
                    break;
                }
            }
            if (useVectorReader && !hasComplex) {
                queryModel.setDirectVectorFill(filterExpression == null);
                reader = new CarbonVectorizedRecordReader(queryModel);
            } else {
                reader = format.createRecordReader(split, attempt);
            }
            try {
                reader.initialize(split, attempt);
                readers.add(reader);
            } catch (Exception e) {
                CarbonUtil.closeStreams(readers.toArray(new RecordReader[0]));
                throw e;
            }
        }
        return new CarbonReader<>(readers);
    } catch (Exception ex) {
        // Clear the datamap cache as it can get added in getSplits() method
        DataMapStoreManager.getInstance().clearDataMaps(table.getAbsoluteTableIdentifier());
        throw ex;
    }
}
From source file:org.apache.carbondata.store.LocalCarbonStore.java
License:Apache License
@Override
public Iterator<CarbonRow> scan(AbsoluteTableIdentifier tableIdentifier, String[] projectColumns,
        Expression filter) throws IOException {
    Objects.requireNonNull(tableIdentifier);
    Objects.requireNonNull(projectColumns);

    CarbonTable table = getTable(tableIdentifier.getTablePath());
    if (table.isStreamingSink() || table.isHivePartitionTable()) {
        throw new UnsupportedOperationException("streaming and partition table is not supported");
    }

    // TODO: use InputFormat to prune data and read data
    final CarbonTableInputFormat format = new CarbonTableInputFormat();
    final Job job = new Job(new Configuration());
    CarbonInputFormat.setTableInfo(job.getConfiguration(), table.getTableInfo());
    CarbonInputFormat.setTablePath(job.getConfiguration(), table.getTablePath());
    CarbonInputFormat.setTableName(job.getConfiguration(), table.getTableName());
    CarbonInputFormat.setDatabaseName(job.getConfiguration(), table.getDatabaseName());
    CarbonInputFormat.setCarbonReadSupport(job.getConfiguration(), CarbonRowReadSupport.class);
    CarbonInputFormat.setColumnProjection(job.getConfiguration(), new CarbonProjection(projectColumns));
    if (filter != null) {
        CarbonInputFormat.setFilterPredicates(job.getConfiguration(), filter);
    }

    final List<InputSplit> splits = format.getSplits(new JobContextImpl(job.getConfiguration(), new JobID()));
    List<RecordReader<Void, Object>> readers = new ArrayList<>(splits.size());
    List<CarbonRow> rows = new ArrayList<>();

    try {
        for (InputSplit split : splits) {
            TaskAttemptContextImpl attempt = new TaskAttemptContextImpl(job.getConfiguration(),
                    new TaskAttemptID());
            RecordReader reader = format.createRecordReader(split, attempt);
            reader.initialize(split, attempt);
            readers.add(reader);
        }

        for (RecordReader<Void, Object> reader : readers) {
            while (reader.nextKeyValue()) {
                rows.add((CarbonRow) reader.getCurrentValue());
            }
            try {
                reader.close();
            } catch (IOException e) {
                LOGGER.error(e);
            }
        }
    } catch (InterruptedException e) {
        throw new IOException(e);
    } finally {
        for (RecordReader<Void, Object> reader : readers) {
            try {
                reader.close();
            } catch (IOException e) {
                LOGGER.error(e);
            }
        }
    }
    return rows.iterator();
}
From source file:org.apache.giraph.job.HadoopUtils.java
License:Apache License
/**
 * Create a JobContext, supporting many Hadoops.
 *
 * @param conf Configuration
 * @param jobID JobID to use
 * @return JobContext
 */
public static JobContext makeJobContext(Configuration conf, JobID jobID) {
    JobContext context;
    /*if[HADOOP_NON_JOBCONTEXT_IS_INTERFACE]
    context = new JobContext(conf, jobID);
    else[HADOOP_NON_JOBCONTEXT_IS_INTERFACE]*/
    context = new JobContextImpl(conf, jobID);
    /*end[HADOOP_NON_JOBCONTEXT_IS_INTERFACE]*/
    return context;
}