List of usage examples for the org.apache.hadoop.mapreduce.task.JobContextImpl constructor
public JobContextImpl(Configuration conf, JobID jobId)
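The constructor takes a Configuration and a JobID. Before the per-project examples below, here is a minimal sketch of the recurring pattern they share: build the context from a configuration and a (usually placeholder) JobID, then hand it to an InputFormat to compute splits. The input path, JobID identifier, and class name here are illustrative assumptions, not taken from any of the listed sources.

    import java.util.List;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.InputSplit;
    import org.apache.hadoop.mapreduce.JobContext;
    import org.apache.hadoop.mapreduce.JobID;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.mapreduce.task.JobContextImpl;

    public class JobContextImplSketch {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            // hypothetical input directory, set the same way the SSTableInputFormatTest example does
            conf.set(FileInputFormat.INPUT_DIR, "/tmp/input");

            // the JobID is typically a dummy when the context is built outside a running job
            JobContext context = new JobContextImpl(conf, new JobID("local", 0));

            // any mapreduce InputFormat can consume the context; TextInputFormat is just an example
            List<InputSplit> splits = new TextInputFormat().getSplits(context);
            System.out.println("splits: " + splits.size());
        }
    }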
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java
License:Apache License
static JobContext getNamedJobContext(JobContext context, String namedOutput) throws IOException {
    Job job = getNamedJob(context, namedOutput);
    return new JobContextImpl(job.getConfiguration(), job.getJobID());
}
From source file:com.knewton.mapreduce.io.SSTableInputFormatTest.java
License:Apache License
/**
 * Helper method for setting up all the mock files in the FS. Calls list status in the input
 * format and returns the result.
 */
private List<FileStatus> testListStatus(Configuration conf, String dirs) throws IOException {
    JobID jobId = new JobID();
    conf.set(FileInputFormat.INPUT_DIR, dirs);
    JobContextImpl job = new JobContextImpl(conf, jobId);
    SSTableColumnInputFormat inputFormat = new SSTableColumnInputFormat();
    return inputFormat.listStatus(job);
}
From source file:com.netflix.bdp.s3.TestS3MultipartOutputCommitter.java
License:Apache License
@Before
public void setupCommitter() throws Exception {
    getConfiguration().set("s3.multipart.committer.num-threads", String.valueOf(numThreads));
    getConfiguration().set(UPLOAD_UUID, UUID.randomUUID().toString());
    this.job = new JobContextImpl(getConfiguration(), JOB_ID);
    this.jobCommitter = new MockedS3Committer(S3_OUTPUT_PATH, job);
    jobCommitter.setupJob(job);
    this.uuid = job.getConfiguration().get(UPLOAD_UUID);
    this.tac = new TaskAttemptContextImpl(new Configuration(job.getConfiguration()), AID);
    // get the task's configuration copy so modifications take effect
    this.conf = tac.getConfiguration();
    conf.set("mapred.local.dir", "/tmp/local-0,/tmp/local-1");
    conf.setInt(UPLOAD_SIZE, 100);
    this.committer = new MockedS3Committer(S3_OUTPUT_PATH, tac);
}
From source file:com.splicemachine.derby.impl.io.WholeTextInputFormatTest.java
License:Apache License
@Test
public void testGetsStreamForDirectory() throws Exception {
    /*
     * This test failed before changes to WholeTextInputFormat (hooray for test-driven development!),
     * so this constitutes an effective regression test for SPLICE-739. Of course, we'll be certain
     * about it by ALSO writing an IT, but this is a nice little Unit test of the same thing.
     */
    Configuration configuration = HConfiguration.unwrapDelegate();
    String dirPath = SpliceUnitTest.getResourceDirectory() + "multiLineDirectory";
    configuration.set("mapred.input.dir", dirPath);
    WholeTextInputFormat wtif = new WholeTextInputFormat();
    wtif.setConf(configuration);

    JobContext ctx = new JobContextImpl(configuration, new JobID("test", 1));
    List<InputSplit> splits = wtif.getSplits(ctx);

    int i = 0;
    Set<String> files = readFileNames(dirPath);
    Assert.assertEquals("We didn't get a split per file", files.size(), splits.size());

    Set<String> readFiles = new HashSet<>();
    long totalRecords = 0;
    for (InputSplit is : splits) {
        TaskAttemptContext tac = new TaskAttemptContextImpl(configuration,
                new TaskAttemptID("test", 1, true, i, 1));
        RecordReader<String, InputStream> recordReader = wtif.createRecordReader(is, tac);
        CombineFileSplit cfs = (CombineFileSplit) is;
        System.out.println(cfs);
        totalRecords += collectRecords(readFiles, recordReader);
        i++;
    }
    Assert.assertEquals("did not read all data!", 28, totalRecords);
    Assert.assertEquals("Did not read all files!", files.size(), readFiles.size());
    for (String expectedFile : files) {
        Assert.assertTrue("Did not read file <" + expectedFile + "> read =" + readFiles + " exp",
                readFiles.contains(expectedFile));
    }
}
From source file:cz.seznam.euphoria.hadoop.HadoopUtils.java
License:Apache License
public static JobContext createJobContext(Configuration conf) {
    // TODO jobId uses some default hard-coded value
    return new JobContextImpl(conf, new JobID("", 0));
}
From source file:edu.uci.ics.hyracks.hdfs.ContextFactory.java
License:Apache License
public JobContext createJobContext(Configuration conf) {
    return new JobContextImpl(conf, new JobID("0", 0));
}
From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java
License:Apache License
private Map<String, AbstractIndex> getSegmentAbstractIndexs(JobContext job,
        AbsoluteTableIdentifier absoluteTableIdentifier, String segmentId)
        throws IOException, IndexBuilderException {
    Map<String, AbstractIndex> segmentIndexMap = SegmentTaskIndexStore.getInstance()
            .getSegmentBTreeIfExists(absoluteTableIdentifier, segmentId);
    // if segment tree is not loaded, load the segment tree
    if (segmentIndexMap == null) {
        // List<FileStatus> fileStatusList = new LinkedList<FileStatus>();
        List<TableBlockInfo> tableBlockInfoList = new LinkedList<TableBlockInfo>();
        // getFileStatusOfSegments(job, new int[]{ segmentId }, fileStatusList);

        // get file location of all files of given segment
        JobContext newJob = new JobContextImpl(new Configuration(job.getConfiguration()), job.getJobID());
        newJob.getConfiguration().set(CarbonInputFormat.INPUT_SEGMENT_NUMBERS, segmentId + "");

        // identify table blocks
        for (InputSplit inputSplit : getSplitsInternal(newJob)) {
            CarbonInputSplit carbonInputSplit = (CarbonInputSplit) inputSplit;
            tableBlockInfoList.add(new TableBlockInfo(carbonInputSplit.getPath().toString(),
                    carbonInputSplit.getStart(), segmentId, carbonInputSplit.getLocations(),
                    carbonInputSplit.getLength()));
        }

        Map<String, List<TableBlockInfo>> segmentToTableBlocksInfos = new HashMap<>();
        segmentToTableBlocksInfos.put(segmentId, tableBlockInfoList);

        // get Btree blocks for given segment
        segmentIndexMap = SegmentTaskIndexStore.getInstance()
                .loadAndGetTaskIdToSegmentsMap(segmentToTableBlocksInfos, absoluteTableIdentifier);
    }
    return segmentIndexMap;
}
From source file:org.apache.carbondata.sdk.file.CarbonReaderBuilder.java
License:Apache License
/**
 * Build CarbonReader
 *
 * @param <T>
 * @return CarbonReader
 * @throws IOException
 * @throws InterruptedException
 */
public <T> CarbonReader<T> build() throws IOException, InterruptedException {
    if (hadoopConf == null) {
        hadoopConf = FileFactory.getConfiguration();
    }
    CarbonTable table;
    // now always infer schema. TODO: Refactor in next version.
    table = CarbonTable.buildTable(tablePath, tableName, hadoopConf);
    final CarbonFileInputFormat format = new CarbonFileInputFormat();
    final Job job = new Job(hadoopConf);
    format.setTableInfo(job.getConfiguration(), table.getTableInfo());
    format.setTablePath(job.getConfiguration(), table.getTablePath());
    format.setTableName(job.getConfiguration(), table.getTableName());
    format.setDatabaseName(job.getConfiguration(), table.getDatabaseName());
    if (filterExpression != null) {
        format.setFilterPredicates(job.getConfiguration(), filterExpression);
    }

    if (projectionColumns != null) {
        // set the user projection
        int len = projectionColumns.length;
        // TODO : Handle projection of complex child columns
        for (int i = 0; i < len; i++) {
            if (projectionColumns[i].contains(".")) {
                throw new UnsupportedOperationException(
                        "Complex child columns projection NOT supported through CarbonReader");
            }
        }
        format.setColumnProjection(job.getConfiguration(), projectionColumns);
    }

    try {
        if (filterExpression == null) {
            job.getConfiguration().set("filter_blocks", "false");
        }
        List<InputSplit> splits = format.getSplits(new JobContextImpl(job.getConfiguration(), new JobID()));
        List<RecordReader<Void, T>> readers = new ArrayList<>(splits.size());
        for (InputSplit split : splits) {
            TaskAttemptContextImpl attempt = new TaskAttemptContextImpl(job.getConfiguration(),
                    new TaskAttemptID());
            RecordReader reader;
            QueryModel queryModel = format.createQueryModel(split, attempt);
            boolean hasComplex = false;
            for (ProjectionDimension projectionDimension : queryModel.getProjectionDimensions()) {
                if (projectionDimension.getDimension().isComplex()) {
                    hasComplex = true;
                    break;
                }
            }
            if (useVectorReader && !hasComplex) {
                queryModel.setDirectVectorFill(filterExpression == null);
                reader = new CarbonVectorizedRecordReader(queryModel);
            } else {
                reader = format.createRecordReader(split, attempt);
            }
            try {
                reader.initialize(split, attempt);
                readers.add(reader);
            } catch (Exception e) {
                CarbonUtil.closeStreams(readers.toArray(new RecordReader[0]));
                throw e;
            }
        }
        return new CarbonReader<>(readers);
    } catch (Exception ex) {
        // Clear the datamap cache as it can get added in getSplits() method
        DataMapStoreManager.getInstance().clearDataMaps(table.getAbsoluteTableIdentifier());
        throw ex;
    }
}
From source file:org.apache.carbondata.store.LocalCarbonStore.java
License:Apache License
@Override
public Iterator<CarbonRow> scan(AbsoluteTableIdentifier tableIdentifier, String[] projectColumns,
        Expression filter) throws IOException {
    Objects.requireNonNull(tableIdentifier);
    Objects.requireNonNull(projectColumns);

    CarbonTable table = getTable(tableIdentifier.getTablePath());
    if (table.isStreamingSink() || table.isHivePartitionTable()) {
        throw new UnsupportedOperationException("streaming and partition table is not supported");
    }

    // TODO: use InputFormat to prune data and read data
    final CarbonTableInputFormat format = new CarbonTableInputFormat();
    final Job job = new Job(new Configuration());
    CarbonInputFormat.setTableInfo(job.getConfiguration(), table.getTableInfo());
    CarbonInputFormat.setTablePath(job.getConfiguration(), table.getTablePath());
    CarbonInputFormat.setTableName(job.getConfiguration(), table.getTableName());
    CarbonInputFormat.setDatabaseName(job.getConfiguration(), table.getDatabaseName());
    CarbonInputFormat.setCarbonReadSupport(job.getConfiguration(), CarbonRowReadSupport.class);
    CarbonInputFormat.setColumnProjection(job.getConfiguration(), new CarbonProjection(projectColumns));
    if (filter != null) {
        CarbonInputFormat.setFilterPredicates(job.getConfiguration(), filter);
    }

    final List<InputSplit> splits = format.getSplits(new JobContextImpl(job.getConfiguration(), new JobID()));
    List<RecordReader<Void, Object>> readers = new ArrayList<>(splits.size());
    List<CarbonRow> rows = new ArrayList<>();

    try {
        for (InputSplit split : splits) {
            TaskAttemptContextImpl attempt = new TaskAttemptContextImpl(job.getConfiguration(),
                    new TaskAttemptID());
            RecordReader reader = format.createRecordReader(split, attempt);
            reader.initialize(split, attempt);
            readers.add(reader);
        }

        for (RecordReader<Void, Object> reader : readers) {
            while (reader.nextKeyValue()) {
                rows.add((CarbonRow) reader.getCurrentValue());
            }
            try {
                reader.close();
            } catch (IOException e) {
                LOGGER.error(e);
            }
        }
    } catch (InterruptedException e) {
        throw new IOException(e);
    } finally {
        for (RecordReader<Void, Object> reader : readers) {
            try {
                reader.close();
            } catch (IOException e) {
                LOGGER.error(e);
            }
        }
    }
    return rows.iterator();
}
From source file:org.apache.giraph.job.HadoopUtils.java
License:Apache License
/**
 * Create a JobContext, supporting many Hadoops.
 *
 * @param conf Configuration
 * @param jobID JobID to use
 * @return JobContext
 */
public static JobContext makeJobContext(Configuration conf, JobID jobID) {
    JobContext context;
    /*if[HADOOP_NON_JOBCONTEXT_IS_INTERFACE]
    context = new JobContext(conf, jobID);
    else[HADOOP_NON_JOBCONTEXT_IS_INTERFACE]*/
    context = new JobContextImpl(conf, jobID);
    /*end[HADOOP_NON_JOBCONTEXT_IS_INTERFACE]*/
    return context;
}