List of usage examples for the org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl constructor
public TaskAttemptContextImpl(Configuration conf, TaskAttemptID taskId)
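A minimal sketch of calling this constructor directly, assuming only the Hadoop MapReduce client classes are on the classpath; the job identifier "example" and the zero task/attempt numbers are illustrative values, not taken from any of the projects listed below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptContextExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Build an illustrative attempt id: job "example"/0, map task 0, attempt 0
        JobID jobId = new JobID("example", 0);
        TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
        TaskAttemptID attemptId = new TaskAttemptID(taskId, 0);
        // Wrap the configuration and attempt id in a context usable by record readers/writers
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, attemptId);
        System.out.println(context.getTaskAttemptID());
    }
}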
From source file:org.apache.carbondata.sdk.file.CarbonReaderBuilder.java
License:Apache License
/**
 * Build CarbonReader
 *
 * @param <T>
 * @return CarbonReader
 * @throws IOException
 * @throws InterruptedException
 */
public <T> CarbonReader<T> build() throws IOException, InterruptedException {
    if (hadoopConf == null) {
        hadoopConf = FileFactory.getConfiguration();
    }
    CarbonTable table;
    // now always infer schema. TODO: Refactor in next version.
    table = CarbonTable.buildTable(tablePath, tableName, hadoopConf);
    final CarbonFileInputFormat format = new CarbonFileInputFormat();
    final Job job = new Job(hadoopConf);
    format.setTableInfo(job.getConfiguration(), table.getTableInfo());
    format.setTablePath(job.getConfiguration(), table.getTablePath());
    format.setTableName(job.getConfiguration(), table.getTableName());
    format.setDatabaseName(job.getConfiguration(), table.getDatabaseName());
    if (filterExpression != null) {
        format.setFilterPredicates(job.getConfiguration(), filterExpression);
    }
    if (projectionColumns != null) {
        // set the user projection
        int len = projectionColumns.length;
        // TODO: Handle projection of complex child columns
        for (int i = 0; i < len; i++) {
            if (projectionColumns[i].contains(".")) {
                throw new UnsupportedOperationException(
                    "Complex child columns projection NOT supported through CarbonReader");
            }
        }
        format.setColumnProjection(job.getConfiguration(), projectionColumns);
    }
    try {
        if (filterExpression == null) {
            job.getConfiguration().set("filter_blocks", "false");
        }
        List<InputSplit> splits =
            format.getSplits(new JobContextImpl(job.getConfiguration(), new JobID()));
        List<RecordReader<Void, T>> readers = new ArrayList<>(splits.size());
        for (InputSplit split : splits) {
            TaskAttemptContextImpl attempt =
                new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
            RecordReader reader;
            QueryModel queryModel = format.createQueryModel(split, attempt);
            boolean hasComplex = false;
            for (ProjectionDimension projectionDimension : queryModel.getProjectionDimensions()) {
                if (projectionDimension.getDimension().isComplex()) {
                    hasComplex = true;
                    break;
                }
            }
            if (useVectorReader && !hasComplex) {
                queryModel.setDirectVectorFill(filterExpression == null);
                reader = new CarbonVectorizedRecordReader(queryModel);
            } else {
                reader = format.createRecordReader(split, attempt);
            }
            try {
                reader.initialize(split, attempt);
                readers.add(reader);
            } catch (Exception e) {
                CarbonUtil.closeStreams(readers.toArray(new RecordReader[0]));
                throw e;
            }
        }
        return new CarbonReader<>(readers);
    } catch (Exception ex) {
        // Clear the datamap cache as it can get added in getSplits() method
        DataMapStoreManager.getInstance().clearDataMaps(table.getAbsoluteTableIdentifier());
        throw ex;
    }
}
From source file:org.apache.carbondata.sdk.file.CSVCarbonWriter.java
License:Apache License
CSVCarbonWriter(CarbonLoadModel loadModel, Configuration hadoopConf) throws IOException {
    CarbonTableOutputFormat.setLoadModel(hadoopConf, loadModel);
    CarbonTableOutputFormat format = new CarbonTableOutputFormat();
    JobID jobId = new JobID(UUID.randomUUID().toString(), 0);
    Random random = new Random();
    TaskID task = new TaskID(jobId, TaskType.MAP, random.nextInt());
    TaskAttemptID attemptID = new TaskAttemptID(task, random.nextInt());
    TaskAttemptContextImpl context = new TaskAttemptContextImpl(hadoopConf, attemptID);
    this.recordWriter = format.getRecordWriter(context);
    this.context = context;
    this.writable = new ObjectArrayWritable();
}
From source file:org.apache.carbondata.sdk.file.JsonCarbonWriter.java
License:Apache License
JsonCarbonWriter(CarbonLoadModel loadModel, Configuration configuration) throws IOException {
    CarbonTableOutputFormat.setLoadModel(configuration, loadModel);
    CarbonTableOutputFormat outputFormat = new CarbonTableOutputFormat();
    JobID jobId = new JobID(UUID.randomUUID().toString(), 0);
    Random random = new Random();
    TaskID task = new TaskID(jobId, TaskType.MAP, random.nextInt());
    TaskAttemptID attemptID = new TaskAttemptID(task, random.nextInt());
    TaskAttemptContextImpl context = new TaskAttemptContextImpl(configuration, attemptID);
    this.recordWriter = outputFormat.getRecordWriter(context);
    this.context = context;
    this.writable = new ObjectArrayWritable();
}
From source file:org.apache.carbondata.store.LocalCarbonStore.java
License:Apache License
@Override
public Iterator<CarbonRow> scan(AbsoluteTableIdentifier tableIdentifier, String[] projectColumns,
        Expression filter) throws IOException {
    Objects.requireNonNull(tableIdentifier);
    Objects.requireNonNull(projectColumns);
    CarbonTable table = getTable(tableIdentifier.getTablePath());
    if (table.isStreamingSink() || table.isHivePartitionTable()) {
        throw new UnsupportedOperationException("streaming and partition table is not supported");
    }
    // TODO: use InputFormat to prune data and read data
    final CarbonTableInputFormat format = new CarbonTableInputFormat();
    final Job job = new Job(new Configuration());
    CarbonInputFormat.setTableInfo(job.getConfiguration(), table.getTableInfo());
    CarbonInputFormat.setTablePath(job.getConfiguration(), table.getTablePath());
    CarbonInputFormat.setTableName(job.getConfiguration(), table.getTableName());
    CarbonInputFormat.setDatabaseName(job.getConfiguration(), table.getDatabaseName());
    CarbonInputFormat.setCarbonReadSupport(job.getConfiguration(), CarbonRowReadSupport.class);
    CarbonInputFormat.setColumnProjection(job.getConfiguration(), new CarbonProjection(projectColumns));
    if (filter != null) {
        CarbonInputFormat.setFilterPredicates(job.getConfiguration(), filter);
    }
    final List<InputSplit> splits =
        format.getSplits(new JobContextImpl(job.getConfiguration(), new JobID()));
    List<RecordReader<Void, Object>> readers = new ArrayList<>(splits.size());
    List<CarbonRow> rows = new ArrayList<>();
    try {
        for (InputSplit split : splits) {
            TaskAttemptContextImpl attempt =
                new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
            RecordReader reader = format.createRecordReader(split, attempt);
            reader.initialize(split, attempt);
            readers.add(reader);
        }
        for (RecordReader<Void, Object> reader : readers) {
            while (reader.nextKeyValue()) {
                rows.add((CarbonRow) reader.getCurrentValue());
            }
            try {
                reader.close();
            } catch (IOException e) {
                LOGGER.error(e);
            }
        }
    } catch (InterruptedException e) {
        throw new IOException(e);
    } finally {
        for (RecordReader<Void, Object> reader : readers) {
            try {
                reader.close();
            } catch (IOException e) {
                LOGGER.error(e);
            }
        }
    }
    return rows.iterator();
}
From source file:org.apache.carbondata.stream.CarbonStreamRecordReaderTest.java
License:Apache License
@Override
protected void setUp() throws Exception {
    tablePath = new File("target/stream_input").getCanonicalPath();
    String dbName = "default";
    String tableName = "stream_table_input";
    identifier = AbsoluteTableIdentifier.from(tablePath,
        new CarbonTableIdentifier(dbName, tableName, UUID.randomUUID().toString()));
    JobID jobId = CarbonInputFormatUtil.getJobId(new Date(), 0);
    TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
    taskAttemptId = new TaskAttemptID(taskId, 0);
    hadoopConf = new Configuration();
    taskAttemptContext = new TaskAttemptContextImpl(hadoopConf, taskAttemptId);
}
From source file:org.apache.carbondata.streaming.CarbonStreamOutputFormatTest.java
License:Apache License
@Test
public void testGetRecordWriter() {
    CarbonStreamOutputFormat outputFormat = new CarbonStreamOutputFormat();
    try {
        CarbonStreamOutputFormat.setCarbonLoadModel(hadoopConf, carbonLoadModel);
        TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(hadoopConf, taskAttemptId);
        RecordWriter recordWriter = outputFormat.getRecordWriter(taskAttemptContext);
        Assert.assertNotNull("Failed to get CarbonStreamRecordWriter", recordWriter);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.assertTrue(e.getMessage(), false);
    }
}
From source file:org.apache.cassandra.hadoop2.AbstractColumnFamilyInputFormat.java
License:Apache License
public org.apache.hadoop.mapred.InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
    TaskAttemptContext tac = new TaskAttemptContextImpl(jobConf, new TaskAttemptID());
    List<org.apache.hadoop.mapreduce.InputSplit> newInputSplits = this.getSplits(tac);
    org.apache.hadoop.mapred.InputSplit[] oldInputSplits =
        new org.apache.hadoop.mapred.InputSplit[newInputSplits.size()];
    for (int i = 0; i < newInputSplits.size(); i++) {
        oldInputSplits[i] = (ColumnFamilySplit) newInputSplits.get(i);
    }
    return oldInputSplits;
}
From source file:org.apache.cassandra.hadoop2.ColumnFamilyInputFormat.java
License:Apache License
public org.apache.hadoop.mapred.RecordReader<ByteBuffer, SortedMap<ByteBuffer, Column>> getRecordReader(
        org.apache.hadoop.mapred.InputSplit split, JobConf jobConf, final Reporter reporter)
        throws IOException {
    TaskAttemptContext tac = new TaskAttemptContextImpl(jobConf,
            TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID))) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };
    ColumnFamilyRecordReader recordReader = new ColumnFamilyRecordReader(
        jobConf.getInt(CASSANDRA_HADOOP_MAX_KEY_SIZE, CASSANDRA_HADOOP_MAX_KEY_SIZE_DEFAULT));
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}
From source file:org.apache.cassandra.hadoop2.cql3.CqlPagingInputFormat.java
License:Apache License
public RecordReader<Map<String, ByteBuffer>, Map<String, ByteBuffer>> getRecordReader(InputSplit split,
        JobConf jobConf, final Reporter reporter) throws IOException {
    TaskAttemptContext tac = new TaskAttemptContextImpl(jobConf,
            TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID))) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };
    CqlPagingRecordReader recordReader = new CqlPagingRecordReader();
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}
From source file:org.apache.crunch.io.hcatalog.HCatRecordDataIterable.java
License:Apache License
@Override
public Iterator<HCatRecord> iterator() {
    try {
        Job job = Job.getInstance(bundle.configure(conf));
        final InputFormat fmt = ReflectionUtils.newInstance(bundle.getFormatClass(), conf);
        final TaskAttemptContext ctxt = new TaskAttemptContextImpl(conf, new TaskAttemptID());
        return Iterators.concat(
            Lists.transform(fmt.getSplits(job), new Function<InputSplit, Iterator<HCatRecord>>() {
                @Override
                public Iterator<HCatRecord> apply(InputSplit split) {
                    RecordReader reader = null;
                    try {
                        reader = fmt.createRecordReader(split, ctxt);
                        reader.initialize(split, ctxt);
                    } catch (IOException | InterruptedException e) {
                        throw new CrunchRuntimeException(e);
                    }
                    return new HCatRecordReaderIterator(reader);
                }
            }).iterator());
    } catch (Exception e) {
        throw new CrunchRuntimeException(e);
    }
}