List of usage examples for the constructor of org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl
public TaskAttemptContextImpl(Configuration conf, TaskAttemptID taskId)
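Before the project-specific examples, a minimal standalone sketch of the same constructor call (assuming the Hadoop 2.x mapreduce API; the class name and the identifier values inside the TaskAttemptID are illustrative placeholders, not taken from any of the sources below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptContextExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // "local" and the numeric ids are arbitrary placeholder values for a locally built attempt id.
    TaskAttemptID attemptId = new TaskAttemptID("local", 0, TaskType.MAP, 0, 0);
    // The context bundles the configuration with the attempt id; it is typically passed to
    // InputFormat.createRecordReader(...) or OutputFormat.getRecordWriter(...), as the examples below do.
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, attemptId);
    System.out.println(context.getTaskAttemptID());
  }
}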
From source file:mvm.rya.accumulo.pig.AccumuloStorageTest.java
License:Apache License
protected List<AccumuloStorage> createAccumuloStorages(String location) throws IOException, InterruptedException {
  List<AccumuloStorage> accumuloStorages = new ArrayList<AccumuloStorage>();
  AccumuloStorage storage = new AccumuloStorage();
  InputFormat inputFormat = storage.getInputFormat();
  Job job = new Job(new Configuration());
  storage.setLocation(location, job);
  List<InputSplit> splits = inputFormat.getSplits(job);
  assertNotNull(splits);
  for (InputSplit inputSplit : splits) {
    storage = new AccumuloStorage();
    job = new Job(new Configuration());
    storage.setLocation(location, job);
    TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(job.getConfiguration(),
        new TaskAttemptID("jtid", 0, false, 0, 0));
    RecordReader recordReader = inputFormat.createRecordReader(inputSplit, taskAttemptContext);
    recordReader.initialize(inputSplit, taskAttemptContext);
    storage.prepareToRead(recordReader, null);
    accumuloStorages.add(storage);
  }
  return accumuloStorages;
}
From source file:mvm.rya.accumulo.pig.StatementPatternStorageTest.java
License:Apache License
protected List<StatementPatternStorage> createStorages(String location) throws IOException, InterruptedException {
  List<StatementPatternStorage> storages = new ArrayList<StatementPatternStorage>();
  StatementPatternStorage storage = new StatementPatternStorage();
  InputFormat inputFormat = storage.getInputFormat();
  Job job = new Job(new Configuration());
  storage.setLocation(location, job);
  List<InputSplit> splits = inputFormat.getSplits(job);
  assertNotNull(splits);
  for (InputSplit inputSplit : splits) {
    storage = new StatementPatternStorage();
    job = new Job(new Configuration());
    storage.setLocation(location, job);
    TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(job.getConfiguration(),
        new TaskAttemptID("jtid", 0, false, 0, 0));
    RecordReader recordReader = inputFormat.createRecordReader(inputSplit, taskAttemptContext);
    recordReader.initialize(inputSplit, taskAttemptContext);
    storage.prepareToRead(recordReader, null);
    storages.add(storage);
  }
  return storages;
}
From source file:org.apache.accumulo.examples.wikisearch.logic.TestQueryLogic.java
License:Apache License
@Before
public void setup() throws Exception {
  Logger.getLogger(AbstractQueryLogic.class).setLevel(Level.DEBUG);
  Logger.getLogger(QueryLogic.class).setLevel(Level.DEBUG);
  Logger.getLogger(RangeCalculator.class).setLevel(Level.DEBUG);
  conf.set(AggregatingRecordReader.START_TOKEN, "<page>");
  conf.set(AggregatingRecordReader.END_TOKEN, "</page>");
  conf.set(WikipediaConfiguration.TABLE_NAME, TABLE_NAME);
  conf.set(WikipediaConfiguration.NUM_PARTITIONS, "1");
  conf.set(WikipediaConfiguration.NUM_GROUPS, "1");

  MockInstance i = new MockInstance();
  c = i.getConnector("root", new PasswordToken(""));
  WikipediaIngester.createTables(c.tableOperations(), TABLE_NAME, false);
  for (String table : TABLE_NAMES) {
    writerMap.put(new Text(table), c.createBatchWriter(table, 1000L, 1000L, 1));
  }

  TaskAttemptID id = new TaskAttemptID("fake", 1, TaskType.MAP, 1, 1);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, id);

  RawLocalFileSystem fs = new RawLocalFileSystem();
  fs.setConf(conf);

  URL url = ClassLoader.getSystemResource("enwiki-20110901-001.xml");
  Assert.assertNotNull(url);
  File data = new File(url.toURI());
  Path tmpFile = new Path(data.getAbsolutePath());

  // Setup the Mapper
  WikipediaInputSplit split = new WikipediaInputSplit(
      new FileSplit(tmpFile, 0, fs.pathToFile(tmpFile).length(), null), 0);
  AggregatingRecordReader rr = new AggregatingRecordReader();
  Path ocPath = new Path(tmpFile, "oc");
  OutputCommitter oc = new FileOutputCommitter(ocPath, context);
  fs.deleteOnExit(ocPath);
  StandaloneStatusReporter sr = new StandaloneStatusReporter();
  rr.initialize(split, context);
  MockAccumuloRecordWriter rw = new MockAccumuloRecordWriter();
  WikipediaMapper mapper = new WikipediaMapper();

  // there are times I wonder, "Why do Java people think this is good?" then I drink more whiskey
  final MapContextImpl<LongWritable, Text, Text, Mutation> mapContext =
      new MapContextImpl<LongWritable, Text, Text, Mutation>(conf, id, rr, rw, oc, sr, split);

  // Load data into Mock Accumulo
  Mapper<LongWritable, Text, Text, Mutation>.Context con = mapper.new Context() {
    /**
     * Get the input split for this map.
     */
    public InputSplit getInputSplit() { return mapContext.getInputSplit(); }

    @Override public LongWritable getCurrentKey() throws IOException, InterruptedException { return mapContext.getCurrentKey(); }
    @Override public Text getCurrentValue() throws IOException, InterruptedException { return mapContext.getCurrentValue(); }
    @Override public boolean nextKeyValue() throws IOException, InterruptedException { return mapContext.nextKeyValue(); }
    @Override public Counter getCounter(Enum<?> counterName) { return mapContext.getCounter(counterName); }
    @Override public Counter getCounter(String groupName, String counterName) { return mapContext.getCounter(groupName, counterName); }
    @Override public OutputCommitter getOutputCommitter() { return mapContext.getOutputCommitter(); }
    @Override public void write(Text key, Mutation value) throws IOException, InterruptedException { mapContext.write(key, value); }
    @Override public String getStatus() { return mapContext.getStatus(); }
    @Override public TaskAttemptID getTaskAttemptID() { return mapContext.getTaskAttemptID(); }
    @Override public void setStatus(String msg) { mapContext.setStatus(msg); }
    @Override public Path[] getArchiveClassPaths() { return mapContext.getArchiveClassPaths(); }
    @Override public String[] getArchiveTimestamps() { return mapContext.getArchiveTimestamps(); }
    @Override public URI[] getCacheArchives() throws IOException { return mapContext.getCacheArchives(); }
    @Override public URI[] getCacheFiles() throws IOException { return mapContext.getCacheFiles(); }
    @Override public Class<? extends Reducer<?, ?, ?, ?>> getCombinerClass() throws ClassNotFoundException { return mapContext.getCombinerClass(); }
    @Override public Configuration getConfiguration() { return mapContext.getConfiguration(); }
    @Override public Path[] getFileClassPaths() { return mapContext.getFileClassPaths(); }
    @Override public String[] getFileTimestamps() { return mapContext.getFileTimestamps(); }
    @Override public RawComparator<?> getGroupingComparator() { return mapContext.getGroupingComparator(); }
    @Override public Class<? extends InputFormat<?, ?>> getInputFormatClass() throws ClassNotFoundException { return mapContext.getInputFormatClass(); }
    @Override public String getJar() { return mapContext.getJar(); }
    @Override public JobID getJobID() { return mapContext.getJobID(); }
    @Override public String getJobName() { return mapContext.getJobName(); }
    /*@Override public boolean userClassesTakesPrecedence() { return mapContext.userClassesTakesPrecedence(); }*/
    @Override public boolean getJobSetupCleanupNeeded() { return mapContext.getJobSetupCleanupNeeded(); }
    @Override public boolean getTaskCleanupNeeded() { return mapContext.getTaskCleanupNeeded(); }
    @Override public Path[] getLocalCacheArchives() throws IOException { return mapContext.getLocalCacheArchives(); }
    @Override public Path[] getLocalCacheFiles() throws IOException { return mapContext.getLocalCacheFiles(); }
    @Override public Class<?> getMapOutputKeyClass() { return mapContext.getMapOutputKeyClass(); }
    @Override public Class<?> getMapOutputValueClass() { return mapContext.getMapOutputValueClass(); }
    @Override public Class<? extends Mapper<?, ?, ?, ?>> getMapperClass() throws ClassNotFoundException { return mapContext.getMapperClass(); }
    @Override public int getMaxMapAttempts() { return mapContext.getMaxMapAttempts(); }
    @Override public int getMaxReduceAttempts() { return mapContext.getMaxReduceAttempts(); }
    @Override public int getNumReduceTasks() { return mapContext.getNumReduceTasks(); }
    @Override public Class<? extends OutputFormat<?, ?>> getOutputFormatClass() throws ClassNotFoundException { return mapContext.getOutputFormatClass(); }
    @Override public Class<?> getOutputKeyClass() { return mapContext.getOutputKeyClass(); }
    @Override public Class<?> getOutputValueClass() { return mapContext.getOutputValueClass(); }
    @Override public Class<? extends Partitioner<?, ?>> getPartitionerClass() throws ClassNotFoundException { return mapContext.getPartitionerClass(); }
    @Override public Class<? extends Reducer<?, ?, ?, ?>> getReducerClass() throws ClassNotFoundException { return mapContext.getReducerClass(); }
    @Override public RawComparator<?> getSortComparator() { return mapContext.getSortComparator(); }
    @Override public boolean getSymlink() { return mapContext.getSymlink(); }
    @Override public Path getWorkingDirectory() throws IOException { return mapContext.getWorkingDirectory(); }
    @Override public void progress() { mapContext.progress(); }
    @Override public boolean getProfileEnabled() { return mapContext.getProfileEnabled(); }
    @Override public String getProfileParams() { return mapContext.getProfileParams(); }
    @Override public IntegerRanges getProfileTaskRange(boolean isMap) { return mapContext.getProfileTaskRange(isMap); }
    @Override public String getUser() { return mapContext.getUser(); }
    @Override public Credentials getCredentials() { return mapContext.getCredentials(); }
    @Override public float getProgress() { return mapContext.getProgress(); }
  };

  mapper.run(con);

  // Flush and close record writers.
  rw.close(context);

  table = new QueryLogic();
  table.setMetadataTableName(METADATA_TABLE_NAME);
  table.setTableName(TABLE_NAME);
  table.setIndexTableName(INDEX_TABLE_NAME);
  table.setReverseIndexTableName(RINDEX_TABLE_NAME);
  table.setUseReadAheadIterator(false);
  table.setUnevaluatedFields(Collections.singletonList("TEXT"));
}
From source file:org.apache.accumulo.examples.wikisearch.reader.AggregatingRecordReaderTest.java
License:Apache License
@Before
public void setUp() throws Exception {
  conf = new Configuration();
  conf.set(AggregatingRecordReader.START_TOKEN, "<doc");
  conf.set(AggregatingRecordReader.END_TOKEN, "</doc>");
  conf.set(AggregatingRecordReader.RETURN_PARTIAL_MATCHES, Boolean.toString(true));
  TaskAttemptID id = new TaskAttemptID();
  ctx = new TaskAttemptContextImpl(conf, id);
  XPath xp = xpFactory.newXPath();
  EXPR_A = xp.compile("/doc/a");
  EXPR_B = xp.compile("/doc/b");
  EXPR_ATTR = xp.compile("/doc/@attr");
}
From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormats.java
License:Apache License
/**
 * Creates {@link TaskAttemptContext}.
 *
 * @param conf configuration
 * @param taskAttemptID taskAttemptId
 * @return new {@link TaskAttemptContext}
 */
static TaskAttemptContext createTaskAttemptContext(Configuration conf, TaskAttemptID taskAttemptID) {
  return new TaskAttemptContextImpl(conf, taskAttemptID);
}
From source file:org.apache.carbondata.hadoop.testutil.StoreCreator.java
License:Apache License
/**
 * Execute graph which will further load data
 *
 * @param loadModel
 * @param storeLocation
 * @throws Exception
 */
public static void loadData(CarbonLoadModel loadModel, String storeLocation) throws Exception {
  if (!new File(storeLocation).mkdirs()) {
    LOG.warn("mkdir is failed");
  }
  String outPutLoc = storeLocation + "/etl";
  String databaseName = loadModel.getDatabaseName();
  String tableName = loadModel.getTableName();
  String tempLocationKey = databaseName + '_' + tableName + "_1";
  CarbonProperties.getInstance().addProperty(tempLocationKey,
      storeLocation + "/" + databaseName + "/" + tableName);
  CarbonProperties.getInstance().addProperty("store_output_location", outPutLoc);
  CarbonProperties.getInstance().addProperty("send.signal.load", "false");
  CarbonProperties.getInstance().addProperty("carbon.is.columnar.storage", "true");
  CarbonProperties.getInstance().addProperty("carbon.dimension.split.value.in.columnar", "1");
  CarbonProperties.getInstance().addProperty("carbon.is.fullyfilled.bits", "true");
  CarbonProperties.getInstance().addProperty("is.int.based.indexer", "true");
  CarbonProperties.getInstance().addProperty("aggregate.columnar.keyblock", "true");
  CarbonProperties.getInstance().addProperty("is.compressed.keyblock", "false");

  String graphPath = outPutLoc + File.separator + loadModel.getDatabaseName() + File.separator + tableName
      + File.separator + 0 + File.separator + 1 + File.separator + tableName + ".ktr";
  File path = new File(graphPath);
  if (path.exists()) {
    if (!path.delete()) {
      LOG.warn("delete " + path + " failed");
    }
  }

  BlockDetails blockDetails = new BlockDetails(new Path(loadModel.getFactFilePath()), 0,
      new File(loadModel.getFactFilePath()).length(), new String[] { "localhost" });
  Configuration configuration = new Configuration();
  CSVInputFormat.setCommentCharacter(configuration, loadModel.getCommentChar());
  CSVInputFormat.setCSVDelimiter(configuration, loadModel.getCsvDelimiter());
  CSVInputFormat.setEscapeCharacter(configuration, loadModel.getEscapeChar());
  CSVInputFormat.setHeaderExtractionEnabled(configuration, true);
  CSVInputFormat.setQuoteCharacter(configuration, loadModel.getQuoteChar());
  CSVInputFormat.setReadBufferSize(configuration, CarbonProperties.getInstance().getProperty(
      CarbonCommonConstants.CSV_READ_BUFFER_SIZE, CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT));
  CSVInputFormat.setNumberOfColumns(configuration, String.valueOf(loadModel.getCsvHeaderColumns().length));
  CSVInputFormat.setMaxColumns(configuration, "10");

  TaskAttemptContextImpl hadoopAttemptContext = new TaskAttemptContextImpl(configuration,
      new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
  CSVInputFormat format = new CSVInputFormat();
  RecordReader<NullWritable, StringArrayWritable> recordReader =
      format.createRecordReader(blockDetails, hadoopAttemptContext);
  CSVRecordReaderIterator readerIterator =
      new CSVRecordReaderIterator(recordReader, blockDetails, hadoopAttemptContext);
  DataTypeUtil.clearFormatter();
  new DataLoadExecutor().execute(loadModel,
      new String[] { storeLocation + "/" + databaseName + "/" + tableName },
      new CarbonIterator[] { readerIterator });

  writeLoadMetadata(loadModel.getCarbonDataLoadSchema(), loadModel.getTableName(), loadModel.getTableName(),
      new ArrayList<LoadMetadataDetails>());
}
From source file:org.apache.carbondata.presto.CarbondataPageSource.java
License:Apache License
private void initialReaderForRow() {
  SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmm");
  String jobTrackerId = formatter.format(new Date());
  TaskAttemptID attemptId = new TaskAttemptID(jobTrackerId, 0, TaskType.MAP, 0, 0);
  TaskAttemptContextImpl attemptContext =
      new TaskAttemptContextImpl(FileFactory.getConfiguration(), attemptId);
  CarbonMultiBlockSplit carbonInputSplit =
      CarbonLocalMultiBlockSplit.convertSplit(split.getSchema().getProperty("carbonSplit"));
  try {
    rowReader.initialize(carbonInputSplit, attemptContext);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
From source file:org.apache.carbondata.presto.CarbondataPageSource.java
License:Apache License
/**
 * @param carbondataSplit
 * @param columns
 * @return
 */
private QueryModel createQueryModel(HiveSplit carbondataSplit, List<? extends ColumnHandle> columns,
    Configuration conf) {
  try {
    CarbonProjection carbonProjection = getCarbonProjection(columns);
    conf.set(CarbonTableInputFormat.INPUT_SEGMENT_NUMBERS, "");
    String carbonTablePath = carbonTable.getAbsoluteTableIdentifier().getTablePath();
    CarbonTableInputFormat.setTransactionalTable(conf, carbonTable.getTableInfo().isTransactionalTable());
    CarbonTableInputFormat.setTableInfo(conf, carbonTable.getTableInfo());
    conf.set(CarbonTableInputFormat.INPUT_DIR, carbonTablePath);
    conf.set("query.id", queryId);
    JobConf jobConf = new JobConf(conf);
    CarbonTableInputFormat carbonTableInputFormat = createInputFormat(jobConf, carbonTable,
        PrestoFilterUtil.parseFilterExpression(carbondataSplit.getEffectivePredicate()),
        carbonProjection);
    TaskAttemptContextImpl hadoopAttemptContext =
        new TaskAttemptContextImpl(jobConf, new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
    CarbonMultiBlockSplit carbonInputSplit =
        CarbonLocalMultiBlockSplit.convertSplit(carbondataSplit.getSchema().getProperty("carbonSplit"));
    QueryModel queryModel = carbonTableInputFormat.createQueryModel(carbonInputSplit, hadoopAttemptContext);
    queryModel.setQueryId(queryId);
    queryModel.setVectorReader(true);
    queryModel.setStatisticsRecorder(
        CarbonTimeStatisticsFactory.createExecutorRecorder(queryModel.getQueryId()));
    List<TableBlockInfo> tableBlockInfoList = CarbonInputSplit.createBlocks(carbonInputSplit.getAllSplits());
    queryModel.setTableBlockInfos(tableBlockInfoList);
    return queryModel;
  } catch (IOException e) {
    throw new RuntimeException("Unable to get the Query Model ", e);
  }
}
From source file:org.apache.carbondata.processing.StoreCreator.java
License:Apache License
/**
 * Execute graph which will further load data
 *
 * @param loadModel
 * @param storeLocation
 * @throws Exception
 */
public static void executeGraph(CarbonLoadModel loadModel, String storeLocation) throws Exception {
  new File(storeLocation).mkdirs();
  String outPutLoc = storeLocation + "/etl";
  String databaseName = loadModel.getDatabaseName();
  String tableName = loadModel.getTableName();
  String tempLocationKey = databaseName + '_' + tableName + "_1";
  CarbonProperties.getInstance().addProperty(tempLocationKey, storeLocation);
  CarbonProperties.getInstance().addProperty("store_output_location", outPutLoc);
  CarbonProperties.getInstance().addProperty("send.signal.load", "false");
  CarbonProperties.getInstance().addProperty("carbon.is.columnar.storage", "true");
  CarbonProperties.getInstance().addProperty("carbon.dimension.split.value.in.columnar", "1");
  CarbonProperties.getInstance().addProperty("carbon.is.fullyfilled.bits", "true");
  CarbonProperties.getInstance().addProperty("is.int.based.indexer", "true");
  CarbonProperties.getInstance().addProperty("aggregate.columnar.keyblock", "true");
  CarbonProperties.getInstance().addProperty("high.cardinality.value", "100000");
  CarbonProperties.getInstance().addProperty("is.compressed.keyblock", "false");
  CarbonProperties.getInstance().addProperty("carbon.leaf.node.size", "120000");

  String graphPath = outPutLoc + File.separator + loadModel.getDatabaseName() + File.separator + tableName
      + File.separator + 0 + File.separator + 1 + File.separator + tableName + ".ktr";
  File path = new File(graphPath);
  if (path.exists()) {
    path.delete();
  }

  SchemaInfo info = new SchemaInfo();
  BlockDetails blockDetails = new BlockDetails(new Path(loadModel.getFactFilePath()), 0,
      new File(loadModel.getFactFilePath()).length(), new String[] { "localhost" });
  Configuration configuration = new Configuration();
  CSVInputFormat.setCommentCharacter(configuration, loadModel.getCommentChar());
  CSVInputFormat.setCSVDelimiter(configuration, loadModel.getCsvDelimiter());
  CSVInputFormat.setEscapeCharacter(configuration, loadModel.getEscapeChar());
  CSVInputFormat.setHeaderExtractionEnabled(configuration, true);
  CSVInputFormat.setQuoteCharacter(configuration, loadModel.getQuoteChar());
  CSVInputFormat.setReadBufferSize(configuration, CarbonProperties.getInstance().getProperty(
      CarbonCommonConstants.CSV_READ_BUFFER_SIZE, CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT));
  CSVInputFormat.setMaxColumns(configuration, "10");
  CSVInputFormat.setNumberOfColumns(configuration, "7");

  TaskAttemptContextImpl hadoopAttemptContext = new TaskAttemptContextImpl(configuration,
      new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
  CSVInputFormat format = new CSVInputFormat();
  RecordReader<NullWritable, StringArrayWritable> recordReader =
      format.createRecordReader(blockDetails, hadoopAttemptContext);
  CSVRecordReaderIterator readerIterator =
      new CSVRecordReaderIterator(recordReader, blockDetails, hadoopAttemptContext);
  new DataLoadExecutor().execute(loadModel, storeLocation, new CarbonIterator[] { readerIterator });

  info.setDatabaseName(databaseName);
  info.setTableName(tableName);
  writeLoadMetadata(loadModel.getCarbonDataLoadSchema(), loadModel.getTableName(), loadModel.getTableName(),
      new ArrayList<LoadMetadataDetails>());

  String segLocation = storeLocation + "/" + databaseName + "/" + tableName + "/Fact/Part0/Segment_0";
  File file = new File(segLocation);
  File factFile = null;
  File[] folderList = file.listFiles();
  File folder = null;
  for (int i = 0; i < folderList.length; i++) {
    if (folderList[i].isDirectory()) {
      folder = folderList[i];
    }
  }
  if (folder.isDirectory()) {
    File[] files = folder.listFiles();
    for (int i = 0; i < files.length; i++) {
      if (!files[i].isDirectory() && files[i].getName().startsWith("part")) {
        factFile = files[i];
        break;
      }
    }
    factFile.renameTo(new File(segLocation + "/" + factFile.getName()));
    CarbonUtil.deleteFoldersAndFiles(folder);
  }
}
From source file:org.apache.carbondata.sdk.file.AvroCarbonWriter.java
License:Apache License
AvroCarbonWriter(CarbonLoadModel loadModel, Configuration hadoopConf) throws IOException {
  CarbonTableOutputFormat.setLoadModel(hadoopConf, loadModel);
  CarbonTableOutputFormat format = new CarbonTableOutputFormat();
  JobID jobId = new JobID(UUID.randomUUID().toString(), 0);
  Random random = new Random();
  TaskID task = new TaskID(jobId, TaskType.MAP, random.nextInt());
  TaskAttemptID attemptID = new TaskAttemptID(task, random.nextInt());
  TaskAttemptContextImpl context = new TaskAttemptContextImpl(hadoopConf, attemptID);
  this.recordWriter = format.getRecordWriter(context);
  this.context = context;
  this.writable = new ObjectArrayWritable();
}