List of usage examples for the org.apache.hadoop.mapred.TaskAttemptID constructor
public TaskAttemptID(String jtIdentifier, int jobId, TaskType type, int taskId, int id)
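A minimal, self-contained sketch of this constructor in isolation (the "jt" identifier and the id values are illustrative, not taken from any of the examples below):

import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

// Attempt 0 of map task 3 of job 1, submitted by the job tracker identified as "jt".
TaskAttemptID attemptId = new TaskAttemptID("jt", 1, TaskType.MAP, 3, 0);
System.out.println(attemptId); // prints something like attempt_jt_0001_m_000003_0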
From source file:com.facebook.hiveio.output.PerThread.java
License:Apache License
/**
 * Constructor
 *
 * @param conf Configuration
 */
public PerThread(Configuration conf) {
    this.conf = HiveUtils.newHiveConf(conf, OutputCmd.class);
    this.taskID = new TaskAttemptID("hiveio_output", 42, true, (int) Thread.currentThread().getId(), 0);
}
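Note that this example passes a boolean rather than a TaskType: that is the older, deprecated overload of the constructor, in which true stands for a map task. A hedged sketch of the equivalence (the id values are illustrative):

// Deprecated overload: the boolean flag selects map (true) or reduce (false).
TaskAttemptID legacy = new TaskAttemptID("hiveio_output", 42, true, 7, 0);
// Equivalent call through the TaskType-based constructor documented above.
TaskAttemptID current = new TaskAttemptID("hiveio_output", 42, TaskType.MAP, 7, 0);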
From source file:com.inmobi.messaging.consumer.databus.mapreduce.TestDatabusInputFormatMapReduce.java
License:Apache License
@BeforeTest
public void setUp() throws Exception {
    databusInputFormat = new DatabusInputFormat();
    rootDir = new Path("file:///", new Path(System.getProperty("test.root.dir"), "databustestMapRduce"));
    taskId = new TaskAttemptID("jt", 0, true, 0, 0);
    super.setUp();
}
From source file:com.splicemachine.derby.impl.io.WholeTextInputFormatTest.java
License:Apache License
@Test
public void testGetsStreamForDirectory() throws Exception {
    /*
     * This test failed before changes to WholeTextInputFormat (hooray for test-driven development!),
     * so this constitutes an effective regression test for SPLICE-739. Of course, we'll be certain
     * about it by ALSO writing an IT, but this is a nice little Unit test of the same thing.
     */
    Configuration configuration = HConfiguration.unwrapDelegate();
    String dirPath = SpliceUnitTest.getResourceDirectory() + "multiLineDirectory";
    configuration.set("mapred.input.dir", dirPath);
    WholeTextInputFormat wtif = new WholeTextInputFormat();
    wtif.setConf(configuration);

    JobContext ctx = new JobContextImpl(configuration, new JobID("test", 1));
    List<InputSplit> splits = wtif.getSplits(ctx);

    int i = 0;
    Set<String> files = readFileNames(dirPath);
    Assert.assertEquals("We didn't get a split per file", files.size(), splits.size());

    Set<String> readFiles = new HashSet<>();
    long totalRecords = 0;
    for (InputSplit is : splits) {
        TaskAttemptContext tac = new TaskAttemptContextImpl(configuration,
                new TaskAttemptID("test", 1, true, i, 1));
        RecordReader<String, InputStream> recordReader = wtif.createRecordReader(is, tac);
        CombineFileSplit cfs = (CombineFileSplit) is;
        System.out.println(cfs);
        totalRecords += collectRecords(readFiles, recordReader);
        i++;
    }
    Assert.assertEquals("did not read all data!", 28, totalRecords);
    Assert.assertEquals("Did not read all files!", files.size(), readFiles.size());
    for (String expectedFile : files) {
        Assert.assertTrue("Did not read file <" + expectedFile + "> read =" + readFiles + " exp",
                readFiles.contains(expectedFile));
    }
}
From source file:org.apache.carbondata.hadoop.testutil.StoreCreator.java
License:Apache License
/**
 * Execute graph which will further load data
 *
 * @param loadModel
 * @param storeLocation
 * @throws Exception
 */
public static void loadData(CarbonLoadModel loadModel, String storeLocation) throws Exception {
    if (new File(storeLocation).mkdirs()) {
        LOG.warn("mkdir is failed");
    }
    String outPutLoc = storeLocation + "/etl";
    String databaseName = loadModel.getDatabaseName();
    String tableName = loadModel.getTableName();
    String tempLocationKey = databaseName + '_' + tableName + "_1";
    CarbonProperties.getInstance().addProperty(tempLocationKey,
            storeLocation + "/" + databaseName + "/" + tableName);
    CarbonProperties.getInstance().addProperty("store_output_location", outPutLoc);
    CarbonProperties.getInstance().addProperty("send.signal.load", "false");
    CarbonProperties.getInstance().addProperty("carbon.is.columnar.storage", "true");
    CarbonProperties.getInstance().addProperty("carbon.dimension.split.value.in.columnar", "1");
    CarbonProperties.getInstance().addProperty("carbon.is.fullyfilled.bits", "true");
    CarbonProperties.getInstance().addProperty("is.int.based.indexer", "true");
    CarbonProperties.getInstance().addProperty("aggregate.columnar.keyblock", "true");
    CarbonProperties.getInstance().addProperty("is.compressed.keyblock", "false");

    String graphPath = outPutLoc + File.separator + loadModel.getDatabaseName() + File.separator + tableName
            + File.separator + 0 + File.separator + 1 + File.separator + tableName + ".ktr";
    File path = new File(graphPath);
    if (path.exists()) {
        if (!path.delete()) {
            LOG.warn("delete " + path + " failed");
        }
    }

    BlockDetails blockDetails = new BlockDetails(new Path(loadModel.getFactFilePath()), 0,
            new File(loadModel.getFactFilePath()).length(), new String[] { "localhost" });
    Configuration configuration = new Configuration();
    CSVInputFormat.setCommentCharacter(configuration, loadModel.getCommentChar());
    CSVInputFormat.setCSVDelimiter(configuration, loadModel.getCsvDelimiter());
    CSVInputFormat.setEscapeCharacter(configuration, loadModel.getEscapeChar());
    CSVInputFormat.setHeaderExtractionEnabled(configuration, true);
    CSVInputFormat.setQuoteCharacter(configuration, loadModel.getQuoteChar());
    CSVInputFormat.setReadBufferSize(configuration, CarbonProperties.getInstance().getProperty(
            CarbonCommonConstants.CSV_READ_BUFFER_SIZE, CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT));
    CSVInputFormat.setNumberOfColumns(configuration, String.valueOf(loadModel.getCsvHeaderColumns().length));
    CSVInputFormat.setMaxColumns(configuration, "10");

    TaskAttemptContextImpl hadoopAttemptContext = new TaskAttemptContextImpl(configuration,
            new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
    CSVInputFormat format = new CSVInputFormat();
    RecordReader<NullWritable, StringArrayWritable> recordReader =
            format.createRecordReader(blockDetails, hadoopAttemptContext);
    CSVRecordReaderIterator readerIterator =
            new CSVRecordReaderIterator(recordReader, blockDetails, hadoopAttemptContext);
    DataTypeUtil.clearFormatter();
    new DataLoadExecutor().execute(loadModel,
            new String[] { storeLocation + "/" + databaseName + "/" + tableName },
            new CarbonIterator[] { readerIterator });

    writeLoadMetadata(loadModel.getCarbonDataLoadSchema(), loadModel.getTableName(), loadModel.getTableName(),
            new ArrayList<LoadMetadataDetails>());
}
From source file:org.apache.carbondata.presto.CarbondataRecordSetProvider.java
License:Apache License
@Override
public RecordSet getRecordSet(ConnectorTransactionHandle transactionHandle, ConnectorSession session,
        ConnectorSplit split, List<? extends ColumnHandle> columns) {
    CarbondataSplit carbondataSplit =
            checkType(split, CarbondataSplit.class, "split is not class CarbondataSplit");
    checkArgument(carbondataSplit.getConnectorId().equals(connectorId), "split is not for this connector");

    CarbonProjection carbonProjection = new CarbonProjection();
    // Convert all columns handles
    ImmutableList.Builder<CarbondataColumnHandle> handles = ImmutableList.builder();
    for (ColumnHandle handle : columns) {
        handles.add(checkType(handle, CarbondataColumnHandle.class, "handle"));
        carbonProjection.addColumn(((CarbondataColumnHandle) handle).getColumnName());
    }

    CarbonTableCacheModel tableCacheModel =
            carbonTableReader.getCarbonCache(carbondataSplit.getSchemaTableName());
    checkNotNull(tableCacheModel, "tableCacheModel should not be null");
    checkNotNull(tableCacheModel.carbonTable, "tableCacheModel.carbonTable should not be null");
    checkNotNull(tableCacheModel.tableInfo, "tableCacheModel.tableInfo should not be null");

    // Build Query Model
    CarbonTable targetTable = tableCacheModel.carbonTable;
    QueryModel queryModel = null;
    try {
        Configuration conf = new Configuration();
        conf.set(CarbonTableInputFormat.INPUT_SEGMENT_NUMBERS, "");
        String carbonTablePath = PathFactory.getInstance()
                .getCarbonTablePath(targetTable.getAbsoluteTableIdentifier().getStorePath(),
                        targetTable.getCarbonTableIdentifier(), null)
                .getPath();
        conf.set(CarbonTableInputFormat.INPUT_DIR, carbonTablePath);
        JobConf jobConf = new JobConf(conf);
        CarbonTableInputFormat carbonTableInputFormat = createInputFormat(jobConf, tableCacheModel.carbonTable,
                PrestoFilterUtil.getFilters(targetTable.getFactTableName().hashCode()), carbonProjection);
        TaskAttemptContextImpl hadoopAttemptContext =
                new TaskAttemptContextImpl(jobConf, new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
        CarbonInputSplit carbonInputSplit =
                CarbonLocalInputSplit.convertSplit(carbondataSplit.getLocalInputSplit());
        queryModel = carbonTableInputFormat.getQueryModel(carbonInputSplit, hadoopAttemptContext);
    } catch (IOException e) {
        throw new RuntimeException("Unable to get the Query Model ", e);
    }
    return new CarbondataRecordSet(targetTable, session, carbondataSplit, handles.build(), queryModel);
}
From source file:org.apache.carbondata.processing.StoreCreator.java
License:Apache License
/**
 * Execute graph which will further load data
 *
 * @param loadModel
 * @param storeLocation
 * @throws Exception
 */
public static void executeGraph(CarbonLoadModel loadModel, String storeLocation) throws Exception {
    new File(storeLocation).mkdirs();
    String outPutLoc = storeLocation + "/etl";
    String databaseName = loadModel.getDatabaseName();
    String tableName = loadModel.getTableName();
    String tempLocationKey = databaseName + '_' + tableName + "_1";
    CarbonProperties.getInstance().addProperty(tempLocationKey, storeLocation);
    CarbonProperties.getInstance().addProperty("store_output_location", outPutLoc);
    CarbonProperties.getInstance().addProperty("send.signal.load", "false");
    CarbonProperties.getInstance().addProperty("carbon.is.columnar.storage", "true");
    CarbonProperties.getInstance().addProperty("carbon.dimension.split.value.in.columnar", "1");
    CarbonProperties.getInstance().addProperty("carbon.is.fullyfilled.bits", "true");
    CarbonProperties.getInstance().addProperty("is.int.based.indexer", "true");
    CarbonProperties.getInstance().addProperty("aggregate.columnar.keyblock", "true");
    CarbonProperties.getInstance().addProperty("high.cardinality.value", "100000");
    CarbonProperties.getInstance().addProperty("is.compressed.keyblock", "false");
    CarbonProperties.getInstance().addProperty("carbon.leaf.node.size", "120000");

    String graphPath = outPutLoc + File.separator + loadModel.getDatabaseName() + File.separator + tableName
            + File.separator + 0 + File.separator + 1 + File.separator + tableName + ".ktr";
    File path = new File(graphPath);
    if (path.exists()) {
        path.delete();
    }

    SchemaInfo info = new SchemaInfo();
    BlockDetails blockDetails = new BlockDetails(new Path(loadModel.getFactFilePath()), 0,
            new File(loadModel.getFactFilePath()).length(), new String[] { "localhost" });
    Configuration configuration = new Configuration();
    CSVInputFormat.setCommentCharacter(configuration, loadModel.getCommentChar());
    CSVInputFormat.setCSVDelimiter(configuration, loadModel.getCsvDelimiter());
    CSVInputFormat.setEscapeCharacter(configuration, loadModel.getEscapeChar());
    CSVInputFormat.setHeaderExtractionEnabled(configuration, true);
    CSVInputFormat.setQuoteCharacter(configuration, loadModel.getQuoteChar());
    CSVInputFormat.setReadBufferSize(configuration, CarbonProperties.getInstance().getProperty(
            CarbonCommonConstants.CSV_READ_BUFFER_SIZE, CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT));
    CSVInputFormat.setMaxColumns(configuration, "10");
    CSVInputFormat.setNumberOfColumns(configuration, "7");

    TaskAttemptContextImpl hadoopAttemptContext = new TaskAttemptContextImpl(configuration,
            new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
    CSVInputFormat format = new CSVInputFormat();
    RecordReader<NullWritable, StringArrayWritable> recordReader =
            format.createRecordReader(blockDetails, hadoopAttemptContext);
    CSVRecordReaderIterator readerIterator =
            new CSVRecordReaderIterator(recordReader, blockDetails, hadoopAttemptContext);
    new DataLoadExecutor().execute(loadModel, storeLocation, new CarbonIterator[] { readerIterator });

    info.setDatabaseName(databaseName);
    info.setTableName(tableName);
    writeLoadMetadata(loadModel.getCarbonDataLoadSchema(), loadModel.getTableName(), loadModel.getTableName(),
            new ArrayList<LoadMetadataDetails>());

    String segLocation = storeLocation + "/" + databaseName + "/" + tableName + "/Fact/Part0/Segment_0";
    File file = new File(segLocation);
    File factFile = null;
    File[] folderList = file.listFiles();
    File folder = null;
    for (int i = 0; i < folderList.length; i++) {
        if (folderList[i].isDirectory()) {
            folder = folderList[i];
        }
    }
    if (folder.isDirectory()) {
        File[] files = folder.listFiles();
        for (int i = 0; i < files.length; i++) {
            if (!files[i].isDirectory() && files[i].getName().startsWith("part")) {
                factFile = files[i];
                break;
            }
        }
        factFile.renameTo(new File(segLocation + "/" + factFile.getName()));
        CarbonUtil.deleteFoldersAndFiles(folder);
    }
}
From source file:org.lilyproject.hadooptestfw.fork.MiniMRCluster.java
License:Apache License
/**
 * Get the events list at the tasktracker
 */
public MapTaskCompletionEventsUpdate getMapTaskCompletionEventsUpdates(int index, JobID jobId, int max)
        throws IOException {
    String jtId = jobTracker.getJobTracker().getTrackerIdentifier();
    TaskAttemptID dummy = new TaskAttemptID(jtId, jobId.getId(), false, 0, 0);
    return taskTrackerList.get(index).getTaskTracker().getMapCompletionEvents(jobId, 0, max, dummy, null);
}