Usage examples for org.apache.hadoop.mapreduce.TaskType.MAP

The examples below show how open-source projects use TaskType.MAP, typically to build a synthetic TaskAttemptID so that Hadoop input and output formats can be driven outside of a real MapReduce job. The source file each snippet was taken from is named above it.
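Before the project-specific examples, here is a minimal, self-contained sketch of the common pattern: a TaskAttemptID is constructed with TaskType.MAP and wrapped in a TaskAttemptContextImpl. The class name TaskTypeMapExample and the printed output are illustrative only; the Hadoop classes and constructors are the same ones used in the examples below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskTypeMapExample {
  public static void main(String[] args) {
    // Synthetic task attempt id: job tracker id "", job 1, MAP task 0, attempt 0.
    TaskAttemptID attemptId = new TaskAttemptID("", 1, TaskType.MAP, 0, 0);
    // Wrap it in a task attempt context so input/output formats can be used directly.
    TaskAttemptContextImpl context = new TaskAttemptContextImpl(new Configuration(), attemptId);
    System.out.println(context.getTaskAttemptID());
  }
}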
From source file: org.apache.carbondata.hadoop.testutil.StoreCreator.java
License: Apache License

/**
 * Execute graph which will further load data
 *
 * @param loadModel
 * @param storeLocation
 * @throws Exception
 */
public static void loadData(CarbonLoadModel loadModel, String storeLocation) throws Exception {
  if (!new File(storeLocation).mkdirs()) {
    LOG.warn("mkdir is failed");
  }
  String outPutLoc = storeLocation + "/etl";
  String databaseName = loadModel.getDatabaseName();
  String tableName = loadModel.getTableName();
  String tempLocationKey = databaseName + '_' + tableName + "_1";
  CarbonProperties.getInstance().addProperty(tempLocationKey,
      storeLocation + "/" + databaseName + "/" + tableName);
  CarbonProperties.getInstance().addProperty("store_output_location", outPutLoc);
  CarbonProperties.getInstance().addProperty("send.signal.load", "false");
  CarbonProperties.getInstance().addProperty("carbon.is.columnar.storage", "true");
  CarbonProperties.getInstance().addProperty("carbon.dimension.split.value.in.columnar", "1");
  CarbonProperties.getInstance().addProperty("carbon.is.fullyfilled.bits", "true");
  CarbonProperties.getInstance().addProperty("is.int.based.indexer", "true");
  CarbonProperties.getInstance().addProperty("aggregate.columnar.keyblock", "true");
  CarbonProperties.getInstance().addProperty("is.compressed.keyblock", "false");
  String graphPath = outPutLoc + File.separator + loadModel.getDatabaseName() + File.separator
      + tableName + File.separator + 0 + File.separator + 1 + File.separator + tableName + ".ktr";
  File path = new File(graphPath);
  if (path.exists()) {
    if (!path.delete()) {
      LOG.warn("delete " + path + " failed");
    }
  }
  BlockDetails blockDetails = new BlockDetails(new Path(loadModel.getFactFilePath()), 0,
      new File(loadModel.getFactFilePath()).length(), new String[] { "localhost" });
  Configuration configuration = new Configuration();
  CSVInputFormat.setCommentCharacter(configuration, loadModel.getCommentChar());
  CSVInputFormat.setCSVDelimiter(configuration, loadModel.getCsvDelimiter());
  CSVInputFormat.setEscapeCharacter(configuration, loadModel.getEscapeChar());
  CSVInputFormat.setHeaderExtractionEnabled(configuration, true);
  CSVInputFormat.setQuoteCharacter(configuration, loadModel.getQuoteChar());
  CSVInputFormat.setReadBufferSize(configuration, CarbonProperties.getInstance().getProperty(
      CarbonCommonConstants.CSV_READ_BUFFER_SIZE, CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT));
  CSVInputFormat.setNumberOfColumns(configuration, String.valueOf(loadModel.getCsvHeaderColumns().length));
  CSVInputFormat.setMaxColumns(configuration, "10");
  // A dummy MAP task attempt id is enough to drive the input format outside a real MapReduce job.
  TaskAttemptContextImpl hadoopAttemptContext =
      new TaskAttemptContextImpl(configuration, new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
  CSVInputFormat format = new CSVInputFormat();
  RecordReader<NullWritable, StringArrayWritable> recordReader =
      format.createRecordReader(blockDetails, hadoopAttemptContext);
  CSVRecordReaderIterator readerIterator =
      new CSVRecordReaderIterator(recordReader, blockDetails, hadoopAttemptContext);
  DataTypeUtil.clearFormatter();
  new DataLoadExecutor().execute(loadModel,
      new String[] { storeLocation + "/" + databaseName + "/" + tableName },
      new CarbonIterator[] { readerIterator });
  writeLoadMetadata(loadModel.getCarbonDataLoadSchema(), loadModel.getTableName(),
      loadModel.getTableName(), new ArrayList<LoadMetadataDetails>());
}
From source file: org.apache.carbondata.presto.CarbondataPageSource.java
License: Apache License

private void initialReaderForRow() {
  SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmm");
  String jobTrackerId = formatter.format(new Date());
  // Synthetic MAP task attempt id used to build a TaskAttemptContext for the row reader.
  TaskAttemptID attemptId = new TaskAttemptID(jobTrackerId, 0, TaskType.MAP, 0, 0);
  TaskAttemptContextImpl attemptContext =
      new TaskAttemptContextImpl(FileFactory.getConfiguration(), attemptId);
  CarbonMultiBlockSplit carbonInputSplit =
      CarbonLocalMultiBlockSplit.convertSplit(split.getSchema().getProperty("carbonSplit"));
  try {
    rowReader.initialize(carbonInputSplit, attemptContext);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
From source file: org.apache.carbondata.presto.CarbondataPageSource.java
License: Apache License

/**
 * @param carbondataSplit
 * @param columns
 * @return
 */
private QueryModel createQueryModel(HiveSplit carbondataSplit, List<? extends ColumnHandle> columns,
    Configuration conf) {
  try {
    CarbonProjection carbonProjection = getCarbonProjection(columns);
    conf.set(CarbonTableInputFormat.INPUT_SEGMENT_NUMBERS, "");
    String carbonTablePath = carbonTable.getAbsoluteTableIdentifier().getTablePath();
    CarbonTableInputFormat.setTransactionalTable(conf, carbonTable.getTableInfo().isTransactionalTable());
    CarbonTableInputFormat.setTableInfo(conf, carbonTable.getTableInfo());
    conf.set(CarbonTableInputFormat.INPUT_DIR, carbonTablePath);
    conf.set("query.id", queryId);
    JobConf jobConf = new JobConf(conf);
    CarbonTableInputFormat carbonTableInputFormat = createInputFormat(jobConf, carbonTable,
        PrestoFilterUtil.parseFilterExpression(carbondataSplit.getEffectivePredicate()),
        carbonProjection);
    // The query model is created against a dummy MAP task attempt context.
    TaskAttemptContextImpl hadoopAttemptContext =
        new TaskAttemptContextImpl(jobConf, new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
    CarbonMultiBlockSplit carbonInputSplit =
        CarbonLocalMultiBlockSplit.convertSplit(carbondataSplit.getSchema().getProperty("carbonSplit"));
    QueryModel queryModel = carbonTableInputFormat.createQueryModel(carbonInputSplit, hadoopAttemptContext);
    queryModel.setQueryId(queryId);
    queryModel.setVectorReader(true);
    queryModel.setStatisticsRecorder(
        CarbonTimeStatisticsFactory.createExecutorRecorder(queryModel.getQueryId()));
    List<TableBlockInfo> tableBlockInfoList =
        CarbonInputSplit.createBlocks(carbonInputSplit.getAllSplits());
    queryModel.setTableBlockInfos(tableBlockInfoList);
    return queryModel;
  } catch (IOException e) {
    throw new RuntimeException("Unable to get the Query Model ", e);
  }
}
From source file: org.apache.carbondata.presto.CarbondataRecordSetProvider.java
License: Apache License

@Override
public RecordSet getRecordSet(ConnectorTransactionHandle transactionHandle, ConnectorSession session,
    ConnectorSplit split, List<? extends ColumnHandle> columns) {
  CarbondataSplit carbondataSplit =
      checkType(split, CarbondataSplit.class, "split is not class CarbondataSplit");
  checkArgument(carbondataSplit.getConnectorId().equals(connectorId), "split is not for this connector");

  CarbonProjection carbonProjection = new CarbonProjection();
  // Convert all columns handles
  ImmutableList.Builder<CarbondataColumnHandle> handles = ImmutableList.builder();
  for (ColumnHandle handle : columns) {
    handles.add(checkType(handle, CarbondataColumnHandle.class, "handle"));
    carbonProjection.addColumn(((CarbondataColumnHandle) handle).getColumnName());
  }

  CarbonTableCacheModel tableCacheModel =
      carbonTableReader.getCarbonCache(carbondataSplit.getSchemaTableName());
  checkNotNull(tableCacheModel, "tableCacheModel should not be null");
  checkNotNull(tableCacheModel.carbonTable, "tableCacheModel.carbonTable should not be null");
  checkNotNull(tableCacheModel.tableInfo, "tableCacheModel.tableInfo should not be null");

  // Build Query Model
  CarbonTable targetTable = tableCacheModel.carbonTable;
  QueryModel queryModel = null;
  try {
    Configuration conf = new Configuration();
    conf.set(CarbonTableInputFormat.INPUT_SEGMENT_NUMBERS, "");
    String carbonTablePath = PathFactory.getInstance()
        .getCarbonTablePath(targetTable.getAbsoluteTableIdentifier().getStorePath(),
            targetTable.getCarbonTableIdentifier(), null)
        .getPath();
    conf.set(CarbonTableInputFormat.INPUT_DIR, carbonTablePath);
    JobConf jobConf = new JobConf(conf);
    CarbonTableInputFormat carbonTableInputFormat = createInputFormat(jobConf, tableCacheModel.carbonTable,
        PrestoFilterUtil.getFilters(targetTable.getFactTableName().hashCode()), carbonProjection);
    // Dummy MAP task attempt context used to obtain the query model from the input format.
    TaskAttemptContextImpl hadoopAttemptContext =
        new TaskAttemptContextImpl(jobConf, new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
    CarbonInputSplit carbonInputSplit =
        CarbonLocalInputSplit.convertSplit(carbondataSplit.getLocalInputSplit());
    queryModel = carbonTableInputFormat.getQueryModel(carbonInputSplit, hadoopAttemptContext);
  } catch (IOException e) {
    throw new RuntimeException("Unable to get the Query Model ", e);
  }
  return new CarbondataRecordSet(targetTable, session, carbondataSplit, handles.build(), queryModel);
}
From source file: org.apache.carbondata.processing.StoreCreator.java
License: Apache License

/**
 * Execute graph which will further load data
 *
 * @param loadModel
 * @param storeLocation
 * @throws Exception
 */
public static void executeGraph(CarbonLoadModel loadModel, String storeLocation) throws Exception {
  new File(storeLocation).mkdirs();
  String outPutLoc = storeLocation + "/etl";
  String databaseName = loadModel.getDatabaseName();
  String tableName = loadModel.getTableName();
  String tempLocationKey = databaseName + '_' + tableName + "_1";
  CarbonProperties.getInstance().addProperty(tempLocationKey, storeLocation);
  CarbonProperties.getInstance().addProperty("store_output_location", outPutLoc);
  CarbonProperties.getInstance().addProperty("send.signal.load", "false");
  CarbonProperties.getInstance().addProperty("carbon.is.columnar.storage", "true");
  CarbonProperties.getInstance().addProperty("carbon.dimension.split.value.in.columnar", "1");
  CarbonProperties.getInstance().addProperty("carbon.is.fullyfilled.bits", "true");
  CarbonProperties.getInstance().addProperty("is.int.based.indexer", "true");
  CarbonProperties.getInstance().addProperty("aggregate.columnar.keyblock", "true");
  CarbonProperties.getInstance().addProperty("high.cardinality.value", "100000");
  CarbonProperties.getInstance().addProperty("is.compressed.keyblock", "false");
  CarbonProperties.getInstance().addProperty("carbon.leaf.node.size", "120000");
  String graphPath = outPutLoc + File.separator + loadModel.getDatabaseName() + File.separator
      + tableName + File.separator + 0 + File.separator + 1 + File.separator + tableName + ".ktr";
  File path = new File(graphPath);
  if (path.exists()) {
    path.delete();
  }
  SchemaInfo info = new SchemaInfo();
  BlockDetails blockDetails = new BlockDetails(new Path(loadModel.getFactFilePath()), 0,
      new File(loadModel.getFactFilePath()).length(), new String[] { "localhost" });
  Configuration configuration = new Configuration();
  CSVInputFormat.setCommentCharacter(configuration, loadModel.getCommentChar());
  CSVInputFormat.setCSVDelimiter(configuration, loadModel.getCsvDelimiter());
  CSVInputFormat.setEscapeCharacter(configuration, loadModel.getEscapeChar());
  CSVInputFormat.setHeaderExtractionEnabled(configuration, true);
  CSVInputFormat.setQuoteCharacter(configuration, loadModel.getQuoteChar());
  CSVInputFormat.setReadBufferSize(configuration, CarbonProperties.getInstance().getProperty(
      CarbonCommonConstants.CSV_READ_BUFFER_SIZE, CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT));
  CSVInputFormat.setMaxColumns(configuration, "10");
  CSVInputFormat.setNumberOfColumns(configuration, "7");
  // Dummy MAP task attempt id so the CSV input format can run outside of a MapReduce job.
  TaskAttemptContextImpl hadoopAttemptContext =
      new TaskAttemptContextImpl(configuration, new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
  CSVInputFormat format = new CSVInputFormat();
  RecordReader<NullWritable, StringArrayWritable> recordReader =
      format.createRecordReader(blockDetails, hadoopAttemptContext);
  CSVRecordReaderIterator readerIterator =
      new CSVRecordReaderIterator(recordReader, blockDetails, hadoopAttemptContext);
  new DataLoadExecutor().execute(loadModel, storeLocation, new CarbonIterator[] { readerIterator });
  info.setDatabaseName(databaseName);
  info.setTableName(tableName);
  writeLoadMetadata(loadModel.getCarbonDataLoadSchema(), loadModel.getTableName(),
      loadModel.getTableName(), new ArrayList<LoadMetadataDetails>());
  String segLocation = storeLocation + "/" + databaseName + "/" + tableName + "/Fact/Part0/Segment_0";
  File file = new File(segLocation);
  File factFile = null;
  File[] folderList = file.listFiles();
  File folder = null;
  for (int i = 0; i < folderList.length; i++) {
    if (folderList[i].isDirectory()) {
      folder = folderList[i];
    }
  }
  if (folder.isDirectory()) {
    File[] files = folder.listFiles();
    for (int i = 0; i < files.length; i++) {
      if (!files[i].isDirectory() && files[i].getName().startsWith("part")) {
        factFile = files[i];
        break;
      }
    }
    factFile.renameTo(new File(segLocation + "/" + factFile.getName()));
    CarbonUtil.deleteFoldersAndFiles(folder);
  }
}
From source file: org.apache.carbondata.sdk.file.AvroCarbonWriter.java
License: Apache License

AvroCarbonWriter(CarbonLoadModel loadModel, Configuration hadoopConf) throws IOException {
  CarbonTableOutputFormat.setLoadModel(hadoopConf, loadModel);
  CarbonTableOutputFormat format = new CarbonTableOutputFormat();
  JobID jobId = new JobID(UUID.randomUUID().toString(), 0);
  Random random = new Random();
  // Register the writer as a MAP task attempt of a synthetic job.
  TaskID task = new TaskID(jobId, TaskType.MAP, random.nextInt());
  TaskAttemptID attemptID = new TaskAttemptID(task, random.nextInt());
  TaskAttemptContextImpl context = new TaskAttemptContextImpl(hadoopConf, attemptID);
  this.recordWriter = format.getRecordWriter(context);
  this.context = context;
  this.writable = new ObjectArrayWritable();
}
From source file: org.apache.carbondata.sdk.file.CSVCarbonWriter.java
License: Apache License

CSVCarbonWriter(CarbonLoadModel loadModel, Configuration hadoopConf) throws IOException {
  CarbonTableOutputFormat.setLoadModel(hadoopConf, loadModel);
  CarbonTableOutputFormat format = new CarbonTableOutputFormat();
  JobID jobId = new JobID(UUID.randomUUID().toString(), 0);
  Random random = new Random();
  TaskID task = new TaskID(jobId, TaskType.MAP, random.nextInt());
  TaskAttemptID attemptID = new TaskAttemptID(task, random.nextInt());
  TaskAttemptContextImpl context = new TaskAttemptContextImpl(hadoopConf, attemptID);
  this.recordWriter = format.getRecordWriter(context);
  this.context = context;
  this.writable = new ObjectArrayWritable();
}
From source file: org.apache.carbondata.sdk.file.JsonCarbonWriter.java
License: Apache License

JsonCarbonWriter(CarbonLoadModel loadModel, Configuration configuration) throws IOException {
  CarbonTableOutputFormat.setLoadModel(configuration, loadModel);
  CarbonTableOutputFormat outputFormat = new CarbonTableOutputFormat();
  JobID jobId = new JobID(UUID.randomUUID().toString(), 0);
  Random random = new Random();
  TaskID task = new TaskID(jobId, TaskType.MAP, random.nextInt());
  TaskAttemptID attemptID = new TaskAttemptID(task, random.nextInt());
  TaskAttemptContextImpl context = new TaskAttemptContextImpl(configuration, attemptID);
  this.recordWriter = outputFormat.getRecordWriter(context);
  this.context = context;
  this.writable = new ObjectArrayWritable();
}
From source file: org.apache.carbondata.stream.CarbonStreamRecordReaderTest.java
License: Apache License

@Override
protected void setUp() throws Exception {
  tablePath = new File("target/stream_input").getCanonicalPath();
  String dbName = "default";
  String tableName = "stream_table_input";
  identifier = AbsoluteTableIdentifier.from(tablePath,
      new CarbonTableIdentifier(dbName, tableName, UUID.randomUUID().toString()));

  JobID jobId = CarbonInputFormatUtil.getJobId(new Date(), 0);
  TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
  taskAttemptId = new TaskAttemptID(taskId, 0);

  hadoopConf = new Configuration();
  taskAttemptContext = new TaskAttemptContextImpl(hadoopConf, taskAttemptId);
}
From source file: org.apache.carbondata.streaming.CarbonStreamOutputFormatTest.java
License: Apache License

@Override
protected void setUp() throws Exception {
  super.setUp();
  JobID jobId = CarbonInputFormatUtil.getJobId(new Date(), 0);
  TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
  taskAttemptId = new TaskAttemptID(taskId, 0);

  hadoopConf = new Configuration();
  hadoopConf.set("mapred.job.id", jobId.toString());
  hadoopConf.set("mapred.tip.id", taskAttemptId.getTaskID().toString());
  hadoopConf.set("mapred.task.id", taskAttemptId.toString());
  hadoopConf.setBoolean("mapred.task.is.map", true);
  hadoopConf.setInt("mapred.task.partition", 0);

  tablePath = new File("target/stream_output").getCanonicalPath();
  String dbName = "default";
  String tableName = "stream_table_output";
  AbsoluteTableIdentifier identifier = AbsoluteTableIdentifier.from(tablePath,
      new CarbonTableIdentifier(dbName, tableName, UUID.randomUUID().toString()));
  CarbonTable table = new StoreCreator(new File("target/store").getAbsolutePath(),
      new File("../hadoop/src/test/resources/data.csv").getCanonicalPath()).createTable(identifier);
  String factFilePath = new File("../hadoop/src/test/resources/data.csv").getCanonicalPath();
  carbonLoadModel = StoreCreator.buildCarbonLoadModel(table, factFilePath, identifier);
}