Usage examples for org.apache.hadoop.mapreduce.TaskType.MAP

The examples below show how open-source projects use TaskType.MAP, typically to build a synthetic TaskAttemptID so that Hadoop input and output formats can be driven outside of a real MapReduce job. The source file each snippet was taken from is named above it.
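Before the project-specific examples, here is a minimal, self-contained sketch of the common pattern: a TaskAttemptID is constructed with TaskType.MAP and wrapped in a TaskAttemptContextImpl. The class name TaskTypeMapExample and the printed output are illustrative only; the Hadoop classes and constructors are the same ones used in the examples below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskTypeMapExample {
  public static void main(String[] args) {
    // Synthetic task attempt id: job tracker id "", job 1, MAP task 0, attempt 0.
    TaskAttemptID attemptId = new TaskAttemptID("", 1, TaskType.MAP, 0, 0);
    // Wrap it in a task attempt context so input/output formats can be used directly.
    TaskAttemptContextImpl context = new TaskAttemptContextImpl(new Configuration(), attemptId);
    System.out.println(context.getTaskAttemptID());
  }
}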
From source file: org.apache.carbondata.hadoop.testutil.StoreCreator.java
License: Apache License

/**
 * Execute graph which will further load data
 *
 * @param loadModel
 * @param storeLocation
 * @throws Exception
 */
public static void loadData(CarbonLoadModel loadModel, String storeLocation) throws Exception {
  if (!new File(storeLocation).mkdirs()) {
    LOG.warn("mkdir is failed");
  }
  String outPutLoc = storeLocation + "/etl";
  String databaseName = loadModel.getDatabaseName();
  String tableName = loadModel.getTableName();
  String tempLocationKey = databaseName + '_' + tableName + "_1";
  CarbonProperties.getInstance().addProperty(tempLocationKey,
      storeLocation + "/" + databaseName + "/" + tableName);
  CarbonProperties.getInstance().addProperty("store_output_location", outPutLoc);
  CarbonProperties.getInstance().addProperty("send.signal.load", "false");
  CarbonProperties.getInstance().addProperty("carbon.is.columnar.storage", "true");
  CarbonProperties.getInstance().addProperty("carbon.dimension.split.value.in.columnar", "1");
  CarbonProperties.getInstance().addProperty("carbon.is.fullyfilled.bits", "true");
  CarbonProperties.getInstance().addProperty("is.int.based.indexer", "true");
  CarbonProperties.getInstance().addProperty("aggregate.columnar.keyblock", "true");
  CarbonProperties.getInstance().addProperty("is.compressed.keyblock", "false");
  String graphPath = outPutLoc + File.separator + loadModel.getDatabaseName() + File.separator
      + tableName + File.separator + 0 + File.separator + 1 + File.separator + tableName + ".ktr";
  File path = new File(graphPath);
  if (path.exists()) {
    if (!path.delete()) {
      LOG.warn("delete " + path + " failed");
    }
  }
  BlockDetails blockDetails = new BlockDetails(new Path(loadModel.getFactFilePath()), 0,
      new File(loadModel.getFactFilePath()).length(), new String[] { "localhost" });
  Configuration configuration = new Configuration();
  CSVInputFormat.setCommentCharacter(configuration, loadModel.getCommentChar());
  CSVInputFormat.setCSVDelimiter(configuration, loadModel.getCsvDelimiter());
  CSVInputFormat.setEscapeCharacter(configuration, loadModel.getEscapeChar());
  CSVInputFormat.setHeaderExtractionEnabled(configuration, true);
  CSVInputFormat.setQuoteCharacter(configuration, loadModel.getQuoteChar());
  CSVInputFormat.setReadBufferSize(configuration, CarbonProperties.getInstance().getProperty(
      CarbonCommonConstants.CSV_READ_BUFFER_SIZE, CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT));
  CSVInputFormat.setNumberOfColumns(configuration, String.valueOf(loadModel.getCsvHeaderColumns().length));
  CSVInputFormat.setMaxColumns(configuration, "10");
  // A dummy MAP task attempt id is enough to drive the input format outside a real MapReduce job.
  TaskAttemptContextImpl hadoopAttemptContext =
      new TaskAttemptContextImpl(configuration, new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
  CSVInputFormat format = new CSVInputFormat();
  RecordReader<NullWritable, StringArrayWritable> recordReader =
      format.createRecordReader(blockDetails, hadoopAttemptContext);
  CSVRecordReaderIterator readerIterator =
      new CSVRecordReaderIterator(recordReader, blockDetails, hadoopAttemptContext);
  DataTypeUtil.clearFormatter();
  new DataLoadExecutor().execute(loadModel,
      new String[] { storeLocation + "/" + databaseName + "/" + tableName },
      new CarbonIterator[] { readerIterator });
  writeLoadMetadata(loadModel.getCarbonDataLoadSchema(), loadModel.getTableName(),
      loadModel.getTableName(), new ArrayList<LoadMetadataDetails>());
}
From source file: org.apache.carbondata.presto.CarbondataPageSource.java
License: Apache License

private void initialReaderForRow() {
  SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmm");
  String jobTrackerId = formatter.format(new Date());
  // Synthetic MAP task attempt id used to build a TaskAttemptContext for the row reader.
  TaskAttemptID attemptId = new TaskAttemptID(jobTrackerId, 0, TaskType.MAP, 0, 0);
  TaskAttemptContextImpl attemptContext =
      new TaskAttemptContextImpl(FileFactory.getConfiguration(), attemptId);
  CarbonMultiBlockSplit carbonInputSplit =
      CarbonLocalMultiBlockSplit.convertSplit(split.getSchema().getProperty("carbonSplit"));
  try {
    rowReader.initialize(carbonInputSplit, attemptContext);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
From source file: org.apache.carbondata.presto.CarbondataPageSource.java
License: Apache License

/**
 * @param carbondataSplit
 * @param columns
 * @return
 */
private QueryModel createQueryModel(HiveSplit carbondataSplit, List<? extends ColumnHandle> columns,
    Configuration conf) {
  try {
    CarbonProjection carbonProjection = getCarbonProjection(columns);
    conf.set(CarbonTableInputFormat.INPUT_SEGMENT_NUMBERS, "");
    String carbonTablePath = carbonTable.getAbsoluteTableIdentifier().getTablePath();
    CarbonTableInputFormat.setTransactionalTable(conf, carbonTable.getTableInfo().isTransactionalTable());
    CarbonTableInputFormat.setTableInfo(conf, carbonTable.getTableInfo());
    conf.set(CarbonTableInputFormat.INPUT_DIR, carbonTablePath);
    conf.set("query.id", queryId);
    JobConf jobConf = new JobConf(conf);
    CarbonTableInputFormat carbonTableInputFormat = createInputFormat(jobConf, carbonTable,
        PrestoFilterUtil.parseFilterExpression(carbondataSplit.getEffectivePredicate()),
        carbonProjection);
    // The query model is created against a dummy MAP task attempt context.
    TaskAttemptContextImpl hadoopAttemptContext =
        new TaskAttemptContextImpl(jobConf, new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
    CarbonMultiBlockSplit carbonInputSplit =
        CarbonLocalMultiBlockSplit.convertSplit(carbondataSplit.getSchema().getProperty("carbonSplit"));
    QueryModel queryModel = carbonTableInputFormat.createQueryModel(carbonInputSplit, hadoopAttemptContext);
    queryModel.setQueryId(queryId);
    queryModel.setVectorReader(true);
    queryModel.setStatisticsRecorder(
        CarbonTimeStatisticsFactory.createExecutorRecorder(queryModel.getQueryId()));
    List<TableBlockInfo> tableBlockInfoList =
        CarbonInputSplit.createBlocks(carbonInputSplit.getAllSplits());
    queryModel.setTableBlockInfos(tableBlockInfoList);
    return queryModel;
  } catch (IOException e) {
    throw new RuntimeException("Unable to get the Query Model ", e);
  }
}
From source file: org.apache.carbondata.presto.CarbondataRecordSetProvider.java
License: Apache License

@Override
public RecordSet getRecordSet(ConnectorTransactionHandle transactionHandle, ConnectorSession session,
    ConnectorSplit split, List<? extends ColumnHandle> columns) {
  CarbondataSplit carbondataSplit =
      checkType(split, CarbondataSplit.class, "split is not class CarbondataSplit");
  checkArgument(carbondataSplit.getConnectorId().equals(connectorId), "split is not for this connector");

  CarbonProjection carbonProjection = new CarbonProjection();
  // Convert all columns handles
  ImmutableList.Builder<CarbondataColumnHandle> handles = ImmutableList.builder();
  for (ColumnHandle handle : columns) {
    handles.add(checkType(handle, CarbondataColumnHandle.class, "handle"));
    carbonProjection.addColumn(((CarbondataColumnHandle) handle).getColumnName());
  }

  CarbonTableCacheModel tableCacheModel =
      carbonTableReader.getCarbonCache(carbondataSplit.getSchemaTableName());
  checkNotNull(tableCacheModel, "tableCacheModel should not be null");
  checkNotNull(tableCacheModel.carbonTable, "tableCacheModel.carbonTable should not be null");
  checkNotNull(tableCacheModel.tableInfo, "tableCacheModel.tableInfo should not be null");

  // Build Query Model
  CarbonTable targetTable = tableCacheModel.carbonTable;
  QueryModel queryModel = null;
  try {
    Configuration conf = new Configuration();
    conf.set(CarbonTableInputFormat.INPUT_SEGMENT_NUMBERS, "");
    String carbonTablePath = PathFactory.getInstance()
        .getCarbonTablePath(targetTable.getAbsoluteTableIdentifier().getStorePath(),
            targetTable.getCarbonTableIdentifier(), null)
        .getPath();
    conf.set(CarbonTableInputFormat.INPUT_DIR, carbonTablePath);
    JobConf jobConf = new JobConf(conf);
    CarbonTableInputFormat carbonTableInputFormat = createInputFormat(jobConf, tableCacheModel.carbonTable,
        PrestoFilterUtil.getFilters(targetTable.getFactTableName().hashCode()), carbonProjection);
    // Dummy MAP task attempt context used to obtain the query model from the input format.
    TaskAttemptContextImpl hadoopAttemptContext =
        new TaskAttemptContextImpl(jobConf, new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
    CarbonInputSplit carbonInputSplit =
        CarbonLocalInputSplit.convertSplit(carbondataSplit.getLocalInputSplit());
    queryModel = carbonTableInputFormat.getQueryModel(carbonInputSplit, hadoopAttemptContext);
  } catch (IOException e) {
    throw new RuntimeException("Unable to get the Query Model ", e);
  }
  return new CarbondataRecordSet(targetTable, session, carbondataSplit, handles.build(), queryModel);
}
From source file: org.apache.carbondata.processing.StoreCreator.java
License: Apache License

/**
 * Execute graph which will further load data
 *
 * @param loadModel
 * @param storeLocation
 * @throws Exception
 */
public static void executeGraph(CarbonLoadModel loadModel, String storeLocation) throws Exception {
  new File(storeLocation).mkdirs();
  String outPutLoc = storeLocation + "/etl";
  String databaseName = loadModel.getDatabaseName();
  String tableName = loadModel.getTableName();
  String tempLocationKey = databaseName + '_' + tableName + "_1";
  CarbonProperties.getInstance().addProperty(tempLocationKey, storeLocation);
  CarbonProperties.getInstance().addProperty("store_output_location", outPutLoc);
  CarbonProperties.getInstance().addProperty("send.signal.load", "false");
  CarbonProperties.getInstance().addProperty("carbon.is.columnar.storage", "true");
  CarbonProperties.getInstance().addProperty("carbon.dimension.split.value.in.columnar", "1");
  CarbonProperties.getInstance().addProperty("carbon.is.fullyfilled.bits", "true");
  CarbonProperties.getInstance().addProperty("is.int.based.indexer", "true");
  CarbonProperties.getInstance().addProperty("aggregate.columnar.keyblock", "true");
  CarbonProperties.getInstance().addProperty("high.cardinality.value", "100000");
  CarbonProperties.getInstance().addProperty("is.compressed.keyblock", "false");
  CarbonProperties.getInstance().addProperty("carbon.leaf.node.size", "120000");
  String graphPath = outPutLoc + File.separator + loadModel.getDatabaseName() + File.separator
      + tableName + File.separator + 0 + File.separator + 1 + File.separator + tableName + ".ktr";
  File path = new File(graphPath);
  if (path.exists()) {
    path.delete();
  }
  SchemaInfo info = new SchemaInfo();
  BlockDetails blockDetails = new BlockDetails(new Path(loadModel.getFactFilePath()), 0,
      new File(loadModel.getFactFilePath()).length(), new String[] { "localhost" });
  Configuration configuration = new Configuration();
  CSVInputFormat.setCommentCharacter(configuration, loadModel.getCommentChar());
  CSVInputFormat.setCSVDelimiter(configuration, loadModel.getCsvDelimiter());
  CSVInputFormat.setEscapeCharacter(configuration, loadModel.getEscapeChar());
  CSVInputFormat.setHeaderExtractionEnabled(configuration, true);
  CSVInputFormat.setQuoteCharacter(configuration, loadModel.getQuoteChar());
  CSVInputFormat.setReadBufferSize(configuration, CarbonProperties.getInstance().getProperty(
      CarbonCommonConstants.CSV_READ_BUFFER_SIZE, CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT));
  CSVInputFormat.setMaxColumns(configuration, "10");
  CSVInputFormat.setNumberOfColumns(configuration, "7");
  // Dummy MAP task attempt id so the CSV input format can run outside of a MapReduce job.
  TaskAttemptContextImpl hadoopAttemptContext =
      new TaskAttemptContextImpl(configuration, new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
  CSVInputFormat format = new CSVInputFormat();
  RecordReader<NullWritable, StringArrayWritable> recordReader =
      format.createRecordReader(blockDetails, hadoopAttemptContext);
  CSVRecordReaderIterator readerIterator =
      new CSVRecordReaderIterator(recordReader, blockDetails, hadoopAttemptContext);
  new DataLoadExecutor().execute(loadModel, storeLocation, new CarbonIterator[] { readerIterator });
  info.setDatabaseName(databaseName);
  info.setTableName(tableName);
  writeLoadMetadata(loadModel.getCarbonDataLoadSchema(), loadModel.getTableName(),
      loadModel.getTableName(), new ArrayList<LoadMetadataDetails>());
  String segLocation = storeLocation + "/" + databaseName + "/" + tableName + "/Fact/Part0/Segment_0";
  File file = new File(segLocation);
  File factFile = null;
  File[] folderList = file.listFiles();
  File folder = null;
  for (int i = 0; i < folderList.length; i++) {
    if (folderList[i].isDirectory()) {
      folder = folderList[i];
    }
  }
  if (folder.isDirectory()) {
    File[] files = folder.listFiles();
    for (int i = 0; i < files.length; i++) {
      if (!files[i].isDirectory() && files[i].getName().startsWith("part")) {
        factFile = files[i];
        break;
      }
    }
    factFile.renameTo(new File(segLocation + "/" + factFile.getName()));
    CarbonUtil.deleteFoldersAndFiles(folder);
  }
}
From source file: org.apache.carbondata.sdk.file.AvroCarbonWriter.java
License: Apache License

AvroCarbonWriter(CarbonLoadModel loadModel, Configuration hadoopConf) throws IOException {
  CarbonTableOutputFormat.setLoadModel(hadoopConf, loadModel);
  CarbonTableOutputFormat format = new CarbonTableOutputFormat();
  JobID jobId = new JobID(UUID.randomUUID().toString(), 0);
  Random random = new Random();
  // Register the writer as a MAP task attempt of a synthetic job.
  TaskID task = new TaskID(jobId, TaskType.MAP, random.nextInt());
  TaskAttemptID attemptID = new TaskAttemptID(task, random.nextInt());
  TaskAttemptContextImpl context = new TaskAttemptContextImpl(hadoopConf, attemptID);
  this.recordWriter = format.getRecordWriter(context);
  this.context = context;
  this.writable = new ObjectArrayWritable();
}
From source file: org.apache.carbondata.sdk.file.CSVCarbonWriter.java
License: Apache License

CSVCarbonWriter(CarbonLoadModel loadModel, Configuration hadoopConf) throws IOException {
  CarbonTableOutputFormat.setLoadModel(hadoopConf, loadModel);
  CarbonTableOutputFormat format = new CarbonTableOutputFormat();
  JobID jobId = new JobID(UUID.randomUUID().toString(), 0);
  Random random = new Random();
  TaskID task = new TaskID(jobId, TaskType.MAP, random.nextInt());
  TaskAttemptID attemptID = new TaskAttemptID(task, random.nextInt());
  TaskAttemptContextImpl context = new TaskAttemptContextImpl(hadoopConf, attemptID);
  this.recordWriter = format.getRecordWriter(context);
  this.context = context;
  this.writable = new ObjectArrayWritable();
}
From source file: org.apache.carbondata.sdk.file.JsonCarbonWriter.java
License: Apache License

JsonCarbonWriter(CarbonLoadModel loadModel, Configuration configuration) throws IOException {
  CarbonTableOutputFormat.setLoadModel(configuration, loadModel);
  CarbonTableOutputFormat outputFormat = new CarbonTableOutputFormat();
  JobID jobId = new JobID(UUID.randomUUID().toString(), 0);
  Random random = new Random();
  TaskID task = new TaskID(jobId, TaskType.MAP, random.nextInt());
  TaskAttemptID attemptID = new TaskAttemptID(task, random.nextInt());
  TaskAttemptContextImpl context = new TaskAttemptContextImpl(configuration, attemptID);
  this.recordWriter = outputFormat.getRecordWriter(context);
  this.context = context;
  this.writable = new ObjectArrayWritable();
}
From source file: org.apache.carbondata.stream.CarbonStreamRecordReaderTest.java
License: Apache License

@Override
protected void setUp() throws Exception {
  tablePath = new File("target/stream_input").getCanonicalPath();
  String dbName = "default";
  String tableName = "stream_table_input";
  identifier = AbsoluteTableIdentifier.from(tablePath,
      new CarbonTableIdentifier(dbName, tableName, UUID.randomUUID().toString()));

  JobID jobId = CarbonInputFormatUtil.getJobId(new Date(), 0);
  TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
  taskAttemptId = new TaskAttemptID(taskId, 0);

  hadoopConf = new Configuration();
  taskAttemptContext = new TaskAttemptContextImpl(hadoopConf, taskAttemptId);
}
From source file: org.apache.carbondata.streaming.CarbonStreamOutputFormatTest.java
License: Apache License

@Override
protected void setUp() throws Exception {
  super.setUp();
  JobID jobId = CarbonInputFormatUtil.getJobId(new Date(), 0);
  TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
  taskAttemptId = new TaskAttemptID(taskId, 0);

  hadoopConf = new Configuration();
  hadoopConf.set("mapred.job.id", jobId.toString());
  hadoopConf.set("mapred.tip.id", taskAttemptId.getTaskID().toString());
  hadoopConf.set("mapred.task.id", taskAttemptId.toString());
  hadoopConf.setBoolean("mapred.task.is.map", true);
  hadoopConf.setInt("mapred.task.partition", 0);

  tablePath = new File("target/stream_output").getCanonicalPath();
  String dbName = "default";
  String tableName = "stream_table_output";
  AbsoluteTableIdentifier identifier = AbsoluteTableIdentifier.from(tablePath,
      new CarbonTableIdentifier(dbName, tableName, UUID.randomUUID().toString()));
  CarbonTable table = new StoreCreator(new File("target/store").getAbsolutePath(),
      new File("../hadoop/src/test/resources/data.csv").getCanonicalPath()).createTable(identifier);
  String factFilePath = new File("../hadoop/src/test/resources/data.csv").getCanonicalPath();
  carbonLoadModel = StoreCreator.buildCarbonLoadModel(table, factFilePath, identifier);
}