List of usage examples for the org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl constructor
public TaskAttemptContextImpl(Configuration conf, TaskAttemptID taskId)
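A minimal sketch of calling this constructor directly, assuming only the Hadoop MapReduce client classes are on the classpath; the job identifier "example" and the zero task/attempt numbers are illustrative values, not taken from any of the projects listed below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptContextExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Build an illustrative attempt id: job "example"/0, map task 0, attempt 0
        JobID jobId = new JobID("example", 0);
        TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
        TaskAttemptID attemptId = new TaskAttemptID(taskId, 0);
        // Wrap the configuration and attempt id in a context usable by record readers/writers
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, attemptId);
        System.out.println(context.getTaskAttemptID());
    }
}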
From source file:org.apache.carbondata.sdk.file.CarbonReaderBuilder.java
License:Apache License
/**
 * Build CarbonReader
 *
 * @param <T>
 * @return CarbonReader
 * @throws IOException
 * @throws InterruptedException
 */
public <T> CarbonReader<T> build() throws IOException, InterruptedException {
    if (hadoopConf == null) {
        hadoopConf = FileFactory.getConfiguration();
    }
    CarbonTable table;
    // now always infer schema. TODO: Refactor in next version.
    table = CarbonTable.buildTable(tablePath, tableName, hadoopConf);
    final CarbonFileInputFormat format = new CarbonFileInputFormat();
    final Job job = new Job(hadoopConf);
    format.setTableInfo(job.getConfiguration(), table.getTableInfo());
    format.setTablePath(job.getConfiguration(), table.getTablePath());
    format.setTableName(job.getConfiguration(), table.getTableName());
    format.setDatabaseName(job.getConfiguration(), table.getDatabaseName());
    if (filterExpression != null) {
        format.setFilterPredicates(job.getConfiguration(), filterExpression);
    }
    if (projectionColumns != null) {
        // set the user projection
        int len = projectionColumns.length;
        // TODO: Handle projection of complex child columns
        for (int i = 0; i < len; i++) {
            if (projectionColumns[i].contains(".")) {
                throw new UnsupportedOperationException(
                    "Complex child columns projection NOT supported through CarbonReader");
            }
        }
        format.setColumnProjection(job.getConfiguration(), projectionColumns);
    }
    try {
        if (filterExpression == null) {
            job.getConfiguration().set("filter_blocks", "false");
        }
        List<InputSplit> splits =
            format.getSplits(new JobContextImpl(job.getConfiguration(), new JobID()));
        List<RecordReader<Void, T>> readers = new ArrayList<>(splits.size());
        for (InputSplit split : splits) {
            TaskAttemptContextImpl attempt =
                new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
            RecordReader reader;
            QueryModel queryModel = format.createQueryModel(split, attempt);
            boolean hasComplex = false;
            for (ProjectionDimension projectionDimension : queryModel.getProjectionDimensions()) {
                if (projectionDimension.getDimension().isComplex()) {
                    hasComplex = true;
                    break;
                }
            }
            if (useVectorReader && !hasComplex) {
                queryModel.setDirectVectorFill(filterExpression == null);
                reader = new CarbonVectorizedRecordReader(queryModel);
            } else {
                reader = format.createRecordReader(split, attempt);
            }
            try {
                reader.initialize(split, attempt);
                readers.add(reader);
            } catch (Exception e) {
                CarbonUtil.closeStreams(readers.toArray(new RecordReader[0]));
                throw e;
            }
        }
        return new CarbonReader<>(readers);
    } catch (Exception ex) {
        // Clear the datamap cache as it can get added in getSplits() method
        DataMapStoreManager.getInstance().clearDataMaps(table.getAbsoluteTableIdentifier());
        throw ex;
    }
}
From source file:org.apache.carbondata.sdk.file.CSVCarbonWriter.java
License:Apache License
CSVCarbonWriter(CarbonLoadModel loadModel, Configuration hadoopConf) throws IOException {
    CarbonTableOutputFormat.setLoadModel(hadoopConf, loadModel);
    CarbonTableOutputFormat format = new CarbonTableOutputFormat();
    JobID jobId = new JobID(UUID.randomUUID().toString(), 0);
    Random random = new Random();
    TaskID task = new TaskID(jobId, TaskType.MAP, random.nextInt());
    TaskAttemptID attemptID = new TaskAttemptID(task, random.nextInt());
    TaskAttemptContextImpl context = new TaskAttemptContextImpl(hadoopConf, attemptID);
    this.recordWriter = format.getRecordWriter(context);
    this.context = context;
    this.writable = new ObjectArrayWritable();
}
From source file:org.apache.carbondata.sdk.file.JsonCarbonWriter.java
License:Apache License
JsonCarbonWriter(CarbonLoadModel loadModel, Configuration configuration) throws IOException {
    CarbonTableOutputFormat.setLoadModel(configuration, loadModel);
    CarbonTableOutputFormat outputFormat = new CarbonTableOutputFormat();
    JobID jobId = new JobID(UUID.randomUUID().toString(), 0);
    Random random = new Random();
    TaskID task = new TaskID(jobId, TaskType.MAP, random.nextInt());
    TaskAttemptID attemptID = new TaskAttemptID(task, random.nextInt());
    TaskAttemptContextImpl context = new TaskAttemptContextImpl(configuration, attemptID);
    this.recordWriter = outputFormat.getRecordWriter(context);
    this.context = context;
    this.writable = new ObjectArrayWritable();
}
From source file:org.apache.carbondata.store.LocalCarbonStore.java
License:Apache License
@Override
public Iterator<CarbonRow> scan(AbsoluteTableIdentifier tableIdentifier, String[] projectColumns,
        Expression filter) throws IOException {
    Objects.requireNonNull(tableIdentifier);
    Objects.requireNonNull(projectColumns);
    CarbonTable table = getTable(tableIdentifier.getTablePath());
    if (table.isStreamingSink() || table.isHivePartitionTable()) {
        throw new UnsupportedOperationException("streaming and partition table is not supported");
    }
    // TODO: use InputFormat to prune data and read data
    final CarbonTableInputFormat format = new CarbonTableInputFormat();
    final Job job = new Job(new Configuration());
    CarbonInputFormat.setTableInfo(job.getConfiguration(), table.getTableInfo());
    CarbonInputFormat.setTablePath(job.getConfiguration(), table.getTablePath());
    CarbonInputFormat.setTableName(job.getConfiguration(), table.getTableName());
    CarbonInputFormat.setDatabaseName(job.getConfiguration(), table.getDatabaseName());
    CarbonInputFormat.setCarbonReadSupport(job.getConfiguration(), CarbonRowReadSupport.class);
    CarbonInputFormat.setColumnProjection(job.getConfiguration(), new CarbonProjection(projectColumns));
    if (filter != null) {
        CarbonInputFormat.setFilterPredicates(job.getConfiguration(), filter);
    }
    final List<InputSplit> splits =
        format.getSplits(new JobContextImpl(job.getConfiguration(), new JobID()));
    List<RecordReader<Void, Object>> readers = new ArrayList<>(splits.size());
    List<CarbonRow> rows = new ArrayList<>();
    try {
        for (InputSplit split : splits) {
            TaskAttemptContextImpl attempt =
                new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
            RecordReader reader = format.createRecordReader(split, attempt);
            reader.initialize(split, attempt);
            readers.add(reader);
        }
        for (RecordReader<Void, Object> reader : readers) {
            while (reader.nextKeyValue()) {
                rows.add((CarbonRow) reader.getCurrentValue());
            }
            try {
                reader.close();
            } catch (IOException e) {
                LOGGER.error(e);
            }
        }
    } catch (InterruptedException e) {
        throw new IOException(e);
    } finally {
        for (RecordReader<Void, Object> reader : readers) {
            try {
                reader.close();
            } catch (IOException e) {
                LOGGER.error(e);
            }
        }
    }
    return rows.iterator();
}
From source file:org.apache.carbondata.stream.CarbonStreamRecordReaderTest.java
License:Apache License
@Override
protected void setUp() throws Exception {
    tablePath = new File("target/stream_input").getCanonicalPath();
    String dbName = "default";
    String tableName = "stream_table_input";
    identifier = AbsoluteTableIdentifier.from(tablePath,
        new CarbonTableIdentifier(dbName, tableName, UUID.randomUUID().toString()));
    JobID jobId = CarbonInputFormatUtil.getJobId(new Date(), 0);
    TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
    taskAttemptId = new TaskAttemptID(taskId, 0);
    hadoopConf = new Configuration();
    taskAttemptContext = new TaskAttemptContextImpl(hadoopConf, taskAttemptId);
}
From source file:org.apache.carbondata.streaming.CarbonStreamOutputFormatTest.java
License:Apache License
@Test
public void testGetRecordWriter() {
    CarbonStreamOutputFormat outputFormat = new CarbonStreamOutputFormat();
    try {
        CarbonStreamOutputFormat.setCarbonLoadModel(hadoopConf, carbonLoadModel);
        TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(hadoopConf, taskAttemptId);
        RecordWriter recordWriter = outputFormat.getRecordWriter(taskAttemptContext);
        Assert.assertNotNull("Failed to get CarbonStreamRecordWriter", recordWriter);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.assertTrue(e.getMessage(), false);
    }
}
From source file:org.apache.cassandra.hadoop2.AbstractColumnFamilyInputFormat.java
License:Apache License
public org.apache.hadoop.mapred.InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
    TaskAttemptContext tac = new TaskAttemptContextImpl(jobConf, new TaskAttemptID());
    List<org.apache.hadoop.mapreduce.InputSplit> newInputSplits = this.getSplits(tac);
    org.apache.hadoop.mapred.InputSplit[] oldInputSplits =
        new org.apache.hadoop.mapred.InputSplit[newInputSplits.size()];
    for (int i = 0; i < newInputSplits.size(); i++) {
        oldInputSplits[i] = (ColumnFamilySplit) newInputSplits.get(i);
    }
    return oldInputSplits;
}
From source file:org.apache.cassandra.hadoop2.ColumnFamilyInputFormat.java
License:Apache License
public org.apache.hadoop.mapred.RecordReader<ByteBuffer, SortedMap<ByteBuffer, Column>> getRecordReader(
        org.apache.hadoop.mapred.InputSplit split, JobConf jobConf, final Reporter reporter)
        throws IOException {
    TaskAttemptContext tac = new TaskAttemptContextImpl(jobConf,
            TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID))) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };
    ColumnFamilyRecordReader recordReader = new ColumnFamilyRecordReader(
        jobConf.getInt(CASSANDRA_HADOOP_MAX_KEY_SIZE, CASSANDRA_HADOOP_MAX_KEY_SIZE_DEFAULT));
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}
From source file:org.apache.cassandra.hadoop2.cql3.CqlPagingInputFormat.java
License:Apache License
public RecordReader<Map<String, ByteBuffer>, Map<String, ByteBuffer>> getRecordReader(InputSplit split,
        JobConf jobConf, final Reporter reporter) throws IOException {
    TaskAttemptContext tac = new TaskAttemptContextImpl(jobConf,
            TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID))) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };
    CqlPagingRecordReader recordReader = new CqlPagingRecordReader();
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}
From source file:org.apache.crunch.io.hcatalog.HCatRecordDataIterable.java
License:Apache License
@Override
public Iterator<HCatRecord> iterator() {
    try {
        Job job = Job.getInstance(bundle.configure(conf));
        final InputFormat fmt = ReflectionUtils.newInstance(bundle.getFormatClass(), conf);
        final TaskAttemptContext ctxt = new TaskAttemptContextImpl(conf, new TaskAttemptID());
        return Iterators.concat(
            Lists.transform(fmt.getSplits(job), new Function<InputSplit, Iterator<HCatRecord>>() {
                @Override
                public Iterator<HCatRecord> apply(InputSplit split) {
                    RecordReader reader = null;
                    try {
                        reader = fmt.createRecordReader(split, ctxt);
                        reader.initialize(split, ctxt);
                    } catch (IOException | InterruptedException e) {
                        throw new CrunchRuntimeException(e);
                    }
                    return new HCatRecordReaderIterator(reader);
                }
            }).iterator());
    } catch (Exception e) {
        throw new CrunchRuntimeException(e);
    }
}