Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

This page collects example usages of the org.apache.hadoop.mapreduce TaskAttemptContext method getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
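Before the project examples below, here is a minimal, self-contained sketch of the most common pattern: a RecordReader obtains the job Configuration from the TaskAttemptContext inside initialize() and reads its settings from it. The class name MyConfiguredRecordReader and the property my.example.buffer.size are illustrative placeholders, not part of any project quoted on this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class MyConfiguredRecordReader extends RecordReader<Text, NullWritable> {

    private int bufferSize;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // The TaskAttemptContext exposes the job's Configuration;
        // the property name below is hypothetical.
        Configuration conf = context.getConfiguration();
        bufferSize = conf.getInt("my.example.buffer.size", 64 * 1024);
    }

    @Override
    public boolean nextKeyValue() {
        return false; // no records in this sketch
    }

    @Override
    public Text getCurrentKey() {
        return null;
    }

    @Override
    public NullWritable getCurrentValue() {
        return NullWritable.get();
    }

    @Override
    public float getProgress() {
        return 0f;
    }

    @Override
    public void close() {
    }
}

The same call works from any TaskAttemptContext, whether it is passed to a RecordReader, a RecordWriter, or an OutputCommitter, as the project examples below show.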

Usage

From source file:com.splicemachine.mrio.api.core.SMOutputFormat.java

License:Apache License

@Override
public RecordWriter<RowLocation, ExecRow> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    //        TableContext tableContext = TableContext.getTableContextFromBase64String(context.getConfiguration().get(MRConstants.SPLICE_TBLE_CONTEXT));
    return new SMRecordWriterImpl(context.getConfiguration());
}

From source file:com.splicemachine.mrio.api.core.SMRecordReaderImpl.java

License:Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    if (LOG.isDebugEnabled())
        SpliceLogUtils.debug(LOG, "initialize with split=%s", split);
    init(config == null ? context.getConfiguration() : config, split);
}

From source file:com.splicemachine.orc.input.OrcMapreduceRecordReader.java

License:Open Source License

@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    OrcNewSplit orcNewSplit = (OrcNewSplit) inputSplit;
    Configuration configuration = taskAttemptContext.getConfiguration();
    double maxMergeDistance = configuration.getDouble(MAX_MERGE_DISTANCE, MAX_MERGE_DISTANCE_DEFAULT);
    double maxReadSize = configuration.getDouble(MAX_READ_SIZE, MAX_READ_SIZE_DEFAULT);
    double streamBufferSize = configuration.getDouble(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE_DEFAULT);
    Path path = orcNewSplit.getPath();
    FileSystem fileSystem = FileSystem.get(path.toUri(), configuration);
    long size = fileSystem.getFileStatus(path).getLen();
    FSDataInputStream inputStream = fileSystem.open(path);
    rowStruct = getRowStruct(configuration);
    predicate = getSplicePredicate(configuration);
    List<Integer> partitions = getPartitionIds(configuration);
    List<Integer> columnIds = getColumnIds(configuration);

    List<String> values = null;
    try {
        values = Warehouse.getPartValuesFromPartName(((OrcNewSplit) inputSplit).getPath().toString());
    } catch (MetaException me) {
        throw new IOException(me);
    }
    OrcDataSource orcDataSource = new HdfsOrcDataSource(path.toString(), size,
            new DataSize(maxMergeDistance, DataSize.Unit.MEGABYTE),
            new DataSize(maxReadSize, DataSize.Unit.MEGABYTE),
            new DataSize(streamBufferSize, DataSize.Unit.MEGABYTE), inputStream);
    OrcReader orcReader = new OrcReader(orcDataSource, new OrcMetadataReader(),
            new DataSize(maxMergeDistance, DataSize.Unit.MEGABYTE),
            new DataSize(maxReadSize, DataSize.Unit.MEGABYTE));
    orcRecordReader = orcReader.createRecordReader(getColumnsAndTypes(columnIds, rowStruct), predicate,
            HIVE_STORAGE_TIME_ZONE, new AggregatedMemoryContext(), partitions, values);
}

From source file:com.splicemachine.stream.index.HTableOutputFormat.java

License:Apache License

@Override
public RecordWriter<byte[], Either<Exception, KVPair>> getRecordWriter(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    try {
        DataSetWriterBuilder tableWriter = TableWriterUtils
                .deserializeTableWriter(taskAttemptContext.getConfiguration());
        TxnView childTxn = outputCommitter.getChildTransaction(taskAttemptContext.getTaskAttemptID());
        if (childTxn == null)
            throw new IOException("child transaction lookup failed");
        tableWriter.txn(childTxn);
        return new HTableRecordWriter(tableWriter.buildTableWriter(), outputCommitter);
    } catch (Exception e) {
        throw new IOException(e);
    }
}

From source file:com.splicemachine.stream.index.HTableOutputFormat.java

License:Apache License

@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {

    if (LOG.isDebugEnabled())
        SpliceLogUtils.debug(LOG, "getOutputCommitter for taskAttemptContext=%s", taskAttemptContext);
    try {
        if (outputCommitter == null) {
            DataSetWriterBuilder tableWriter = TableWriterUtils
                    .deserializeTableWriter(taskAttemptContext.getConfiguration());
            outputCommitter = new SpliceOutputCommitter(tableWriter.getTxn(),
                    tableWriter.getDestinationTable());
        }
        return outputCommitter;
    } catch (StandardException e) {
        throw new IOException(e);
    }
}

From source file:com.splicemachine.stream.output.SMOutputFormat.java

License:Apache License

@Override
public RecordWriter<RowLocation, Either<Exception, ExecRow>> getRecordWriter(
        TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
    try {
        DataSetWriterBuilder dsWriter = TableWriterUtils
                .deserializeTableWriter(taskAttemptContext.getConfiguration());
        TxnView childTxn = outputCommitter.getChildTransaction(taskAttemptContext.getTaskAttemptID());
        if (childTxn == null)
            throw new IOException("child transaction lookup failed");
        dsWriter.txn(outputCommitter.getChildTransaction(taskAttemptContext.getTaskAttemptID()));
        return new SMRecordWriter(dsWriter.buildTableWriter(), outputCommitter);
    } catch (Exception e) {
        throw new IOException(e);
    }
}

From source file:com.splout.db.hadoop.engine.SploutSQLProxyOutputFormat.java

License:Apache License

@Override
public RecordWriter<ITuple, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    long waitTimeHeartBeater = context.getConfiguration().getLong(HeartBeater.WAIT_TIME_CONF, 5000);
    heartBeater = new HeartBeater(context, waitTimeHeartBeater);
    heartBeater.needHeartBeat();
    conf = context.getConfiguration();
    this.context = context;

    outputFormat.setConf(context.getConfiguration());

    return new RecordWriter<ITuple, NullWritable>() {

        // Temporary and permanent Paths for properly writing Hadoop output files
        private Map<Integer, Path> permPool = new HashMap<Integer, Path>();
        private Map<Integer, Path> tempPool = new HashMap<Integer, Path>();

        private void initSql(int partition) throws IOException, InterruptedException {
            // Final HDFS location of the generated partition file. It is first
            // written to a temporary folder in HDFS and then committed by the
            // OutputCommitter to its proper location.
            FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(
                    SploutSQLProxyOutputFormat.this.context);
            Path perm = new Path(committer.getWorkPath(), partition + ".db");
            FileSystem fs = perm.getFileSystem(conf);

            // Make a task-unique name that contains the actual index output name to
            // make debugging simpler.
            // Note: if using JVM reuse, the sequence number will not be reset for a
            // new task using the JVM.
            Path temp = conf.getLocalPath("mapred.local.dir",
                    "splout_task_" + SploutSQLProxyOutputFormat.this.context.getTaskAttemptID() + '.'
                            + FILE_SEQUENCE.incrementAndGet());

            FileSystem localFileSystem = FileSystem.getLocal(conf);
            if (localFileSystem.exists(temp)) {
                localFileSystem.delete(temp, true);
            }
            localFileSystem.mkdirs(temp);

            Path local = fs.startLocalOutput(perm, new Path(temp, partition + ".db"));

            permPool.put(partition, perm);
            tempPool.put(partition, new Path(temp, partition + ".db"));

            outputFormat.initPartition(partition, local);
        }

        @Override
        public void close(TaskAttemptContext ctx) throws IOException, InterruptedException {
            FileSystem fs = FileSystem.get(ctx.getConfiguration());
            try {
                if (ctx != null) {
                    heartBeater.setProgress(ctx);
                }
                outputFormat.close();
                for (Map.Entry<Integer, Path> entry : permPool.entrySet()) {
                    // Hadoop - completeLocalOutput()
                    fs.completeLocalOutput(entry.getValue(), tempPool.get(entry.getKey()));
                }
            } finally { // in any case, destroy the HeartBeater
                heartBeater.cancelHeartBeat();
            }
        }

        @Override
        public void write(ITuple tuple, NullWritable ignore) throws IOException, InterruptedException {
            int partition = (Integer) tuple.get(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD);
            if (tempPool.get(partition) == null) {
                initSql(partition);
            }
            outputFormat.write(tuple);
        }

    };
}

From source file:com.splout.db.hadoop.TupleSampler.java

License:Apache License

/**
 * Random sampling method, à la TeraSort, that takes some consecutive samples from each InputSplit
 * without using a Job.
 * The output is a SequenceFile with keys.
 *
 * @return The number of retrieved samples
 */
private long randomSampling(long sampleSize, Configuration hadoopConf, Path outFile, List<InputSplit> splits,
        Map<InputSplit, TableSpec> splitToTableSpec,
        Map<InputSplit, InputFormat<ITuple, NullWritable>> splitToFormat,
        Map<InputSplit, Map<String, String>> specificHadoopConf,
        Map<InputSplit, RecordProcessor> recordProcessorPerSplit,
        Map<InputSplit, JavascriptEngine> splitToJsEngine, int maxSplitsToVisit) throws IOException {

    // Instantiate the writer we will write samples to
    FileSystem fs = FileSystem.get(outFile.toUri(), hadoopConf);

    if (splits.size() == 0) {
        throw new IllegalArgumentException("There are no splits to sample from!");
    }

    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, hadoopConf, outFile, Text.class,
            NullWritable.class);

    logger.info("Sequential sampling options, max splits to visit: " + maxSplitsToVisit + ", samples to take: "
            + sampleSize + ", total number of splits: " + splits.size());
    int blocks = Math.min(maxSplitsToVisit, splits.size());
    blocks = Math.min((int) sampleSize, blocks);
    long recordsPerSample = sampleSize / blocks;
    int sampleStep = splits.size() / blocks;

    long records = 0;

    CounterInterface counterInterface = new CounterInterface(null) {

        public Counter getCounter(String group, String name) {
            return Mockito.mock(Counter.class);
        }

    };

    // Take N samples from different parts of the input
    for (int i = 0; i < blocks; ++i) {
        TaskAttemptID attemptId = new TaskAttemptID(new TaskID(), 1);

        TaskAttemptContext attemptContext = null;
        try {
            attemptContext = TaskAttemptContextFactory.get(hadoopConf, attemptId);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        InputSplit split = splits.get(sampleStep * i);
        if (specificHadoopConf.get(split) != null) {
            for (Map.Entry<String, String> specificConf : specificHadoopConf.get(split).entrySet()) {
                attemptContext.getConfiguration().set(specificConf.getKey(), specificConf.getValue());
            }
        }
        logger.info("Sampling split: " + split);
        RecordReader<ITuple, NullWritable> reader = null;
        try {
            reader = splitToFormat.get(split).createRecordReader(split, attemptContext);
            reader.initialize(split, attemptContext);

            RecordProcessor processor = recordProcessorPerSplit.get(split);
            Text key = new Text();
            while (reader.nextKeyValue()) {
                ITuple tuple = reader.getCurrentKey();

                ITuple uTuple;
                try {
                    uTuple = processor.process(tuple, tuple.getSchema().getName(), counterInterface);
                } catch (Throwable e) {
                    throw new RuntimeException(e);
                }
                if (uTuple != null) { // user may have filtered the record
                    try {
                        key.set(TablespaceGenerator.getPartitionByKey(uTuple, splitToTableSpec.get(split),
                                splitToJsEngine.get(split)));
                    } catch (Throwable e) {
                        throw new RuntimeException("Error when determining partition key.", e);
                    }

                    writer.append(key, NullWritable.get());
                    records += 1;
                    if ((i + 1) * recordsPerSample <= records) {
                        break;
                    }
                }
            }
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }

    }

    writer.close();
    return records;
}

From source file:com.splunk.hunk.input.packet.DnsPcapRecordReader.java

License:Apache License

@Override
public void vixInitialize(VixInputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    //gLogger.info("takashi:TgzPacketRecordReader:vixInitialize is called");

    FileSystem fs = FileSystem.get(context.getConfiguration());
    fsDataIn = fs.open(split.getPath());
    pcapReader = initPcapReader(packet_type_class, new DataInputStream(fsDataIn));

    packetIterator = pcapReader.iterator();
    totalBytesToRead = split.getLength() - split.getStart();
    objectPreProcessor = new DnsPacketEventProcessor();
}

From source file:com.splunk.hunk.input.packet.HttpPcapRecordReader.java

License:Apache License

@Override
public void vixInitialize(VixInputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    //gLogger.info("takashi:TgzPacketRecordReader:vixInitialize is called");

    FileSystem fs = FileSystem.get(context.getConfiguration());
    fsDataIn = fs.open(split.getPath());
    pcapReader = initPcapReader(packet_type_class, new DataInputStream(fsDataIn));

    packetIterator = pcapReader.iterator();
    totalBytesToRead = split.getLength() - split.getStart();
    objectPreProcessor = new HttpPacketEventProcessor();
}