Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

This page collects example usages of the org.apache.hadoop.mapreduce TaskAttemptContext method getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
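Before the project examples below, here is a minimal, self-contained sketch of the most common pattern: a RecordReader obtains the job Configuration from the TaskAttemptContext inside initialize() and reads its settings from it. The class name MyConfiguredRecordReader and the property my.example.buffer.size are illustrative placeholders, not part of any project quoted on this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class MyConfiguredRecordReader extends RecordReader<Text, NullWritable> {

    private int bufferSize;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // The TaskAttemptContext exposes the job's Configuration;
        // the property name below is hypothetical.
        Configuration conf = context.getConfiguration();
        bufferSize = conf.getInt("my.example.buffer.size", 64 * 1024);
    }

    @Override
    public boolean nextKeyValue() {
        return false; // no records in this sketch
    }

    @Override
    public Text getCurrentKey() {
        return null;
    }

    @Override
    public NullWritable getCurrentValue() {
        return NullWritable.get();
    }

    @Override
    public float getProgress() {
        return 0f;
    }

    @Override
    public void close() {
    }
}

The same call works from any TaskAttemptContext, whether it is passed to a RecordReader, a RecordWriter, or an OutputCommitter, as the project examples below show.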

Usage

From source file:com.splicemachine.mrio.api.core.SMOutputFormat.java

License:Apache License

@Override
public RecordWriter<RowLocation, ExecRow> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    //        TableContext tableContext = TableContext.getTableContextFromBase64String(context.getConfiguration().get(MRConstants.SPLICE_TBLE_CONTEXT));
    return new SMRecordWriterImpl(context.getConfiguration());
}

From source file:com.splicemachine.mrio.api.core.SMRecordReaderImpl.java

License:Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    if (LOG.isDebugEnabled())
        SpliceLogUtils.debug(LOG, "initialize with split=%s", split);
    init(config == null ? context.getConfiguration() : config, split);
}

From source file:com.splicemachine.orc.input.OrcMapreduceRecordReader.java

License:Open Source License

@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    OrcNewSplit orcNewSplit = (OrcNewSplit) inputSplit;
    Configuration configuration = taskAttemptContext.getConfiguration();
    double maxMergeDistance = configuration.getDouble(MAX_MERGE_DISTANCE, MAX_MERGE_DISTANCE_DEFAULT);
    double maxReadSize = configuration.getDouble(MAX_READ_SIZE, MAX_READ_SIZE_DEFAULT);
    double streamBufferSize = configuration.getDouble(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE_DEFAULT);
    Path path = orcNewSplit.getPath();
    FileSystem fileSystem = FileSystem.get(path.toUri(), configuration);
    long size = fileSystem.getFileStatus(path).getLen();
    FSDataInputStream inputStream = fileSystem.open(path);
    rowStruct = getRowStruct(configuration);
    predicate = getSplicePredicate(configuration);
    List<Integer> partitions = getPartitionIds(configuration);
    List<Integer> columnIds = getColumnIds(configuration);

    List<String> values = null;
    try {
        values = Warehouse.getPartValuesFromPartName(((OrcNewSplit) inputSplit).getPath().toString());
    } catch (MetaException me) {
        throw new IOException(me);
    }
    OrcDataSource orcDataSource = new HdfsOrcDataSource(path.toString(), size,
            new DataSize(maxMergeDistance, DataSize.Unit.MEGABYTE),
            new DataSize(maxReadSize, DataSize.Unit.MEGABYTE),
            new DataSize(streamBufferSize, DataSize.Unit.MEGABYTE), inputStream);
    OrcReader orcReader = new OrcReader(orcDataSource, new OrcMetadataReader(),
            new DataSize(maxMergeDistance, DataSize.Unit.MEGABYTE),
            new DataSize(maxReadSize, DataSize.Unit.MEGABYTE));
    orcRecordReader = orcReader.createRecordReader(getColumnsAndTypes(columnIds, rowStruct), predicate,
            HIVE_STORAGE_TIME_ZONE, new AggregatedMemoryContext(), partitions, values);
}

From source file:com.splicemachine.stream.index.HTableOutputFormat.java

License:Apache License

@Override
public RecordWriter<byte[], Either<Exception, KVPair>> getRecordWriter(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    try {
        DataSetWriterBuilder tableWriter = TableWriterUtils
                .deserializeTableWriter(taskAttemptContext.getConfiguration());
        TxnView childTxn = outputCommitter.getChildTransaction(taskAttemptContext.getTaskAttemptID());
        if (childTxn == null)
            throw new IOException("child transaction lookup failed");
        tableWriter.txn(childTxn);
        return new HTableRecordWriter(tableWriter.buildTableWriter(), outputCommitter);
    } catch (Exception e) {
        throw new IOException(e);
    }
}

From source file:com.splicemachine.stream.index.HTableOutputFormat.java

License:Apache License

@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {

    if (LOG.isDebugEnabled())
        SpliceLogUtils.debug(LOG, "getOutputCommitter for taskAttemptContext=%s", taskAttemptContext);
    try {
        if (outputCommitter == null) {
            DataSetWriterBuilder tableWriter = TableWriterUtils
                    .deserializeTableWriter(taskAttemptContext.getConfiguration());
            outputCommitter = new SpliceOutputCommitter(tableWriter.getTxn(),
                    tableWriter.getDestinationTable());
        }
        return outputCommitter;
    } catch (StandardException e) {
        throw new IOException(e);
    }
}

From source file:com.splicemachine.stream.output.SMOutputFormat.java

License:Apache License

@Override
public RecordWriter<RowLocation, Either<Exception, ExecRow>> getRecordWriter(
        TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
    try {
        DataSetWriterBuilder dsWriter = TableWriterUtils
                .deserializeTableWriter(taskAttemptContext.getConfiguration());
        TxnView childTxn = outputCommitter.getChildTransaction(taskAttemptContext.getTaskAttemptID());
        if (childTxn == null)
            throw new IOException("child transaction lookup failed");
        dsWriter.txn(outputCommitter.getChildTransaction(taskAttemptContext.getTaskAttemptID()));
        return new SMRecordWriter(dsWriter.buildTableWriter(), outputCommitter);
    } catch (Exception e) {
        throw new IOException(e);
    }
}

From source file:com.splout.db.hadoop.engine.SploutSQLProxyOutputFormat.java

License:Apache License

@Override
public RecordWriter<ITuple, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    long waitTimeHeartBeater = context.getConfiguration().getLong(HeartBeater.WAIT_TIME_CONF, 5000);
    heartBeater = new HeartBeater(context, waitTimeHeartBeater);
    heartBeater.needHeartBeat();
    conf = context.getConfiguration();
    this.context = context;

    outputFormat.setConf(context.getConfiguration());

    return new RecordWriter<ITuple, NullWritable>() {

        // Temporary and permanent Paths for properly writing Hadoop output files
        private Map<Integer, Path> permPool = new HashMap<Integer, Path>();
        private Map<Integer, Path> tempPool = new HashMap<Integer, Path>();

        private void initSql(int partition) throws IOException, InterruptedException {
            // Final HDFS location of the generated partition file. It is first
            // written to a temporary folder in HDFS and then committed by the
            // OutputCommitter to its proper location.
            FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(
                    SploutSQLProxyOutputFormat.this.context);
            Path perm = new Path(committer.getWorkPath(), partition + ".db");
            FileSystem fs = perm.getFileSystem(conf);

            // Make a task-unique name that contains the actual index output name to
            // make debugging simpler.
            // Note: if using JVM reuse, the sequence number will not be reset for a
            // new task using the JVM.
            Path temp = conf.getLocalPath("mapred.local.dir",
                    "splout_task_" + SploutSQLProxyOutputFormat.this.context.getTaskAttemptID() + '.'
                            + FILE_SEQUENCE.incrementAndGet());

            FileSystem localFileSystem = FileSystem.getLocal(conf);
            if (localFileSystem.exists(temp)) {
                localFileSystem.delete(temp, true);
            }
            localFileSystem.mkdirs(temp);

            Path local = fs.startLocalOutput(perm, new Path(temp, partition + ".db"));

            permPool.put(partition, perm);
            tempPool.put(partition, new Path(temp, partition + ".db"));

            outputFormat.initPartition(partition, local);
        }

        @Override
        public void close(TaskAttemptContext ctx) throws IOException, InterruptedException {
            FileSystem fs = FileSystem.get(ctx.getConfiguration());
            try {
                if (ctx != null) {
                    heartBeater.setProgress(ctx);
                }
                outputFormat.close();
                for (Map.Entry<Integer, Path> entry : permPool.entrySet()) {
                    // Hadoop - completeLocalOutput()
                    fs.completeLocalOutput(entry.getValue(), tempPool.get(entry.getKey()));
                }
            } finally { // in any case, destroy the HeartBeater
                heartBeater.cancelHeartBeat();
            }
        }

        @Override
        public void write(ITuple tuple, NullWritable ignore) throws IOException, InterruptedException {
            int partition = (Integer) tuple.get(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD);
            if (tempPool.get(partition) == null) {
                initSql(partition);
            }
            outputFormat.write(tuple);
        }

    };
}

From source file:com.splout.db.hadoop.TupleSampler.java

License:Apache License

/**
 * Random sampling method, à la TeraSort, that takes some consecutive samples from each InputSplit
 * without using a Job.
 * The output is a SequenceFile with keys.
 *
 * @return The number of retrieved samples
 */
private long randomSampling(long sampleSize, Configuration hadoopConf, Path outFile, List<InputSplit> splits,
        Map<InputSplit, TableSpec> splitToTableSpec,
        Map<InputSplit, InputFormat<ITuple, NullWritable>> splitToFormat,
        Map<InputSplit, Map<String, String>> specificHadoopConf,
        Map<InputSplit, RecordProcessor> recordProcessorPerSplit,
        Map<InputSplit, JavascriptEngine> splitToJsEngine, int maxSplitsToVisit) throws IOException {

    // Instantiate the writer we will write samples to
    FileSystem fs = FileSystem.get(outFile.toUri(), hadoopConf);

    if (splits.size() == 0) {
        throw new IllegalArgumentException("There are no splits to sample from!");
    }

    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, hadoopConf, outFile, Text.class,
            NullWritable.class);

    logger.info("Sequential sampling options, max splits to visit: " + maxSplitsToVisit + ", samples to take: "
            + sampleSize + ", total number of splits: " + splits.size());
    int blocks = Math.min(maxSplitsToVisit, splits.size());
    blocks = Math.min((int) sampleSize, blocks);
    long recordsPerSample = sampleSize / blocks;
    int sampleStep = splits.size() / blocks;

    long records = 0;

    CounterInterface counterInterface = new CounterInterface(null) {

        public Counter getCounter(String group, String name) {
            return Mockito.mock(Counter.class);
        }

    };

    // Take N samples from different parts of the input
    for (int i = 0; i < blocks; ++i) {
        TaskAttemptID attemptId = new TaskAttemptID(new TaskID(), 1);

        TaskAttemptContext attemptContext = null;
        try {
            attemptContext = TaskAttemptContextFactory.get(hadoopConf, attemptId);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        InputSplit split = splits.get(sampleStep * i);
        if (specificHadoopConf.get(split) != null) {
            for (Map.Entry<String, String> specificConf : specificHadoopConf.get(split).entrySet()) {
                attemptContext.getConfiguration().set(specificConf.getKey(), specificConf.getValue());
            }
        }
        logger.info("Sampling split: " + split);
        RecordReader<ITuple, NullWritable> reader = null;
        try {
            reader = splitToFormat.get(split).createRecordReader(split, attemptContext);
            reader.initialize(split, attemptContext);

            RecordProcessor processor = recordProcessorPerSplit.get(split);
            Text key = new Text();
            while (reader.nextKeyValue()) {
                ITuple tuple = reader.getCurrentKey();

                ITuple uTuple;
                try {
                    uTuple = processor.process(tuple, tuple.getSchema().getName(), counterInterface);
                } catch (Throwable e) {
                    throw new RuntimeException(e);
                }
                if (uTuple != null) { // user may have filtered the record
                    try {
                        key.set(TablespaceGenerator.getPartitionByKey(uTuple, splitToTableSpec.get(split),
                                splitToJsEngine.get(split)));
                    } catch (Throwable e) {
                        throw new RuntimeException("Error when determining partition key.", e);
                    }

                    writer.append(key, NullWritable.get());
                    records += 1;
                    if ((i + 1) * recordsPerSample <= records) {
                        break;
                    }
                }
            }
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }

    }

    writer.close();
    return records;
}

From source file:com.splunk.hunk.input.packet.DnsPcapRecordReader.java

License:Apache License

@Override
public void vixInitialize(VixInputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    //gLogger.info("takashi:TgzPacketRecordReader:vixInitialize is called");

    FileSystem fs = FileSystem.get(context.getConfiguration());
    fsDataIn = fs.open(split.getPath());
    pcapReader = initPcapReader(packet_type_class, new DataInputStream(fsDataIn));

    packetIterator = pcapReader.iterator();
    totalBytesToRead = split.getLength() - split.getStart();
    objectPreProcessor = new DnsPacketEventProcessor();
}

From source file:com.splunk.hunk.input.packet.HttpPcapRecordReader.java

License:Apache License

@Override
public void vixInitialize(VixInputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    //gLogger.info("takashi:TgzPacketRecordReader:vixInitialize is called");

    FileSystem fs = FileSystem.get(context.getConfiguration());
    fsDataIn = fs.open(split.getPath());
    pcapReader = initPcapReader(packet_type_class, new DataInputStream(fsDataIn));

    packetIterator = pcapReader.iterator();
    totalBytesToRead = split.getLength() - split.getStart();
    objectPreProcessor = new HttpPacketEventProcessor();
}