List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
public Configuration getConfiguration();
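Every example below follows the same pattern: a RecordReader, OutputFormat, or sampler receives a TaskAttemptContext and calls getConfiguration() on it to read job settings at task time. Here is a minimal, self-contained sketch of that pattern, assuming a file-based input; the property key "example.buffer.size" and the class name SketchRecordReader are hypothetical, while getConfiguration(), Configuration.getInt(), Path.getFileSystem() and FileSystem.open() are the real Hadoop APIs. The driver-side counterpart is sketched at the end of this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class SketchRecordReader extends RecordReader<LongWritable, Text> {

    private FSDataInputStream in;
    private int bufferSize;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // The context carries the job's Configuration to every task attempt
        Configuration conf = context.getConfiguration();
        bufferSize = conf.getInt("example.buffer.size", 64 * 1024); // hypothetical key
        Path path = ((FileSplit) split).getPath();
        FileSystem fs = path.getFileSystem(conf);
        in = fs.open(path);
    }

    // Sketch only: no records are actually produced
    @Override
    public boolean nextKeyValue() { return false; }

    @Override
    public LongWritable getCurrentKey() { return null; }

    @Override
    public Text getCurrentValue() { return null; }

    @Override
    public float getProgress() { return 0f; }

    @Override
    public void close() throws IOException {
        if (in != null) in.close();
    }
}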
From source file:com.splicemachine.mrio.api.core.SMOutputFormat.java
License:Apache License
@Override
public RecordWriter<RowLocation, ExecRow> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    // TableContext tableContext = TableContext.getTableContextFromBase64String(context.getConfiguration().get(MRConstants.SPLICE_TBLE_CONTEXT));
    return new SMRecordWriterImpl(context.getConfiguration());
}
From source file:com.splicemachine.mrio.api.core.SMRecordReaderImpl.java
License:Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    if (LOG.isDebugEnabled())
        SpliceLogUtils.debug(LOG, "initialize with split=%s", split);
    init(config == null ? context.getConfiguration() : config, split);
}
From source file:com.splicemachine.orc.input.OrcMapreduceRecordReader.java
License:Open Source License
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    OrcNewSplit orcNewSplit = (OrcNewSplit) inputSplit;
    Configuration configuration = taskAttemptContext.getConfiguration();
    double maxMergeDistance = configuration.getDouble(MAX_MERGE_DISTANCE, MAX_MERGE_DISTANCE_DEFAULT);
    double maxReadSize = configuration.getDouble(MAX_READ_SIZE, MAX_READ_SIZE_DEFAULT);
    double streamBufferSize = configuration.getDouble(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE_DEFAULT);
    Path path = orcNewSplit.getPath();
    FileSystem fileSystem = FileSystem.get(path.toUri(), configuration);
    long size = fileSystem.getFileStatus(path).getLen();
    FSDataInputStream inputStream = fileSystem.open(path);
    rowStruct = getRowStruct(configuration);
    predicate = getSplicePredicate(configuration);
    List<Integer> partitions = getPartitionIds(configuration);
    List<Integer> columnIds = getColumnIds(configuration);
    List<String> values = null;
    try {
        values = Warehouse.getPartValuesFromPartName(((OrcNewSplit) inputSplit).getPath().toString());
    } catch (MetaException me) {
        throw new IOException(me);
    }
    OrcDataSource orcDataSource = new HdfsOrcDataSource(path.toString(), size,
            new DataSize(maxMergeDistance, DataSize.Unit.MEGABYTE),
            new DataSize(maxReadSize, DataSize.Unit.MEGABYTE),
            new DataSize(streamBufferSize, DataSize.Unit.MEGABYTE), inputStream);
    OrcReader orcReader = new OrcReader(orcDataSource, new OrcMetadataReader(),
            new DataSize(maxMergeDistance, DataSize.Unit.MEGABYTE),
            new DataSize(maxReadSize, DataSize.Unit.MEGABYTE));
    orcRecordReader = orcReader.createRecordReader(getColumnsAndTypes(columnIds, rowStruct), predicate,
            HIVE_STORAGE_TIME_ZONE, new AggregatedMemoryContext(), partitions, values);
}
From source file:com.splicemachine.stream.index.HTableOutputFormat.java
License:Apache License
@Override
public RecordWriter<byte[], Either<Exception, KVPair>> getRecordWriter(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    try {
        DataSetWriterBuilder tableWriter = TableWriterUtils
                .deserializeTableWriter(taskAttemptContext.getConfiguration());
        TxnView childTxn = outputCommitter.getChildTransaction(taskAttemptContext.getTaskAttemptID());
        if (childTxn == null)
            throw new IOException("child transaction lookup failed");
        tableWriter.txn(childTxn);
        return new HTableRecordWriter(tableWriter.buildTableWriter(), outputCommitter);
    } catch (Exception e) {
        throw new IOException(e);
    }
}
From source file:com.splicemachine.stream.index.HTableOutputFormat.java
License:Apache License
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    if (LOG.isDebugEnabled())
        SpliceLogUtils.debug(LOG, "getOutputCommitter for taskAttemptContext=%s", taskAttemptContext);
    try {
        if (outputCommitter == null) {
            DataSetWriterBuilder tableWriter = TableWriterUtils
                    .deserializeTableWriter(taskAttemptContext.getConfiguration());
            outputCommitter = new SpliceOutputCommitter(tableWriter.getTxn(), tableWriter.getDestinationTable());
        }
        return outputCommitter;
    } catch (StandardException e) {
        throw new IOException(e);
    }
}
From source file:com.splicemachine.stream.output.SMOutputFormat.java
License:Apache License
@Override
public RecordWriter<RowLocation, Either<Exception, ExecRow>> getRecordWriter(
        TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
    try {
        DataSetWriterBuilder dsWriter = TableWriterUtils
                .deserializeTableWriter(taskAttemptContext.getConfiguration());
        TxnView childTxn = outputCommitter.getChildTransaction(taskAttemptContext.getTaskAttemptID());
        if (childTxn == null)
            throw new IOException("child transaction lookup failed");
        dsWriter.txn(childTxn);
        return new SMRecordWriter(dsWriter.buildTableWriter(), outputCommitter);
    } catch (Exception e) {
        throw new IOException(e);
    }
}
From source file:com.splout.db.hadoop.engine.SploutSQLProxyOutputFormat.java
License:Apache License
@Override
public RecordWriter<ITuple, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    long waitTimeHeartBeater = context.getConfiguration().getLong(HeartBeater.WAIT_TIME_CONF, 5000);
    heartBeater = new HeartBeater(context, waitTimeHeartBeater);
    heartBeater.needHeartBeat();
    conf = context.getConfiguration();
    this.context = context;
    outputFormat.setConf(context.getConfiguration());

    return new RecordWriter<ITuple, NullWritable>() {

        // Temporary and permanent Paths for properly writing Hadoop output files
        private Map<Integer, Path> permPool = new HashMap<Integer, Path>();
        private Map<Integer, Path> tempPool = new HashMap<Integer, Path>();

        private void initSql(int partition) throws IOException, InterruptedException {
            // Final HDFS location of the generated partition file. It is first written
            // to a temporary folder in HDFS and then committed by the OutputCommitter
            // to its proper location.
            FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(
                    SploutSQLProxyOutputFormat.this.context);
            Path perm = new Path(committer.getWorkPath(), partition + ".db");
            FileSystem fs = perm.getFileSystem(conf);
            // Make a task-unique name that contains the actual index output name to
            // make debugging simpler.
            // Note: if using JVM reuse, the sequence number will not be reset for a
            // new task using the JVM.
            Path temp = conf.getLocalPath("mapred.local.dir", "splout_task_"
                    + SploutSQLProxyOutputFormat.this.context.getTaskAttemptID() + '.'
                    + FILE_SEQUENCE.incrementAndGet());
            FileSystem localFileSystem = FileSystem.getLocal(conf);
            if (localFileSystem.exists(temp)) {
                localFileSystem.delete(temp, true);
            }
            localFileSystem.mkdirs(temp);
            Path local = fs.startLocalOutput(perm, new Path(temp, partition + ".db"));
            permPool.put(partition, perm);
            tempPool.put(partition, new Path(temp, partition + ".db"));
            outputFormat.initPartition(partition, local);
        }

        @Override
        public void close(TaskAttemptContext ctx) throws IOException, InterruptedException {
            FileSystem fs = FileSystem.get(ctx.getConfiguration());
            try {
                if (ctx != null) {
                    heartBeater.setProgress(ctx);
                }
                outputFormat.close();
                for (Map.Entry<Integer, Path> entry : permPool.entrySet()) {
                    // Hadoop - completeLocalOutput()
                    fs.completeLocalOutput(entry.getValue(), tempPool.get(entry.getKey()));
                }
            } finally {
                // In any case, destroy the HeartBeater
                heartBeater.cancelHeartBeat();
            }
        }

        @Override
        public void write(ITuple tuple, NullWritable ignore) throws IOException, InterruptedException {
            int partition = (Integer) tuple.get(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD);
            if (tempPool.get(partition) == null) {
                initSql(partition);
            }
            outputFormat.write(tuple);
        }
    };
}
From source file:com.splout.db.hadoop.TupleSampler.java
License:Apache License
/**
 * Random sampling method a-la-TeraSort, getting some consecutive samples from each InputSplit
 * without using a Job.
 * The output is a SequenceFile with keys.
 *
 * @return The number of retrieved samples
 */
private long randomSampling(long sampleSize, Configuration hadoopConf, Path outFile, List<InputSplit> splits,
        Map<InputSplit, TableSpec> splitToTableSpec,
        Map<InputSplit, InputFormat<ITuple, NullWritable>> splitToFormat,
        Map<InputSplit, Map<String, String>> specificHadoopConf,
        Map<InputSplit, RecordProcessor> recordProcessorPerSplit,
        Map<InputSplit, JavascriptEngine> splitToJsEngine, int maxSplitsToVisit) throws IOException {

    // Instantiate the writer we will write samples to
    FileSystem fs = FileSystem.get(outFile.toUri(), hadoopConf);
    if (splits.size() == 0) {
        throw new IllegalArgumentException("There are no splits to sample from!");
    }
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, hadoopConf, outFile, Text.class,
            NullWritable.class);

    logger.info("Sequential sampling options, max splits to visit: " + maxSplitsToVisit
            + ", samples to take: " + sampleSize + ", total number of splits: " + splits.size());
    int blocks = Math.min(maxSplitsToVisit, splits.size());
    blocks = Math.min((int) sampleSize, blocks);
    long recordsPerSample = sampleSize / blocks;
    int sampleStep = splits.size() / blocks;

    long records = 0;

    CounterInterface counterInterface = new CounterInterface(null) {
        public Counter getCounter(String group, String name) {
            return Mockito.mock(Counter.class);
        }
    };

    // Take N samples from different parts of the input
    for (int i = 0; i < blocks; ++i) {
        TaskAttemptID attemptId = new TaskAttemptID(new TaskID(), 1);

        TaskAttemptContext attemptContext = null;
        try {
            attemptContext = TaskAttemptContextFactory.get(hadoopConf, attemptId);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        InputSplit split = splits.get(sampleStep * i);
        if (specificHadoopConf.get(split) != null) {
            for (Map.Entry<String, String> specificConf : specificHadoopConf.get(split).entrySet()) {
                attemptContext.getConfiguration().set(specificConf.getKey(), specificConf.getValue());
            }
        }
        logger.info("Sampling split: " + split);
        RecordReader<ITuple, NullWritable> reader = null;
        try {
            reader = splitToFormat.get(split).createRecordReader(split, attemptContext);
            reader.initialize(split, attemptContext);

            RecordProcessor processor = recordProcessorPerSplit.get(split);
            Text key = new Text();
            while (reader.nextKeyValue()) {
                ITuple tuple = reader.getCurrentKey();

                ITuple uTuple;
                try {
                    uTuple = processor.process(tuple, tuple.getSchema().getName(), counterInterface);
                } catch (Throwable e) {
                    throw new RuntimeException(e);
                }
                if (uTuple != null) { // user may have filtered the record
                    try {
                        key.set(TablespaceGenerator.getPartitionByKey(uTuple,
                                splitToTableSpec.get(split), splitToJsEngine.get(split)));
                    } catch (Throwable e) {
                        throw new RuntimeException("Error when determining partition key.", e);
                    }
                    writer.append(key, NullWritable.get());
                    records += 1;
                    if ((i + 1) * recordsPerSample <= records) {
                        break;
                    }
                }
            }
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
    }

    writer.close();
    return records;
}
From source file:com.splunk.hunk.input.packet.DnsPcapRecordReader.java
License:Apache License
@Override
public void vixInitialize(VixInputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    //gLogger.info("takashi:TgzPacketRecordReader:vixInitialize is called");
    FileSystem fs = FileSystem.get(context.getConfiguration());
    fsDataIn = fs.open(split.getPath());
    pcapReader = initPcapReader(packet_type_class, new DataInputStream(fsDataIn));
    packetIterator = pcapReader.iterator();
    totalBytesToRead = split.getLength() - split.getStart();
    objectPreProcessor = new DnsPacketEventProcessor();
}
From source file:com.splunk.hunk.input.packet.HttpPcapRecordReader.java
License:Apache License
@Override
public void vixInitialize(VixInputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    //gLogger.info("takashi:TgzPacketRecordReader:vixInitialize is called");
    FileSystem fs = FileSystem.get(context.getConfiguration());
    fsDataIn = fs.open(split.getPath());
    pcapReader = initPcapReader(packet_type_class, new DataInputStream(fsDataIn));
    packetIterator = pcapReader.iterator();
    totalBytesToRead = split.getLength() - split.getStart();
    objectPreProcessor = new HttpPacketEventProcessor();
}
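For completeness, here is a sketch of the driver side that populates what context.getConfiguration() later returns inside the task: settings placed on the job Configuration at submission time are what every example above reads back. The property key is the same hypothetical one used in the sketch at the top of this page; Job.getInstance() and waitForCompletion() are the real Hadoop APIs.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class SubmitSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Values set here are what context.getConfiguration() returns inside
        // RecordReaders and OutputFormats at task time. The key is hypothetical.
        conf.setInt("example.buffer.size", 128 * 1024);
        Job job = Job.getInstance(conf, "sketch");
        // job.setInputFormatClass(...); job.setOutputFormatClass(...);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}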