List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext#getTaskAttemptID()
public TaskAttemptID getTaskAttemptID();
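A common pattern across the project examples below is to call getTaskAttemptID() inside an OutputFormat or RecordWriter and use the attempt id to build attempt-unique output paths or metadata strings (Phoenix writes it into HFile metadata, Trevni folds the task index into part-file names). The following minimal sketch illustrates that pattern under stated assumptions: AttemptNamedOutputFormat, its part-file naming scheme, and the plain-text record writer are invented for illustration and do not come from any of the projects listed below.

import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical output format: the class name and the part-file naming scheme are illustrative only.
public class AttemptNamedOutputFormat extends FileOutputFormat<NullWritable, Text> {

  @Override
  public RecordWriter<NullWritable, Text> getRecordWriter(final TaskAttemptContext context)
      throws IOException, InterruptedException {
    // Identifies this attempt of this task, e.g. attempt_200707121733_0003_m_000005_0
    TaskAttemptID attemptId = context.getTaskAttemptID();
    int taskIndex = attemptId.getTaskID().getId(); // task index within the job
    int attempt = attemptId.getId();               // retry counter for that task

    // Write into the committer's work path (as the Phoenix example does) and
    // name the file after the attempt so speculative attempts never collide.
    Path workDir = new FileOutputCommitter(getOutputPath(context), context).getWorkPath();
    Path file = new Path(workDir, String.format("part-%05d-%d", taskIndex, attempt));
    final FSDataOutputStream out = file.getFileSystem(context.getConfiguration()).create(file, false);

    return new RecordWriter<NullWritable, Text>() {
      @Override
      public void write(NullWritable key, Text value) throws IOException {
        out.write(value.getBytes(), 0, value.getLength());
        out.write('\n');
      }

      @Override
      public void close(TaskAttemptContext c) throws IOException {
        out.close();
      }
    };
  }
}

In the examples that follow, the same attempt id also appears in test harnesses (Mahout builds a synthetic TaskAttemptContext around new TaskAttemptID()) and in Pig's output committer, which inspects it to distinguish map tasks from reduce tasks.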
From source file:org.apache.mahout.classifier.df.mapreduce.partial.PartialSequentialBuilder.java
License:Apache License
@Override
protected boolean runJob(Job job) throws IOException, InterruptedException {
  Configuration conf = job.getConfiguration();

  // retrieve the splits
  TextInputFormat input = new TextInputFormat();
  List<InputSplit> splits = input.getSplits(job);

  int nbSplits = splits.size();
  log.debug("Nb splits : {}", nbSplits);

  InputSplit[] sorted = new InputSplit[nbSplits];
  splits.toArray(sorted);
  Builder.sortSplits(sorted);

  int numTrees = Builder.getNbTrees(conf); // total number of trees

  TaskAttemptContext task = new TaskAttemptContext(conf, new TaskAttemptID());

  firstOutput = new MockContext(new Step1Mapper(), conf, task.getTaskAttemptID(), numTrees);

  /* first instance id in hadoop's order */
  //int[] firstIds = new int[nbSplits];
  /* partitions' sizes in hadoop order */
  int[] sizes = new int[nbSplits];

  // to compute firstIds, process the splits in file order
  long slowest = 0; // duration of slowest map
  int firstId = 0;
  for (InputSplit split : splits) {
    int hp = ArrayUtils.indexOf(sorted, split); // hadoop's partition

    RecordReader<LongWritable, Text> reader = input.createRecordReader(split, task);
    reader.initialize(split, task);

    Step1Mapper mapper = new MockStep1Mapper(getTreeBuilder(), dataset, getSeed(), hp, nbSplits, numTrees);

    long time = System.currentTimeMillis();

    //firstIds[hp] = firstId;

    while (reader.nextKeyValue()) {
      mapper.map(reader.getCurrentKey(), reader.getCurrentValue(), firstOutput);
      firstId++;
      sizes[hp]++;
    }

    mapper.cleanup(firstOutput);

    time = System.currentTimeMillis() - time;
    log.info("Duration : {}", DFUtils.elapsedTime(time));

    if (time > slowest) {
      slowest = time;
    }
  }

  log.info("Longest duration : {}", DFUtils.elapsedTime(slowest));
  return true;
}
From source file:org.apache.mahout.df.mapreduce.partial.PartialSequentialBuilder.java
License:Apache License
@Override
protected boolean runJob(Job job) throws IOException, InterruptedException {
  Configuration conf = job.getConfiguration();

  // retrieve the splits
  TextInputFormat input = new TextInputFormat();
  List<InputSplit> splits = input.getSplits(job);

  int nbSplits = splits.size();
  log.debug("Nb splits : {}", nbSplits);

  InputSplit[] sorted = new InputSplit[nbSplits];
  splits.toArray(sorted);
  Builder.sortSplits(sorted);

  int numTrees = Builder.getNbTrees(conf); // total number of trees

  TaskAttemptContext task = new TaskAttemptContext(conf, new TaskAttemptID());

  firstOutput = new MockContext(new Step1Mapper(), conf, task.getTaskAttemptID(), numTrees);

  firstIds = new int[nbSplits];
  sizes = new int[nbSplits];

  // to compute firstIds, process the splits in file order
  long slowest = 0; // duration of slowest map
  int firstId = 0;
  for (int p = 0; p < nbSplits; p++) {
    InputSplit split = splits.get(p);
    int hp = ArrayUtils.indexOf(sorted, split); // hadoop's partition

    RecordReader<LongWritable, Text> reader = input.createRecordReader(split, task);
    reader.initialize(split, task);

    Step1Mapper mapper = new MockStep1Mapper(getTreeBuilder(), dataset, getSeed(), hp, nbSplits, numTrees);

    long time = System.currentTimeMillis();

    firstIds[hp] = firstId;

    while (reader.nextKeyValue()) {
      mapper.map(reader.getCurrentKey(), reader.getCurrentValue(), firstOutput);
      firstId++;
      sizes[hp]++;
    }

    mapper.cleanup(firstOutput);

    time = System.currentTimeMillis() - time;
    log.info("Duration : {}", DFUtils.elapsedTime(time));

    if (time > slowest) {
      slowest = time;
    }
  }

  log.info("Longest duration : {}", DFUtils.elapsedTime(slowest));
  return true;
}
From source file:org.apache.mahout.df.mapreduce.partial.PartialSequentialBuilder.java
License:Apache License
/**
 * The second step uses the trees to predict the rest of the instances outside
 * their own partition
 */
protected void secondStep(Configuration conf, Path forestPath, PredictionCallback callback)
    throws IOException, InterruptedException {
  JobContext jobContext = new JobContext(conf, new JobID());

  // retrieve the splits
  TextInputFormat input = new TextInputFormat();
  List<InputSplit> splits = input.getSplits(jobContext);

  int nbSplits = splits.size();
  log.debug("Nb splits : {}", nbSplits);

  InputSplit[] sorted = new InputSplit[nbSplits];
  splits.toArray(sorted);
  Builder.sortSplits(sorted);

  int numTrees = Builder.getNbTrees(conf); // total number of trees

  // compute the expected number of outputs
  int total = 0;
  for (int p = 0; p < nbSplits; p++) {
    total += Step2Mapper.nbConcerned(nbSplits, numTrees, p);
  }

  TaskAttemptContext task = new TaskAttemptContext(conf, new TaskAttemptID());

  secondOutput = new MockContext(new Step2Mapper(), conf, task.getTaskAttemptID(), numTrees);

  long slowest = 0; // duration of slowest map

  for (int partition = 0; partition < nbSplits; partition++) {
    InputSplit split = sorted[partition];
    RecordReader<LongWritable, Text> reader = input.createRecordReader(split, task);

    // load the output of the 1st step
    int nbConcerned = Step2Mapper.nbConcerned(nbSplits, numTrees, partition);
    TreeID[] fsKeys = new TreeID[nbConcerned];
    Node[] fsTrees = new Node[nbConcerned];

    FileSystem fs = forestPath.getFileSystem(conf);
    int numInstances = InterResults.load(fs, forestPath, nbSplits, numTrees, partition, fsKeys, fsTrees);

    Step2Mapper mapper = new Step2Mapper();
    mapper.configure(partition, dataset, fsKeys, fsTrees, numInstances);

    long time = System.currentTimeMillis();

    while (reader.nextKeyValue()) {
      mapper.map(reader.getCurrentKey(), reader.getCurrentValue(), secondOutput);
    }

    mapper.cleanup(secondOutput);

    time = System.currentTimeMillis() - time;
    log.info("Duration : {}", DFUtils.elapsedTime(time));

    if (time > slowest) {
      slowest = time;
    }
  }

  log.info("Longest duration : {}", DFUtils.elapsedTime(slowest));
}
From source file:org.apache.phoenix.mapreduce.MultiHfileOutputFormat.java
License:Apache License
/**
 * @param context
 * @return
 * @throws IOException
 */
static <V extends Cell> RecordWriter<TableRowkeyPair, V> createRecordWriter(final TaskAttemptContext context)
    throws IOException {
  // Get the path of the temporary output file
  final Path outputPath = FileOutputFormat.getOutputPath(context);
  final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
  final Configuration conf = context.getConfiguration();
  final FileSystem fs = outputdir.getFileSystem(conf);

  final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
  // Invented config. Add to hbase-*.xml if other than default compression.
  final String defaultCompressionStr = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
  final Algorithm defaultCompression = AbstractHFileWriter.compressionByName(defaultCompressionStr);
  final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
      false);

  return new RecordWriter<TableRowkeyPair, V>() {
    // Map of families to writers and how much has been output on the writer.
    private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
        Bytes.BYTES_COMPARATOR);
    private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
    private final byte[] now = Bytes.toBytes(EnvironmentEdgeManager.currentTimeMillis());
    private boolean rollRequested = false;

    @Override
    public void write(TableRowkeyPair row, V cell) throws IOException {
      KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
      // null input == user explicitly wants to flush
      if (row == null && kv == null) {
        rollWriters();
        return;
      }

      // phoenix-2216: start : extract table name from the rowkey
      String tableName = row.getTableName();
      byte[] rowKey = row.getRowkey().get();
      long length = kv.getLength();
      byte[] family = CellUtil.cloneFamily(kv);
      byte[] tableAndFamily = join(tableName, Bytes.toString(family));
      WriterLength wl = this.writers.get(tableAndFamily);
      // phoenix-2216: end

      // If this is a new column family, verify that the directory exists
      if (wl == null) {
        // phoenix-2216: start : create a directory for table and family within the output dir
        Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputdir, tableName);
        fs.mkdirs(new Path(tableOutputPath, Bytes.toString(family)));
        // phoenix-2216: end
      }

      // If any of the HFiles for the column families has reached
      // maxsize, we need to roll all the writers
      if (wl != null && wl.written + length >= maxsize) {
        this.rollRequested = true;
      }

      // This can only happen once a row is finished though
      if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
        rollWriters();
      }

      // create a new WAL writer, if necessary
      if (wl == null || wl.writer == null) {
        // phoenix-2216: start : passed even the table name
        wl = getNewWriter(tableName, family, conf);
        // phoenix-2216: end
      }

      // we now have the proper WAL writer. full steam ahead
      kv.updateLatestStamp(this.now);
      wl.writer.append(kv);
      wl.written += length;

      // Copy the row so we know when a row transition.
      this.previousRow = rowKey;
    }

    private void rollWriters() throws IOException {
      for (WriterLength wl : this.writers.values()) {
        if (wl.writer != null) {
          LOG.info("Writer=" + wl.writer.getPath() + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
          close(wl.writer);
        }
        wl.writer = null;
        wl.written = 0;
      }
      this.rollRequested = false;
    }

    /* Create a new StoreFile.Writer.
     * @param family
     * @return A WriterLength, containing a new StoreFile.Writer.
     * @throws IOException
     */
    @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "BX_UNBOXING_IMMEDIATELY_REBOXED",
        justification = "Not important")
    private WriterLength getNewWriter(final String tableName, byte[] family, Configuration conf)
        throws IOException {
      WriterLength wl = new WriterLength();
      Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputdir, tableName);
      Path familydir = new Path(tableOutputPath, Bytes.toString(family));

      // phoenix-2216: start : fetching the configuration properties that were set to the table.
      // create a map from column family to the compression algorithm for the table.
      final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf, tableName);
      final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf, tableName);
      final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf, tableName);
      // phoenix-2216: end

      String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
      final Map<byte[], DataBlockEncoding> datablockEncodingMap = createFamilyDataBlockEncodingMap(conf,
          tableName);
      final DataBlockEncoding overriddenEncoding;
      if (dataBlockEncodingStr != null) {
        overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
      } else {
        overriddenEncoding = null;
      }

      Algorithm compression = compressionMap.get(family);
      compression = compression == null ? defaultCompression : compression;
      BloomType bloomType = bloomTypeMap.get(family);
      bloomType = bloomType == null ? BloomType.NONE : bloomType;
      Integer blockSize = blockSizeMap.get(family);
      blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
      DataBlockEncoding encoding = overriddenEncoding;
      encoding = encoding == null ? datablockEncodingMap.get(family) : encoding;
      encoding = encoding == null ? DataBlockEncoding.NONE : encoding;

      Configuration tempConf = new Configuration(conf);
      tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
      HFileContextBuilder contextBuilder = new HFileContextBuilder().withCompression(compression)
          .withChecksumType(HStore.getChecksumType(conf))
          .withBytesPerCheckSum(HStore.getBytesPerChecksum(conf)).withBlockSize(blockSize);
      contextBuilder.withDataBlockEncoding(encoding);
      HFileContext hFileContext = contextBuilder.build();

      wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
          .withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR)
          .withFileContext(hFileContext).build();

      // join and put it in the writers map .
      // phoenix-2216: start : holds a map of writers where the
      // key in the map is a join byte array of table name and family.
      byte[] tableAndFamily = join(tableName, Bytes.toString(family));
      this.writers.put(tableAndFamily, wl);
      // phoenix-2216: end
      return wl;
    }

    private void close(final StoreFile.Writer w) throws IOException {
      if (w != null) {
        w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
            Bytes.toBytes(EnvironmentEdgeManager.currentTimeMillis()));
        w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
            Bytes.toBytes(context.getTaskAttemptID().toString()));
        w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
        w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
        w.appendTrackedTimestampsToMetadata();
        w.close();
      }
    }

    @Override
    public void close(TaskAttemptContext c) throws IOException, InterruptedException {
      for (WriterLength wl : this.writers.values()) {
        close(wl.writer);
      }
    }
  };
}
From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputCommitter.java
License:Apache License
private TaskAttemptContext setUpContext(TaskAttemptContext context, POStore store) throws IOException {
  // Setup UDFContext so StoreFunc can make use of it
  MapRedUtil.setupUDFContext(context.getConfiguration());
  // make a copy of the context so that the actions after this call
  // do not end up updating the same context
  TaskAttemptContext contextCopy = HadoopShims.createTaskAttemptContext(context.getConfiguration(),
      context.getTaskAttemptID());

  // call setLocation() on the storeFunc so that if there are any
  // side effects like setting map.output.dir on the Configuration
  // in the Context are needed by the OutputCommitter, those actions
  // will be done before the committer is created.
  PigOutputFormat.setLocation(contextCopy, store);
  return contextCopy;
}
From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputCommitter.java
License:Apache License
@Override
public void abortTask(TaskAttemptContext context) throws IOException {
  if (HadoopShims.isMap(context.getTaskAttemptID())) {
    for (Pair<OutputCommitter, POStore> mapCommitter : mapOutputCommitters) {
      if (mapCommitter.first != null) {
        TaskAttemptContext updatedContext = setUpContext(context, mapCommitter.second);
        mapCommitter.first.abortTask(updatedContext);
      }
    }
  } else {
    for (Pair<OutputCommitter, POStore> reduceCommitter : reduceOutputCommitters) {
      if (reduceCommitter.first != null) {
        TaskAttemptContext updatedContext = setUpContext(context, reduceCommitter.second);
        reduceCommitter.first.abortTask(updatedContext);
      }
    }
  }
}
From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputCommitter.java
License:Apache License
@Override
public void commitTask(TaskAttemptContext context) throws IOException {
  if (HadoopShims.isMap(context.getTaskAttemptID())) {
    for (Pair<OutputCommitter, POStore> mapCommitter : mapOutputCommitters) {
      if (mapCommitter.first != null) {
        TaskAttemptContext updatedContext = setUpContext(context, mapCommitter.second);
        mapCommitter.first.commitTask(updatedContext);
      }
    }
  } else {
    for (Pair<OutputCommitter, POStore> reduceCommitter : reduceOutputCommitters) {
      if (reduceCommitter.first != null) {
        TaskAttemptContext updatedContext = setUpContext(context, reduceCommitter.second);
        reduceCommitter.first.commitTask(updatedContext);
      }
    }
  }
}
From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputCommitter.java
License:Apache License
@Override
public boolean needsTaskCommit(TaskAttemptContext context) throws IOException {
  boolean needCommit = false;
  if (HadoopShims.isMap(context.getTaskAttemptID())) {
    for (Pair<OutputCommitter, POStore> mapCommitter : mapOutputCommitters) {
      if (mapCommitter.first != null) {
        TaskAttemptContext updatedContext = setUpContext(context, mapCommitter.second);
        needCommit = needCommit || mapCommitter.first.needsTaskCommit(updatedContext);
      }
    }
    return needCommit;
  } else {
    for (Pair<OutputCommitter, POStore> reduceCommitter : reduceOutputCommitters) {
      if (reduceCommitter.first != null) {
        TaskAttemptContext updatedContext = setUpContext(context, reduceCommitter.second);
        needCommit = needCommit || reduceCommitter.first.needsTaskCommit(updatedContext);
      }
    }
    return needCommit;
  }
}
From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputCommitter.java
License:Apache License
@Override
public void setupTask(TaskAttemptContext context) throws IOException {
  if (HadoopShims.isMap(context.getTaskAttemptID())) {
    for (Pair<OutputCommitter, POStore> mapCommitter : mapOutputCommitters) {
      if (mapCommitter.first != null) {
        TaskAttemptContext updatedContext = setUpContext(context, mapCommitter.second);
        mapCommitter.first.setupTask(updatedContext);
      }
    }
  } else {
    for (Pair<OutputCommitter, POStore> reduceCommitter : reduceOutputCommitters) {
      if (reduceCommitter.first != null) {
        TaskAttemptContext updatedContext = setUpContext(context, reduceCommitter.second);
        reduceCommitter.first.setupTask(updatedContext);
      }
    }
  }
}
From source file:org.apache.pig.builtin.TrevniStorage.java
License:Apache License
@Override
public OutputFormat<NullWritable, Object> getOutputFormat() throws IOException {

  class TrevniStorageOutputFormat extends FileOutputFormat<NullWritable, Object> {

    private Schema schema;

    TrevniStorageOutputFormat(final Schema s) {
      schema = s;
      if (s == null) {
        String schemaString = getProperties(AvroStorage.class, udfContextSignature)
            .getProperty(OUTPUT_AVRO_SCHEMA);
        if (schemaString != null) {
          schema = (new Schema.Parser()).parse(schemaString);
        }
      }
    }

    @Override
    public RecordWriter<NullWritable, Object> getRecordWriter(final TaskAttemptContext tc)
        throws IOException, InterruptedException {

      if (schema == null) {
        String schemaString = getProperties(AvroStorage.class, udfContextSignature)
            .getProperty(OUTPUT_AVRO_SCHEMA);
        if (schemaString != null) {
          schema = (new Schema.Parser()).parse(schemaString);
        }
        if (schema == null) {
          throw new IOException("Null output schema");
        }
      }

      final ColumnFileMetaData meta = new ColumnFileMetaData();

      for (Entry<String, String> e : tc.getConfiguration()) {
        if (e.getKey().startsWith(org.apache.trevni.avro.AvroTrevniOutputFormat.META_PREFIX)) {
          meta.put(e.getKey().substring(AvroJob.TEXT_PREFIX.length()),
              e.getValue().getBytes(MetaData.UTF8));
        }
      }

      final Path dir = getOutputPath(tc);
      final FileSystem fs = FileSystem.get(tc.getConfiguration());
      final long blockSize = fs.getDefaultBlockSize();

      if (!fs.mkdirs(dir)) {
        throw new IOException("Failed to create directory: " + dir);
      }

      meta.setCodec("deflate");

      return new AvroRecordWriter(dir, tc.getConfiguration()) {
        private int part = 0;
        private Schema avroRecordWriterSchema;
        private AvroColumnWriter<GenericData.Record> writer;

        private void flush() throws IOException {
          Integer taskAttemptId = tc.getTaskAttemptID().getTaskID().getId();
          String partName = String.format("%05d_%03d", taskAttemptId, part++);
          OutputStream out = fs.create(new Path(dir, "part-" + partName + AvroTrevniOutputFormat.EXT));
          try {
            writer.writeTo(out);
          } finally {
            out.flush();
            out.close();
          }
        }

        @Override
        public void close(final TaskAttemptContext arg0) throws IOException, InterruptedException {
          flush();
        }

        @Override
        public void write(final NullWritable n, final Object o) throws IOException, InterruptedException {
          GenericData.Record r = AvroStorageDataConversionUtilities.packIntoAvro((Tuple) o, schema);
          writer.write(r);
          if (writer.sizeEstimate() >= blockSize) {
            flush();
            writer = new AvroColumnWriter<GenericData.Record>(avroRecordWriterSchema, meta);
          }
        }

        @Override
        public void prepareToWrite(Schema s) throws IOException {
          avroRecordWriterSchema = s;
          writer = new AvroColumnWriter<GenericData.Record>(avroRecordWriterSchema, meta);
        }
      };
    }
  }

  return new TrevniStorageOutputFormat(schema);
}