List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext#getTaskAttemptID()
public TaskAttemptID getTaskAttemptID();
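A common pattern across the project examples below is to call getTaskAttemptID() inside an OutputFormat or RecordWriter and use the attempt id to build attempt-unique output paths or metadata strings (Phoenix writes it into HFile metadata, Trevni folds the task index into part-file names). The following minimal sketch illustrates that pattern under stated assumptions: AttemptNamedOutputFormat, its part-file naming scheme, and the plain-text record writer are invented for illustration and do not come from any of the projects listed below.

import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical output format: the class name and the part-file naming scheme are illustrative only.
public class AttemptNamedOutputFormat extends FileOutputFormat<NullWritable, Text> {

  @Override
  public RecordWriter<NullWritable, Text> getRecordWriter(final TaskAttemptContext context)
      throws IOException, InterruptedException {
    // Identifies this attempt of this task, e.g. attempt_200707121733_0003_m_000005_0
    TaskAttemptID attemptId = context.getTaskAttemptID();
    int taskIndex = attemptId.getTaskID().getId(); // task index within the job
    int attempt = attemptId.getId();               // retry counter for that task

    // Write into the committer's work path (as the Phoenix example does) and
    // name the file after the attempt so speculative attempts never collide.
    Path workDir = new FileOutputCommitter(getOutputPath(context), context).getWorkPath();
    Path file = new Path(workDir, String.format("part-%05d-%d", taskIndex, attempt));
    final FSDataOutputStream out = file.getFileSystem(context.getConfiguration()).create(file, false);

    return new RecordWriter<NullWritable, Text>() {
      @Override
      public void write(NullWritable key, Text value) throws IOException {
        out.write(value.getBytes(), 0, value.getLength());
        out.write('\n');
      }

      @Override
      public void close(TaskAttemptContext c) throws IOException {
        out.close();
      }
    };
  }
}

In the examples that follow, the same attempt id also appears in test harnesses (Mahout builds a synthetic TaskAttemptContext around new TaskAttemptID()) and in Pig's output committer, which inspects it to distinguish map tasks from reduce tasks.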
From source file:org.apache.mahout.classifier.df.mapreduce.partial.PartialSequentialBuilder.java
License:Apache License
@Override
protected boolean runJob(Job job) throws IOException, InterruptedException {
  Configuration conf = job.getConfiguration();

  // retrieve the splits
  TextInputFormat input = new TextInputFormat();
  List<InputSplit> splits = input.getSplits(job);

  int nbSplits = splits.size();
  log.debug("Nb splits : {}", nbSplits);

  InputSplit[] sorted = new InputSplit[nbSplits];
  splits.toArray(sorted);
  Builder.sortSplits(sorted);

  int numTrees = Builder.getNbTrees(conf); // total number of trees

  TaskAttemptContext task = new TaskAttemptContext(conf, new TaskAttemptID());

  firstOutput = new MockContext(new Step1Mapper(), conf, task.getTaskAttemptID(), numTrees);

  /* first instance id in hadoop's order */
  //int[] firstIds = new int[nbSplits];
  /* partitions' sizes in hadoop order */
  int[] sizes = new int[nbSplits];

  // to compute firstIds, process the splits in file order
  long slowest = 0; // duration of slowest map
  int firstId = 0;
  for (InputSplit split : splits) {
    int hp = ArrayUtils.indexOf(sorted, split); // hadoop's partition

    RecordReader<LongWritable, Text> reader = input.createRecordReader(split, task);
    reader.initialize(split, task);

    Step1Mapper mapper = new MockStep1Mapper(getTreeBuilder(), dataset, getSeed(), hp, nbSplits, numTrees);

    long time = System.currentTimeMillis();

    //firstIds[hp] = firstId;

    while (reader.nextKeyValue()) {
      mapper.map(reader.getCurrentKey(), reader.getCurrentValue(), firstOutput);
      firstId++;
      sizes[hp]++;
    }

    mapper.cleanup(firstOutput);

    time = System.currentTimeMillis() - time;
    log.info("Duration : {}", DFUtils.elapsedTime(time));

    if (time > slowest) {
      slowest = time;
    }
  }

  log.info("Longest duration : {}", DFUtils.elapsedTime(slowest));
  return true;
}
From source file:org.apache.mahout.df.mapreduce.partial.PartialSequentialBuilder.java
License:Apache License
@Override
protected boolean runJob(Job job) throws IOException, InterruptedException {
  Configuration conf = job.getConfiguration();

  // retrieve the splits
  TextInputFormat input = new TextInputFormat();
  List<InputSplit> splits = input.getSplits(job);

  int nbSplits = splits.size();
  log.debug("Nb splits : {}", nbSplits);

  InputSplit[] sorted = new InputSplit[nbSplits];
  splits.toArray(sorted);
  Builder.sortSplits(sorted);

  int numTrees = Builder.getNbTrees(conf); // total number of trees

  TaskAttemptContext task = new TaskAttemptContext(conf, new TaskAttemptID());

  firstOutput = new MockContext(new Step1Mapper(), conf, task.getTaskAttemptID(), numTrees);

  firstIds = new int[nbSplits];
  sizes = new int[nbSplits];

  // to compute firstIds, process the splits in file order
  long slowest = 0; // duration of slowest map
  int firstId = 0;
  for (int p = 0; p < nbSplits; p++) {
    InputSplit split = splits.get(p);
    int hp = ArrayUtils.indexOf(sorted, split); // hadoop's partition

    RecordReader<LongWritable, Text> reader = input.createRecordReader(split, task);
    reader.initialize(split, task);

    Step1Mapper mapper = new MockStep1Mapper(getTreeBuilder(), dataset, getSeed(), hp, nbSplits, numTrees);

    long time = System.currentTimeMillis();

    firstIds[hp] = firstId;

    while (reader.nextKeyValue()) {
      mapper.map(reader.getCurrentKey(), reader.getCurrentValue(), firstOutput);
      firstId++;
      sizes[hp]++;
    }

    mapper.cleanup(firstOutput);

    time = System.currentTimeMillis() - time;
    log.info("Duration : {}", DFUtils.elapsedTime(time));

    if (time > slowest) {
      slowest = time;
    }
  }

  log.info("Longest duration : {}", DFUtils.elapsedTime(slowest));
  return true;
}
From source file:org.apache.mahout.df.mapreduce.partial.PartialSequentialBuilder.java
License:Apache License
/**
 * The second step uses the trees to predict the rest of the instances outside
 * their own partition
 */
protected void secondStep(Configuration conf, Path forestPath, PredictionCallback callback)
    throws IOException, InterruptedException {
  JobContext jobContext = new JobContext(conf, new JobID());

  // retrieve the splits
  TextInputFormat input = new TextInputFormat();
  List<InputSplit> splits = input.getSplits(jobContext);

  int nbSplits = splits.size();
  log.debug("Nb splits : {}", nbSplits);

  InputSplit[] sorted = new InputSplit[nbSplits];
  splits.toArray(sorted);
  Builder.sortSplits(sorted);

  int numTrees = Builder.getNbTrees(conf); // total number of trees

  // compute the expected number of outputs
  int total = 0;
  for (int p = 0; p < nbSplits; p++) {
    total += Step2Mapper.nbConcerned(nbSplits, numTrees, p);
  }

  TaskAttemptContext task = new TaskAttemptContext(conf, new TaskAttemptID());

  secondOutput = new MockContext(new Step2Mapper(), conf, task.getTaskAttemptID(), numTrees);

  long slowest = 0; // duration of slowest map

  for (int partition = 0; partition < nbSplits; partition++) {
    InputSplit split = sorted[partition];
    RecordReader<LongWritable, Text> reader = input.createRecordReader(split, task);

    // load the output of the 1st step
    int nbConcerned = Step2Mapper.nbConcerned(nbSplits, numTrees, partition);
    TreeID[] fsKeys = new TreeID[nbConcerned];
    Node[] fsTrees = new Node[nbConcerned];

    FileSystem fs = forestPath.getFileSystem(conf);
    int numInstances = InterResults.load(fs, forestPath, nbSplits, numTrees, partition, fsKeys, fsTrees);

    Step2Mapper mapper = new Step2Mapper();
    mapper.configure(partition, dataset, fsKeys, fsTrees, numInstances);

    long time = System.currentTimeMillis();

    while (reader.nextKeyValue()) {
      mapper.map(reader.getCurrentKey(), reader.getCurrentValue(), secondOutput);
    }

    mapper.cleanup(secondOutput);

    time = System.currentTimeMillis() - time;
    log.info("Duration : {}", DFUtils.elapsedTime(time));

    if (time > slowest) {
      slowest = time;
    }
  }

  log.info("Longest duration : {}", DFUtils.elapsedTime(slowest));
}
From source file:org.apache.phoenix.mapreduce.MultiHfileOutputFormat.java
License:Apache License
/**
 * @param context
 * @return
 * @throws IOException
 */
static <V extends Cell> RecordWriter<TableRowkeyPair, V> createRecordWriter(final TaskAttemptContext context)
    throws IOException {
  // Get the path of the temporary output file
  final Path outputPath = FileOutputFormat.getOutputPath(context);
  final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
  final Configuration conf = context.getConfiguration();
  final FileSystem fs = outputdir.getFileSystem(conf);

  final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
  // Invented config. Add to hbase-*.xml if other than default compression.
  final String defaultCompressionStr = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
  final Algorithm defaultCompression = AbstractHFileWriter.compressionByName(defaultCompressionStr);
  final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
      false);

  return new RecordWriter<TableRowkeyPair, V>() {
    // Map of families to writers and how much has been output on the writer.
    private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
        Bytes.BYTES_COMPARATOR);
    private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
    private final byte[] now = Bytes.toBytes(EnvironmentEdgeManager.currentTimeMillis());
    private boolean rollRequested = false;

    @Override
    public void write(TableRowkeyPair row, V cell) throws IOException {
      KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
      // null input == user explicitly wants to flush
      if (row == null && kv == null) {
        rollWriters();
        return;
      }

      // phoenix-2216: start : extract table name from the rowkey
      String tableName = row.getTableName();
      byte[] rowKey = row.getRowkey().get();
      long length = kv.getLength();
      byte[] family = CellUtil.cloneFamily(kv);
      byte[] tableAndFamily = join(tableName, Bytes.toString(family));
      WriterLength wl = this.writers.get(tableAndFamily);
      // phoenix-2216: end

      // If this is a new column family, verify that the directory exists
      if (wl == null) {
        // phoenix-2216: start : create a directory for table and family within the output dir
        Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputdir, tableName);
        fs.mkdirs(new Path(tableOutputPath, Bytes.toString(family)));
        // phoenix-2216: end
      }

      // If any of the HFiles for the column families has reached
      // maxsize, we need to roll all the writers
      if (wl != null && wl.written + length >= maxsize) {
        this.rollRequested = true;
      }

      // This can only happen once a row is finished though
      if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
        rollWriters();
      }

      // create a new WAL writer, if necessary
      if (wl == null || wl.writer == null) {
        // phoenix-2216: start : passed even the table name
        wl = getNewWriter(tableName, family, conf);
        // phoenix-2216: end
      }

      // we now have the proper WAL writer. full steam ahead
      kv.updateLatestStamp(this.now);
      wl.writer.append(kv);
      wl.written += length;

      // Copy the row so we know when a row transition.
      this.previousRow = rowKey;
    }

    private void rollWriters() throws IOException {
      for (WriterLength wl : this.writers.values()) {
        if (wl.writer != null) {
          LOG.info("Writer=" + wl.writer.getPath() + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
          close(wl.writer);
        }
        wl.writer = null;
        wl.written = 0;
      }
      this.rollRequested = false;
    }

    /* Create a new StoreFile.Writer.
     * @param family
     * @return A WriterLength, containing a new StoreFile.Writer.
     * @throws IOException
     */
    @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "BX_UNBOXING_IMMEDIATELY_REBOXED",
        justification = "Not important")
    private WriterLength getNewWriter(final String tableName, byte[] family, Configuration conf)
        throws IOException {
      WriterLength wl = new WriterLength();
      Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputdir, tableName);
      Path familydir = new Path(tableOutputPath, Bytes.toString(family));

      // phoenix-2216: start : fetching the configuration properties that were set to the table.
      // create a map from column family to the compression algorithm for the table.
      final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf, tableName);
      final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf, tableName);
      final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf, tableName);
      // phoenix-2216: end

      String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
      final Map<byte[], DataBlockEncoding> datablockEncodingMap = createFamilyDataBlockEncodingMap(conf,
          tableName);
      final DataBlockEncoding overriddenEncoding;
      if (dataBlockEncodingStr != null) {
        overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
      } else {
        overriddenEncoding = null;
      }

      Algorithm compression = compressionMap.get(family);
      compression = compression == null ? defaultCompression : compression;
      BloomType bloomType = bloomTypeMap.get(family);
      bloomType = bloomType == null ? BloomType.NONE : bloomType;
      Integer blockSize = blockSizeMap.get(family);
      blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
      DataBlockEncoding encoding = overriddenEncoding;
      encoding = encoding == null ? datablockEncodingMap.get(family) : encoding;
      encoding = encoding == null ? DataBlockEncoding.NONE : encoding;

      Configuration tempConf = new Configuration(conf);
      tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
      HFileContextBuilder contextBuilder = new HFileContextBuilder().withCompression(compression)
          .withChecksumType(HStore.getChecksumType(conf))
          .withBytesPerCheckSum(HStore.getBytesPerChecksum(conf)).withBlockSize(blockSize);
      contextBuilder.withDataBlockEncoding(encoding);
      HFileContext hFileContext = contextBuilder.build();

      wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
          .withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR)
          .withFileContext(hFileContext).build();

      // join and put it in the writers map .
      // phoenix-2216: start : holds a map of writers where the
      // key in the map is a join byte array of table name and family.
      byte[] tableAndFamily = join(tableName, Bytes.toString(family));
      this.writers.put(tableAndFamily, wl);
      // phoenix-2216: end
      return wl;
    }

    private void close(final StoreFile.Writer w) throws IOException {
      if (w != null) {
        w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
            Bytes.toBytes(EnvironmentEdgeManager.currentTimeMillis()));
        w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
            Bytes.toBytes(context.getTaskAttemptID().toString()));
        w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
        w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
        w.appendTrackedTimestampsToMetadata();
        w.close();
      }
    }

    @Override
    public void close(TaskAttemptContext c) throws IOException, InterruptedException {
      for (WriterLength wl : this.writers.values()) {
        close(wl.writer);
      }
    }
  };
}
From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputCommitter.java
License:Apache License
private TaskAttemptContext setUpContext(TaskAttemptContext context, POStore store) throws IOException {
  // Setup UDFContext so StoreFunc can make use of it
  MapRedUtil.setupUDFContext(context.getConfiguration());
  // make a copy of the context so that the actions after this call
  // do not end up updating the same context
  TaskAttemptContext contextCopy = HadoopShims.createTaskAttemptContext(context.getConfiguration(),
      context.getTaskAttemptID());

  // call setLocation() on the storeFunc so that if there are any
  // side effects like setting map.output.dir on the Configuration
  // in the Context are needed by the OutputCommitter, those actions
  // will be done before the committer is created.
  PigOutputFormat.setLocation(contextCopy, store);
  return contextCopy;
}
From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputCommitter.java
License:Apache License
@Override
public void abortTask(TaskAttemptContext context) throws IOException {
  if (HadoopShims.isMap(context.getTaskAttemptID())) {
    for (Pair<OutputCommitter, POStore> mapCommitter : mapOutputCommitters) {
      if (mapCommitter.first != null) {
        TaskAttemptContext updatedContext = setUpContext(context, mapCommitter.second);
        mapCommitter.first.abortTask(updatedContext);
      }
    }
  } else {
    for (Pair<OutputCommitter, POStore> reduceCommitter : reduceOutputCommitters) {
      if (reduceCommitter.first != null) {
        TaskAttemptContext updatedContext = setUpContext(context, reduceCommitter.second);
        reduceCommitter.first.abortTask(updatedContext);
      }
    }
  }
}
From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputCommitter.java
License:Apache License
@Override
public void commitTask(TaskAttemptContext context) throws IOException {
  if (HadoopShims.isMap(context.getTaskAttemptID())) {
    for (Pair<OutputCommitter, POStore> mapCommitter : mapOutputCommitters) {
      if (mapCommitter.first != null) {
        TaskAttemptContext updatedContext = setUpContext(context, mapCommitter.second);
        mapCommitter.first.commitTask(updatedContext);
      }
    }
  } else {
    for (Pair<OutputCommitter, POStore> reduceCommitter : reduceOutputCommitters) {
      if (reduceCommitter.first != null) {
        TaskAttemptContext updatedContext = setUpContext(context, reduceCommitter.second);
        reduceCommitter.first.commitTask(updatedContext);
      }
    }
  }
}
From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputCommitter.java
License:Apache License
@Override
public boolean needsTaskCommit(TaskAttemptContext context) throws IOException {
  boolean needCommit = false;
  if (HadoopShims.isMap(context.getTaskAttemptID())) {
    for (Pair<OutputCommitter, POStore> mapCommitter : mapOutputCommitters) {
      if (mapCommitter.first != null) {
        TaskAttemptContext updatedContext = setUpContext(context, mapCommitter.second);
        needCommit = needCommit || mapCommitter.first.needsTaskCommit(updatedContext);
      }
    }
    return needCommit;
  } else {
    for (Pair<OutputCommitter, POStore> reduceCommitter : reduceOutputCommitters) {
      if (reduceCommitter.first != null) {
        TaskAttemptContext updatedContext = setUpContext(context, reduceCommitter.second);
        needCommit = needCommit || reduceCommitter.first.needsTaskCommit(updatedContext);
      }
    }
    return needCommit;
  }
}
From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputCommitter.java
License:Apache License
@Override
public void setupTask(TaskAttemptContext context) throws IOException {
  if (HadoopShims.isMap(context.getTaskAttemptID())) {
    for (Pair<OutputCommitter, POStore> mapCommitter : mapOutputCommitters) {
      if (mapCommitter.first != null) {
        TaskAttemptContext updatedContext = setUpContext(context, mapCommitter.second);
        mapCommitter.first.setupTask(updatedContext);
      }
    }
  } else {
    for (Pair<OutputCommitter, POStore> reduceCommitter : reduceOutputCommitters) {
      if (reduceCommitter.first != null) {
        TaskAttemptContext updatedContext = setUpContext(context, reduceCommitter.second);
        reduceCommitter.first.setupTask(updatedContext);
      }
    }
  }
}
From source file:org.apache.pig.builtin.TrevniStorage.java
License:Apache License
@Override
public OutputFormat<NullWritable, Object> getOutputFormat() throws IOException {

  class TrevniStorageOutputFormat extends FileOutputFormat<NullWritable, Object> {

    private Schema schema;

    TrevniStorageOutputFormat(final Schema s) {
      schema = s;
      if (s == null) {
        String schemaString = getProperties(AvroStorage.class, udfContextSignature)
            .getProperty(OUTPUT_AVRO_SCHEMA);
        if (schemaString != null) {
          schema = (new Schema.Parser()).parse(schemaString);
        }
      }
    }

    @Override
    public RecordWriter<NullWritable, Object> getRecordWriter(final TaskAttemptContext tc)
        throws IOException, InterruptedException {

      if (schema == null) {
        String schemaString = getProperties(AvroStorage.class, udfContextSignature)
            .getProperty(OUTPUT_AVRO_SCHEMA);
        if (schemaString != null) {
          schema = (new Schema.Parser()).parse(schemaString);
        }
        if (schema == null) {
          throw new IOException("Null output schema");
        }
      }

      final ColumnFileMetaData meta = new ColumnFileMetaData();

      for (Entry<String, String> e : tc.getConfiguration()) {
        if (e.getKey().startsWith(org.apache.trevni.avro.AvroTrevniOutputFormat.META_PREFIX)) {
          meta.put(e.getKey().substring(AvroJob.TEXT_PREFIX.length()),
              e.getValue().getBytes(MetaData.UTF8));
        }
      }

      final Path dir = getOutputPath(tc);
      final FileSystem fs = FileSystem.get(tc.getConfiguration());
      final long blockSize = fs.getDefaultBlockSize();

      if (!fs.mkdirs(dir)) {
        throw new IOException("Failed to create directory: " + dir);
      }

      meta.setCodec("deflate");

      return new AvroRecordWriter(dir, tc.getConfiguration()) {
        private int part = 0;
        private Schema avroRecordWriterSchema;
        private AvroColumnWriter<GenericData.Record> writer;

        private void flush() throws IOException {
          Integer taskAttemptId = tc.getTaskAttemptID().getTaskID().getId();
          String partName = String.format("%05d_%03d", taskAttemptId, part++);
          OutputStream out = fs.create(new Path(dir, "part-" + partName + AvroTrevniOutputFormat.EXT));
          try {
            writer.writeTo(out);
          } finally {
            out.flush();
            out.close();
          }
        }

        @Override
        public void close(final TaskAttemptContext arg0) throws IOException, InterruptedException {
          flush();
        }

        @Override
        public void write(final NullWritable n, final Object o) throws IOException, InterruptedException {
          GenericData.Record r = AvroStorageDataConversionUtilities.packIntoAvro((Tuple) o, schema);
          writer.write(r);
          if (writer.sizeEstimate() >= blockSize) {
            flush();
            writer = new AvroColumnWriter<GenericData.Record>(avroRecordWriterSchema, meta);
          }
        }

        @Override
        public void prepareToWrite(Schema s) throws IOException {
          avroRecordWriterSchema = s;
          writer = new AvroColumnWriter<GenericData.Record>(avroRecordWriterSchema, meta);
        }
      };
    }
  }

  return new TrevniStorageOutputFormat(schema);
}