Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getTaskAttemptID

List of usage examples for org.apache.hadoop.mapreduce TaskAttemptContext getTaskAttemptID

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce TaskAttemptContext getTaskAttemptID.

Prototype

public TaskAttemptID getTaskAttemptID();

Document

Get the unique name for this task attempt.
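
Before the full examples below, here is a minimal sketch (not taken from any of the source files on this page) of the most common pattern: a Mapper reads its own TaskAttemptID through its Context, which is a TaskAttemptContext, and uses the ID's string form, unique per attempt, to tag its output. The class name and key/value types are illustrative only.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.TaskAttemptID;

public class AttemptIdMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    private String attemptName;

    @Override
    protected void setup(Context context) {
        // The Mapper's Context is a TaskAttemptContext, so getTaskAttemptID() is available here.
        TaskAttemptID attemptId = context.getTaskAttemptID();
        // The string form (e.g. "attempt_200707121733_0003_m_000005_0") is unique per
        // task attempt, which makes it a safe component of side-output names.
        attemptName = attemptId.toString();
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        context.write(new Text(attemptName), key);
    }
}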

Usage

From source file:org.apache.mahout.classifier.df.mapreduce.partial.PartialSequentialBuilder.java

License:Apache License

@Override
protected boolean runJob(Job job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();

    // retrieve the splits
    TextInputFormat input = new TextInputFormat();
    List<InputSplit> splits = input.getSplits(job);

    int nbSplits = splits.size();
    log.debug("Nb splits : {}", nbSplits);

    InputSplit[] sorted = new InputSplit[nbSplits];
    splits.toArray(sorted);
    Builder.sortSplits(sorted);

    int numTrees = Builder.getNbTrees(conf); // total number of trees

    TaskAttemptContext task = new TaskAttemptContext(conf, new TaskAttemptID());

    firstOutput = new MockContext(new Step1Mapper(), conf, task.getTaskAttemptID(), numTrees);

    /* first instance id in hadoop's order */
    //int[] firstIds = new int[nbSplits];
    /* partitions' sizes in hadoop order */
    int[] sizes = new int[nbSplits];

    // to compute firstIds, process the splits in file order
    long slowest = 0; // duration of slowest map
    int firstId = 0;
    for (InputSplit split : splits) {
        int hp = ArrayUtils.indexOf(sorted, split); // hadoop's partition

        RecordReader<LongWritable, Text> reader = input.createRecordReader(split, task);
        reader.initialize(split, task);

        Step1Mapper mapper = new MockStep1Mapper(getTreeBuilder(), dataset, getSeed(), hp, nbSplits, numTrees);

        long time = System.currentTimeMillis();

        //firstIds[hp] = firstId;

        while (reader.nextKeyValue()) {
            mapper.map(reader.getCurrentKey(), reader.getCurrentValue(), firstOutput);
            firstId++;
            sizes[hp]++;
        }

        mapper.cleanup(firstOutput);

        time = System.currentTimeMillis() - time;
        log.info("Duration : {}", DFUtils.elapsedTime(time));

        if (time > slowest) {
            slowest = time;
        }
    }

    log.info("Longest duration : {}", DFUtils.elapsedTime(slowest));
    return true;
}
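
A note on the construction above: in the Hadoop version these Mahout examples compile against, TaskAttemptContext is a concrete class, so new TaskAttemptContext(conf, new TaskAttemptID()) works directly. On Hadoop 0.23/2.x it became an interface; as a hedged sketch (not part of the Mahout source), the rough equivalent there is TaskAttemptContextImpl, and getTaskAttemptID() simply returns whichever ID was passed in:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskContexts {

    // Builds a standalone TaskAttemptContext the way the example above does,
    // but against the Hadoop 2.x API where TaskAttemptContext is an interface.
    public static TaskAttemptContext newLocalContext(Configuration conf) {
        TaskAttemptContext task = new TaskAttemptContextImpl(conf, new TaskAttemptID());
        // getTaskAttemptID() hands back the (default-constructed) ID supplied above.
        return task;
    }
}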

From source file:org.apache.mahout.df.mapreduce.partial.PartialSequentialBuilder.java

License:Apache License

@Override
protected boolean runJob(Job job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();

    // retrieve the splits
    TextInputFormat input = new TextInputFormat();
    List<InputSplit> splits = input.getSplits(job);

    int nbSplits = splits.size();
    log.debug("Nb splits : {}", nbSplits);

    InputSplit[] sorted = new InputSplit[nbSplits];
    splits.toArray(sorted);
    Builder.sortSplits(sorted);

    int numTrees = Builder.getNbTrees(conf); // total number of trees

    TaskAttemptContext task = new TaskAttemptContext(conf, new TaskAttemptID());

    firstOutput = new MockContext(new Step1Mapper(), conf, task.getTaskAttemptID(), numTrees);

    firstIds = new int[nbSplits];
    sizes = new int[nbSplits];

    // to compute firstIds, process the splits in file order
    long slowest = 0; // duration of slowest map
    int firstId = 0;
    for (int p = 0; p < nbSplits; p++) {
        InputSplit split = splits.get(p);
        int hp = ArrayUtils.indexOf(sorted, split); // hadoop's partition

        RecordReader<LongWritable, Text> reader = input.createRecordReader(split, task);
        reader.initialize(split, task);

        Step1Mapper mapper = new MockStep1Mapper(getTreeBuilder(), dataset, getSeed(), hp, nbSplits, numTrees);

        long time = System.currentTimeMillis();

        firstIds[hp] = firstId;

        while (reader.nextKeyValue()) {
            mapper.map(reader.getCurrentKey(), reader.getCurrentValue(), firstOutput);
            firstId++;
            sizes[hp]++;
        }

        mapper.cleanup(firstOutput);

        time = System.currentTimeMillis() - time;
        log.info("Duration : {}", DFUtils.elapsedTime(time));

        if (time > slowest) {
            slowest = time;
        }
    }

    log.info("Longest duration : {}", DFUtils.elapsedTime(slowest));
    return true;
}

From source file:org.apache.mahout.df.mapreduce.partial.PartialSequentialBuilder.java

License:Apache License

/**
 * The second step uses the trees to predict the rest of the instances outside
 * their own partition.
 */
protected void secondStep(Configuration conf, Path forestPath, PredictionCallback callback)
        throws IOException, InterruptedException {
    JobContext jobContext = new JobContext(conf, new JobID());

    // retrieve the splits
    TextInputFormat input = new TextInputFormat();
    List<InputSplit> splits = input.getSplits(jobContext);

    int nbSplits = splits.size();
    log.debug("Nb splits : {}", nbSplits);

    InputSplit[] sorted = new InputSplit[nbSplits];
    splits.toArray(sorted);
    Builder.sortSplits(sorted);

    int numTrees = Builder.getNbTrees(conf); // total number of trees

    // compute the expected number of outputs
    int total = 0;
    for (int p = 0; p < nbSplits; p++) {
        total += Step2Mapper.nbConcerned(nbSplits, numTrees, p);
    }

    TaskAttemptContext task = new TaskAttemptContext(conf, new TaskAttemptID());

    secondOutput = new MockContext(new Step2Mapper(), conf, task.getTaskAttemptID(), numTrees);
    long slowest = 0; // duration of slowest map

    for (int partition = 0; partition < nbSplits; partition++) {

        InputSplit split = sorted[partition];
        RecordReader<LongWritable, Text> reader = input.createRecordReader(split, task);

        // load the output of the 1st step
        int nbConcerned = Step2Mapper.nbConcerned(nbSplits, numTrees, partition);
        TreeID[] fsKeys = new TreeID[nbConcerned];
        Node[] fsTrees = new Node[nbConcerned];

        FileSystem fs = forestPath.getFileSystem(conf);
        int numInstances = InterResults.load(fs, forestPath, nbSplits, numTrees, partition, fsKeys, fsTrees);

        Step2Mapper mapper = new Step2Mapper();
        mapper.configure(partition, dataset, fsKeys, fsTrees, numInstances);

        long time = System.currentTimeMillis();

        while (reader.nextKeyValue()) {
            mapper.map(reader.getCurrentKey(), reader.getCurrentValue(), secondOutput);
        }

        mapper.cleanup(secondOutput);

        time = System.currentTimeMillis() - time;
        log.info("Duration : {}", DFUtils.elapsedTime(time));

        if (time > slowest) {
            slowest = time;
        }
    }

    log.info("Longest duration : {}", DFUtils.elapsedTime(slowest));
}

From source file:org.apache.phoenix.mapreduce.MultiHfileOutputFormat.java

License:Apache License

/**
 * Creates a RecordWriter that writes each incoming cell into a per-table,
 * per-column-family HFile under the job's output directory.
 *
 * @param context the task attempt context supplying the output path and configuration
 * @return a record writer for {@code TableRowkeyPair} keys and HBase cells
 * @throws IOException if the output directory or file system cannot be accessed
 */
static <V extends Cell> RecordWriter<TableRowkeyPair, V> createRecordWriter(final TaskAttemptContext context)
        throws IOException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputdir.getFileSystem(conf);

    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config.  Add to hbase-*.xml if other than default compression.
    final String defaultCompressionStr = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final Algorithm defaultCompression = AbstractHFileWriter.compressionByName(defaultCompressionStr);
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
            false);

    return new RecordWriter<TableRowkeyPair, V>() {
        // Map of families to writers and how much has been output on the writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                Bytes.BYTES_COMPARATOR);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(EnvironmentEdgeManager.currentTimeMillis());
        private boolean rollRequested = false;

        @Override
        public void write(TableRowkeyPair row, V cell) throws IOException {
            KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
            // null input == user explicitly wants to flush
            if (row == null && kv == null) {
                rollWriters();
                return;
            }

            // phoenix-2216: start : extract table name from the rowkey
            String tableName = row.getTableName();
            byte[] rowKey = row.getRowkey().get();
            long length = kv.getLength();
            byte[] family = CellUtil.cloneFamily(kv);
            byte[] tableAndFamily = join(tableName, Bytes.toString(family));
            WriterLength wl = this.writers.get(tableAndFamily);
            // phoenix-2216: end

            // If this is a new column family, verify that the directory exists
            if (wl == null) {
                // phoenix-2216: start : create a directory for table and family within the output dir 
                Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputdir, tableName);
                fs.mkdirs(new Path(tableOutputPath, Bytes.toString(family)));
                // phoenix-2216: end
            }

            // If any of the HFiles for the column families has reached
            // maxsize, we need to roll all the writers
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }

            // This can only happen once a row is finished though
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }

            // create a new WAL writer, if necessary
            if (wl == null || wl.writer == null) {
                // phoenix-2216: start : passed even the table name
                wl = getNewWriter(tableName, family, conf);
                // phoenix-2216: end
            }

            // we now have the proper WAL writer. full steam ahead
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;

            // Copy the row so we can detect when a row transition happens.
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        /* Create a new StoreFile.Writer.
         * @param family
         * @return A WriterLength, containing a new StoreFile.Writer.
         * @throws IOException
         */
        @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "BX_UNBOXING_IMMEDIATELY_REBOXED", justification = "Not important")
        private WriterLength getNewWriter(final String tableName, byte[] family, Configuration conf)
                throws IOException {

            WriterLength wl = new WriterLength();
            Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputdir, tableName);
            Path familydir = new Path(tableOutputPath, Bytes.toString(family));

            // phoenix-2216: start : fetching the configuration properties that were set to the table.
            // create a map from column family to the compression algorithm for the table.
            final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf, tableName);
            final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf, tableName);
            final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf, tableName);
            // phoenix-2216: end

            String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
            final Map<byte[], DataBlockEncoding> datablockEncodingMap = createFamilyDataBlockEncodingMap(conf,
                    tableName);
            final DataBlockEncoding overriddenEncoding;
            if (dataBlockEncodingStr != null) {
                overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
            } else {
                overriddenEncoding = null;
            }

            Algorithm compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            BloomType bloomType = bloomTypeMap.get(family);
            bloomType = bloomType == null ? BloomType.NONE : bloomType;
            Integer blockSize = blockSizeMap.get(family);
            blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
            DataBlockEncoding encoding = overriddenEncoding;
            encoding = encoding == null ? datablockEncodingMap.get(family) : encoding;
            encoding = encoding == null ? DataBlockEncoding.NONE : encoding;
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            HFileContextBuilder contextBuilder = new HFileContextBuilder().withCompression(compression)
                    .withChecksumType(HStore.getChecksumType(conf))
                    .withBytesPerCheckSum(HStore.getBytesPerChecksum(conf)).withBlockSize(blockSize);
            contextBuilder.withDataBlockEncoding(encoding);
            HFileContext hFileContext = contextBuilder.build();

            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
                    .withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR)
                    .withFileContext(hFileContext).build();

            // join and put it in the writers map.
            // phoenix-2216: start : holds a map of writers where the 
            //                       key in the map is a join byte array of table name and family.
            byte[] tableAndFamily = join(tableName, Bytes.toString(family));
            this.writers.put(tableAndFamily, wl);
            // phoenix-2216: end
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
                        Bytes.toBytes(EnvironmentEdgeManager.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        @Override
        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}

From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputCommitter.java

License:Apache License

private TaskAttemptContext setUpContext(TaskAttemptContext context, POStore store) throws IOException {
    // Setup UDFContext so StoreFunc can make use of it
    MapRedUtil.setupUDFContext(context.getConfiguration());
    // make a copy of the context so that the actions after this call
    // do not end up updating the same context
    TaskAttemptContext contextCopy = HadoopShims.createTaskAttemptContext(context.getConfiguration(),
            context.getTaskAttemptID());

    // call setLocation() on the storeFunc so that any side effects needed by
    // the OutputCommitter, such as setting map.output.dir on the Configuration
    // in the Context, take place before the committer is created.
    PigOutputFormat.setLocation(contextCopy, store);
    return contextCopy;
}

From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputCommitter.java

License:Apache License

@Override
public void abortTask(TaskAttemptContext context) throws IOException {
    if (HadoopShims.isMap(context.getTaskAttemptID())) {
        for (Pair<OutputCommitter, POStore> mapCommitter : mapOutputCommitters) {
            if (mapCommitter.first != null) {
                TaskAttemptContext updatedContext = setUpContext(context, mapCommitter.second);
                mapCommitter.first.abortTask(updatedContext);
            }
        }
    } else {
        for (Pair<OutputCommitter, POStore> reduceCommitter : reduceOutputCommitters) {
            if (reduceCommitter.first != null) {
                TaskAttemptContext updatedContext = setUpContext(context, reduceCommitter.second);
                reduceCommitter.first.abortTask(updatedContext);
            }
        }
    }
}
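
The HadoopShims.isMap(context.getTaskAttemptID()) check above dispatches on whether the attempt belongs to a map task or a reduce task. As a hedged sketch of what such a check amounts to (the class and method names here are illustrative, not Pig's shim), on Hadoop 0.21+/2.x the task type can be read straight off the attempt ID:

import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskTypeCheck {

    // Returns true when the attempt ID identifies a map-task attempt.
    public static boolean isMapAttempt(TaskAttemptID attemptId) {
        return attemptId.getTaskType() == TaskType.MAP;
    }
}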

From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputCommitter.java

License:Apache License

@Override
public void commitTask(TaskAttemptContext context) throws IOException {
    if (HadoopShims.isMap(context.getTaskAttemptID())) {
        for (Pair<OutputCommitter, POStore> mapCommitter : mapOutputCommitters) {
            if (mapCommitter.first != null) {
                TaskAttemptContext updatedContext = setUpContext(context, mapCommitter.second);
                mapCommitter.first.commitTask(updatedContext);
            }
        }
    } else {
        for (Pair<OutputCommitter, POStore> reduceCommitter : reduceOutputCommitters) {
            if (reduceCommitter.first != null) {
                TaskAttemptContext updatedContext = setUpContext(context, reduceCommitter.second);
                reduceCommitter.first.commitTask(updatedContext);
            }
        }
    }
}

From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputCommitter.java

License:Apache License

@Override
public boolean needsTaskCommit(TaskAttemptContext context) throws IOException {
    boolean needCommit = false;
    if (HadoopShims.isMap(context.getTaskAttemptID())) {
        for (Pair<OutputCommitter, POStore> mapCommitter : mapOutputCommitters) {
            if (mapCommitter.first != null) {
                TaskAttemptContext updatedContext = setUpContext(context, mapCommitter.second);
                needCommit = needCommit || mapCommitter.first.needsTaskCommit(updatedContext);
            }
        }
        return needCommit;
    } else {
        for (Pair<OutputCommitter, POStore> reduceCommitter : reduceOutputCommitters) {
            if (reduceCommitter.first != null) {
                TaskAttemptContext updatedContext = setUpContext(context, reduceCommitter.second);
                needCommit = needCommit || reduceCommitter.first.needsTaskCommit(updatedContext);
            }
        }
        return needCommit;
    }
}

From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputCommitter.java

License:Apache License

@Override
public void setupTask(TaskAttemptContext context) throws IOException {
    if (HadoopShims.isMap(context.getTaskAttemptID())) {
        for (Pair<OutputCommitter, POStore> mapCommitter : mapOutputCommitters) {
            if (mapCommitter.first != null) {
                TaskAttemptContext updatedContext = setUpContext(context, mapCommitter.second);
                mapCommitter.first.setupTask(updatedContext);
            }
        }
    } else {
        for (Pair<OutputCommitter, POStore> reduceCommitter : reduceOutputCommitters) {
            if (reduceCommitter.first != null) {
                TaskAttemptContext updatedContext = setUpContext(context, reduceCommitter.second);
                reduceCommitter.first.setupTask(updatedContext);
            }
        }
    }
}

From source file:org.apache.pig.builtin.TrevniStorage.java

License:Apache License

@Override
public OutputFormat<NullWritable, Object> getOutputFormat() throws IOException {
    class TrevniStorageOutputFormat extends FileOutputFormat<NullWritable, Object> {

        private Schema schema;

        TrevniStorageOutputFormat(final Schema s) {
            schema = s;
            if (s == null) {
                String schemaString = getProperties(AvroStorage.class, udfContextSignature)
                        .getProperty(OUTPUT_AVRO_SCHEMA);
                if (schemaString != null) {
                    schema = (new Schema.Parser()).parse(schemaString);
                }
            }

        }

        @Override
        public RecordWriter<NullWritable, Object> getRecordWriter(final TaskAttemptContext tc)
                throws IOException, InterruptedException {

            if (schema == null) {
                String schemaString = getProperties(AvroStorage.class, udfContextSignature)
                        .getProperty(OUTPUT_AVRO_SCHEMA);
                if (schemaString != null) {
                    schema = (new Schema.Parser()).parse(schemaString);
                }
                if (schema == null) {
                    throw new IOException("Null output schema");
                }
            }

            final ColumnFileMetaData meta = new ColumnFileMetaData();

            for (Entry<String, String> e : tc.getConfiguration()) {
                if (e.getKey().startsWith(org.apache.trevni.avro.AvroTrevniOutputFormat.META_PREFIX)) {
                    meta.put(e.getKey().substring(AvroJob.TEXT_PREFIX.length()),
                            e.getValue().getBytes(MetaData.UTF8));
                }
            }

            final Path dir = getOutputPath(tc);
            final FileSystem fs = FileSystem.get(tc.getConfiguration());
            final long blockSize = fs.getDefaultBlockSize();

            if (!fs.mkdirs(dir)) {
                throw new IOException("Failed to create directory: " + dir);
            }

            meta.setCodec("deflate");

            return new AvroRecordWriter(dir, tc.getConfiguration()) {
                private int part = 0;
                private Schema avroRecordWriterSchema;
                private AvroColumnWriter<GenericData.Record> writer;

                private void flush() throws IOException {
                    Integer taskAttemptId = tc.getTaskAttemptID().getTaskID().getId();
                    String partName = String.format("%05d_%03d", taskAttemptId, part++);
                    OutputStream out = fs
                            .create(new Path(dir, "part-" + partName + AvroTrevniOutputFormat.EXT));
                    try {
                        writer.writeTo(out);
                    } finally {
                        out.flush();
                        out.close();
                    }
                }

                @Override
                public void close(final TaskAttemptContext arg0) throws IOException, InterruptedException {
                    flush();
                }

                @Override
                public void write(final NullWritable n, final Object o)
                        throws IOException, InterruptedException {
                    GenericData.Record r = AvroStorageDataConversionUtilities.packIntoAvro((Tuple) o, schema);
                    writer.write(r);
                    if (writer.sizeEstimate() >= blockSize) {
                        flush();
                        writer = new AvroColumnWriter<GenericData.Record>(avroRecordWriterSchema, meta);
                    }
                }

                @Override
                public void prepareToWrite(Schema s) throws IOException {
                    avroRecordWriterSchema = s;
                    writer = new AvroColumnWriter<GenericData.Record>(avroRecordWriterSchema, meta);
                }
            };
        }
    }

    return new TrevniStorageOutputFormat(schema);
}
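
The TrevniStorage writer above derives its part-file numbers from tc.getTaskAttemptID().getTaskID().getId(). As a small, self-contained sketch of that ID hierarchy (the attempt string below is only an illustrative example in the format Hadoop itself prints, not output from any of these jobs):

import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;

public class TaskAttemptIdParts {

    public static void main(String[] args) {
        TaskAttemptID attempt = TaskAttemptID.forName("attempt_200707121733_0003_m_000005_0");
        TaskID task = attempt.getTaskID();  // task_200707121733_0003_m_000005
        JobID job = task.getJobID();        // job_200707121733_0003
        int taskIndex = task.getId();       // 5 -> formatted as "00005" in the part name above
        int attemptIndex = attempt.getId(); // 0
        System.out.println(task + " / " + job + " / " + taskIndex + " / " + attemptIndex);
    }
}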