List of usage examples for org.apache.hadoop.mapreduce TaskAttemptContext getTaskAttemptID
public TaskAttemptID getTaskAttemptID();
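Before the project examples below, here is a minimal, self-contained sketch of the most common pattern: reading the TaskAttemptID in a Mapper's setup() (Mapper.Context extends TaskAttemptContext) to derive a name that is unique per task attempt. This sketch is not taken from any of the listed projects; the class name and the key format are illustrative assumptions.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.TaskAttemptID;

// Hypothetical mapper: derives a name that is unique per task attempt, so
// speculative or retried attempts of the same task never collide.
public class TaskUniqueNameMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    private String taskUniqueName;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Mapper.Context extends TaskAttemptContext, so getTaskAttemptID() is available here.
        TaskAttemptID attemptId = context.getTaskAttemptID();
        // e.g. "attempt_1700000000000_0001_m_000000_0" -> "part-000000-0"
        taskUniqueName = String.format("part-%06d-%d",
                attemptId.getTaskID().getId(), attemptId.getId());
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        context.write(new Text(taskUniqueName), new LongWritable(value.getLength()));
    }
}

Because speculative execution can run two attempts of the same task concurrently, keying names off the attempt ID rather than the task ID alone keeps their outputs distinct, as several of the examples below also do.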
From source file:com.neusoft.hbase.test.hadoop.dataload.HFileOutputFormatBase.java
License:Apache License
// upstream: static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createRecordWriter()
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Path ignoreOutputPath = getDeleteRowKeyFile(outputPath);
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputdir.getFileSystem(conf);
    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config. Add to hbase-*.xml if other than default compression.
    final String defaultCompression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
            false);

    // create a map from column family to the compression algorithm
    final Map<byte[], String> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf);
    final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY);
    final HFileDataBlockEncoder encoder;
    if (dataBlockEncodingStr == null) {
        encoder = NoOpDataBlockEncoder.INSTANCE;
    } else {
        try {
            encoder = new HFileDataBlockEncoderImpl(DataBlockEncoding.valueOf(dataBlockEncodingStr));
        } catch (IllegalArgumentException ex) {
            throw new RuntimeException("Invalid data block encoding type configured for the param "
                    + DATABLOCK_ENCODING_CONF_KEY + " : " + dataBlockEncodingStr);
        }
    }

    return new RecordWriter<ImmutableBytesWritable, KeyValue>() {
        // Map of families to writers and how much has been output on the writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                Bytes.BYTES_COMPARATOR);
        private final FSDataOutputStream dos = fs.create(ignoreOutputPath);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException {
            // KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
            // null input == user explicitly wants to flush
            if (row == null && kv == null) {
                rollWriters();
                return;
            }
            byte[] rowKey = kv.getRow();
            long length = kv.getLength();
            byte[] family = kv.getFamily();
            if (ignore(kv)) {
                byte[] readBuf = rowKey;
                dos.write(readBuf, 0, readBuf.length);
                dos.write(Bytes.toBytes("\n"));
                return;
            }
            WriterLength wl = this.writers.get(family);
            // If this is a new column family, verify that the directory exists
            if (wl == null) {
                fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
            }
            // If any of the HFiles for the column families has reached maxsize,
            // we need to roll all the writers
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }
            // This can only happen once a row is finished though
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }
            // create a new HFile writer, if necessary
            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family, conf);
            }
            // we now have the proper HFile writer: full steam ahead
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;
            // Copy the row so we know when a row transition occurs.
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        /*
         * Create a new StoreFile.Writer.
         *
         * @param family
         * @return A WriterLength, containing a new StoreFile.Writer.
         * @throws IOException
         */
        private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            String compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            String bloomTypeStr = bloomTypeMap.get(family);
            BloomType bloomType = BloomType.NONE;
            if (bloomTypeStr != null) {
                bloomType = BloomType.valueOf(bloomTypeStr);
            }
            String blockSizeString = blockSizeMap.get(family);
            int blockSize = blockSizeString == null ? HConstants.DEFAULT_BLOCKSIZE
                    : Integer.parseInt(blockSizeString);
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
                    .withOutputDir(familydir).withBloomType(bloomType)
                    .withComparator(KeyValue.COMPARATOR).build();
            this.writers.put(family, wl);
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY,
                        Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            dos.flush();
            dos.close();
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}
From source file:com.pinterest.terrapin.hadoop.HFileOutputFormat.java
License:Apache License
public RecordWriter<BytesWritable, BytesWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputDir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputDir.getFileSystem(conf);

    int blockSize = conf.getInt(Constants.HFILE_BLOCKSIZE, 16384);
    // Default to snappy.
    Compression.Algorithm compressionAlgorithm = getAlgorithm(conf.get(Constants.HFILE_COMPRESSION));
    final StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, new CacheConfig(conf), fs, blockSize)
            .withFilePath(hfilePath(outputPath, context.getTaskAttemptID().getTaskID().getId()))
            .withCompression(compressionAlgorithm).build();
    return new HFileRecordWriter(writer);
}
From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java
License:Apache License
/**
 * Create a file output committer
 * @param outputPath the job's output path
 * @param context the task's context
 * @throws IOException
 */
public FileOutputCommitter(Path outputPath, TaskAttemptContext context) throws IOException {
    super(outputPath, context);
    Job job = new Job(context.getConfiguration());
    String outputDirectories = job.getConfiguration().get(MULTIPLE_OUTPUTS, "");
    if (outputDirectories != null) {
        StringTokenizer st = new StringTokenizer(outputDirectories, " ");
        while (st.hasMoreTokens()) {
            pathNames.add(st.nextToken());
        }
    }
    if (outputPath != null) {
        this.outputPath = outputPath;
        outputFileSystem = outputPath.getFileSystem(context.getConfiguration());
        workPath = new Path(outputPath,
                (FileOutputCommitter.TEMP_DIR_NAME + Path.SEPARATOR + "_"
                        + context.getTaskAttemptID().toString())).makeQualified(outputFileSystem);
        for (String p : pathNames) {
            if (outputPath.toString().endsWith(p)) {
                committers.put(p, this);
                fake = false;
                break;
            }
        }
    }
}
From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java
License:Apache License
/**
 * Move the files from the work directory to the job output directory
 * @param context the task context
 */
public void commitTask(TaskAttemptContext context) throws IOException {
    if (!fake || (committers.size() == 0)) {
        TaskAttemptID attemptId = context.getTaskAttemptID();
        if (workPath != null) {
            context.progress();
            if (outputFileSystem.exists(workPath)) {
                // Move the task outputs to their final place
                moveTaskOutputs(context, outputFileSystem, outputPath, workPath);
                // Delete the temporary task-specific output directory
                if (!outputFileSystem.delete(workPath, true)) {
                    LOG.warn("Failed to delete the temporary output"
                            + " directory of task: " + attemptId + " - " + workPath);
                }
                LOG.info("Saved output of task '" + attemptId + "' to " + outputPath);
            }
        }
        // commitJob(context);
    } else {
        for (FileOutputCommitter c : committers.values()) {
            c.commitTask(context);
        }
    }
}
From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java
License:Apache License
/**
 * Move all of the files from the work directory to the final output
 * @param context the task context
 * @param fs the output file system
 * @param jobOutputDir the final output directory
 * @param taskOutput the work path
 * @throws IOException
 */
private void moveTaskOutputs(TaskAttemptContext context, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    TaskAttemptID attemptId = context.getTaskAttemptID();
    context.progress();
    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, workPath);
        if (!fs.rename(taskOutput, finalOutputPath)) {
            if (!fs.delete(finalOutputPath, true)) {
                throw new IOException("Failed to delete earlier output of task: " + attemptId);
            }
            if (!fs.rename(taskOutput, finalOutputPath)) {
                throw new IOException("Failed to save output of task: " + attemptId);
            }
        }
        LOG.debug("Moved " + taskOutput + " to " + finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, workPath);
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths) {
                moveTaskOutputs(context, fs, jobOutputDir, path.getPath());
            }
        }
    }
}
From source file:com.scaleoutsoftware.soss.hserver.CachingRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    this.split = (ImageInputSplit) split;
    this.context = context;
    LOG.info("Recording split: " + split);
    fallBackRecordReader.initialize(((ImageInputSplit) split).getFallbackInputSplit(), context);
    try {
        bucketStore = BucketStoreFactory.getBucketStore(this.split.getImageIdString());
        BucketId = ((ImageInputSplit) split).getBucketId();
        sossAvailable = true;
        if (BucketId.isDummyId()) { // This split has never been recorded
            BucketId = bucketStore.getNextLocalBucketId();
        }
        LOG.debug("Updating image before recording. " + this.split.getImageIdString());
        updateImageBeforeRecording(context.getTaskAttemptID().getTaskID().toString());
    } catch (StateServerException e) {
        LOG.error("Cannot connect to ScaleOut StateServer.", e);
        sossAvailable = false;
    }
    numberOfKeys = 0;
}
From source file:com.splicemachine.derby.stream.spark.fake.FakeOutputCommitter.java
License:Apache License
@Override
public void abortTask(TaskAttemptContext taskAttemptContext) throws IOException {
    String abortDirectory = taskAttemptContext.getConfiguration().get("abort.directory");
    File file = new File(abortDirectory, taskAttemptContext.getTaskAttemptID().getTaskID().toString());
    file.createNewFile();
}
From source file:com.splicemachine.stream.index.HTableOutputFormat.java
License:Apache License
@Override
public RecordWriter<byte[], Either<Exception, KVPair>> getRecordWriter(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    try {
        DataSetWriterBuilder tableWriter = TableWriterUtils
                .deserializeTableWriter(taskAttemptContext.getConfiguration());
        TxnView childTxn = outputCommitter.getChildTransaction(taskAttemptContext.getTaskAttemptID());
        if (childTxn == null)
            throw new IOException("child transaction lookup failed");
        tableWriter.txn(childTxn);
        return new HTableRecordWriter(tableWriter.buildTableWriter(), outputCommitter);
    } catch (Exception e) {
        throw new IOException(e);
    }
}
From source file:com.splicemachine.stream.output.SMOutputFormat.java
License:Apache License
@Override
public RecordWriter<RowLocation, Either<Exception, ExecRow>> getRecordWriter(
        TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
    try {
        DataSetWriterBuilder dsWriter = TableWriterUtils
                .deserializeTableWriter(taskAttemptContext.getConfiguration());
        TxnView childTxn = outputCommitter.getChildTransaction(taskAttemptContext.getTaskAttemptID());
        if (childTxn == null)
            throw new IOException("child transaction lookup failed");
        dsWriter.txn(childTxn);
        return new SMRecordWriter(dsWriter.buildTableWriter(), outputCommitter);
    } catch (Exception e) {
        throw new IOException(e);
    }
}
From source file:com.splicemachine.stream.output.SpliceOutputCommitter.java
License:Apache License
@Override
public void setupTask(TaskAttemptContext taskContext) throws IOException {
    if (LOG.isDebugEnabled())
        SpliceLogUtils.debug(LOG, "setupTask");
    // Create child additive transaction so we don't read rows inserted by ourselves in this operation
    TxnView txn = SIDriver.driver().lifecycleManager().beginChildTransaction(parentTxn,
            parentTxn.getIsolationLevel(), true, destinationTable);
    ActiveWriteTxn childTxn = new ActiveWriteTxn(txn.getTxnId(), txn.getTxnId(), parentTxn, true,
            parentTxn.getIsolationLevel());
    taskAttemptMap.put(taskContext.getTaskAttemptID(), childTxn);
    if (LOG.isDebugEnabled())
        SpliceLogUtils.debug(LOG, "beginTxn=%s and destinationTable=%s", childTxn, destinationTable);
}