Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getTaskAttemptID

List of usage examples for org.apache.hadoop.mapreduce TaskAttemptContext getTaskAttemptID

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce TaskAttemptContext getTaskAttemptID.

Prototype

public TaskAttemptID getTaskAttemptID();

Document

Get the unique name for this task attempt.
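
Before the examples from real projects, here is a minimal, self-contained sketch of the typical pattern (the class and all names in it are illustrative and not taken from any of the source files below): a Mapper reads its own task attempt ID in setup() and uses it to tag every record it emits.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class AttemptTaggingMapper extends Mapper<LongWritable, Text, Text, Text> {

    private Text attemptId;

    @Override
    protected void setup(Context context) {
        // Mapper.Context extends TaskAttemptContext, so getTaskAttemptID() is available here.
        attemptId = new Text(context.getTaskAttemptID().toString());
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Tag each output record with the attempt that produced it,
        // e.g. "attempt_201901011200_0001_m_000000_0".
        context.write(attemptId, value);
    }
}

The examples below use the same call to build per-attempt file names, HFile metadata entries, working directories, and per-task map keys.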

Usage

From source file:com.neusoft.hbase.test.hadoop.dataload.HFileOutputFormatBase.java

License:Apache License

// Adapted from HBase's static <V extends Cell> createRecordWriter(), specialized here to KeyValue values.
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {

    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
    // Side file that collects the row keys of ignored (to-be-deleted) KeyValues; see ignore(kv) below.
    final Path ignoreOutputPath = getDeleteRowKeyFile(outputPath);

    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputdir.getFileSystem(conf);
    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config. Add to hbase-*.xml if other than default
    // compression.
    final String defaultCompression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
            false);

    // create a map from column family to the compression algorithm
    final Map<byte[], String> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf);
    final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY);
    final HFileDataBlockEncoder encoder;
    if (dataBlockEncodingStr == null) {
        encoder = NoOpDataBlockEncoder.INSTANCE;
    } else {
        try {
            encoder = new HFileDataBlockEncoderImpl(DataBlockEncoding.valueOf(dataBlockEncodingStr));
        } catch (IllegalArgumentException ex) {
            throw new RuntimeException("Invalid data block encoding type configured for the param "
                    + DATABLOCK_ENCODING_CONF_KEY + " : " + dataBlockEncodingStr);
        }
    }

    return new RecordWriter<ImmutableBytesWritable, KeyValue>() {

        // Map of families to writers and how much has been output on the
        // writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                Bytes.BYTES_COMPARATOR);
        private final FSDataOutputStream dos = fs.create(ignoreOutputPath);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException {
            // Upstream HFileOutputFormat2 receives a Cell here and converts it with
            // KeyValueUtil.ensureKeyValue(cell); this variant takes a KeyValue directly.

            // null input == user explicitly wants to flush
            if (row == null && kv == null) {
                rollWriters();
                return;
            }

            byte[] rowKey = kv.getRow();
            long length = kv.getLength();
            byte[] family = kv.getFamily();

            if (ignore(kv)) { // row key marked for deletion: write it to the side file and skip the HFile
                byte[] readBuf = rowKey;
                dos.write(readBuf, 0, readBuf.length);
                dos.write(Bytes.toBytes("\n"));
                return;
            }

            WriterLength wl = this.writers.get(family);

            // If this is a new column family, verify that the directory
            // exists
            if (wl == null) {
                fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
            }

            // If any of the HFiles for the column families has reached
            // maxsize, we need to roll all the writers
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }

            // This can only happen once a row is finished though
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }

            // create a new HLog writer, if necessary
            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family, conf);
            }

            // we now have the proper HLog writer. full steam ahead
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;

            // Copy the row so we know when a row transition happens.
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        /*
         * Create a new StoreFile.Writer.
         * 
         * @param family
         * 
         * @return A WriterLength, containing a new StoreFile.Writer.
         * 
         * @throws IOException
         */
        private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            String compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            String bloomTypeStr = bloomTypeMap.get(family);
            BloomType bloomType = BloomType.NONE;
            if (bloomTypeStr != null) {
                bloomType = BloomType.valueOf(bloomTypeStr);
            }
            String blockSizeString = blockSizeMap.get(family);
            int blockSize = blockSizeString == null ? HConstants.DEFAULT_BLOCKSIZE
                    : Integer.parseInt(blockSizeString);
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
                    .withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR)
                    .build();

            this.writers.put(family, wl);
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            dos.flush();
            dos.close();
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }

    };
}

From source file:com.pinterest.terrapin.hadoop.HFileOutputFormat.java

License:Apache License

public RecordWriter<BytesWritable, BytesWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputDir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputDir.getFileSystem(conf);

    int blockSize = conf.getInt(Constants.HFILE_BLOCKSIZE, 16384);
    // Default to snappy.
    Compression.Algorithm compressionAlgorithm = getAlgorithm(conf.get(Constants.HFILE_COMPRESSION));
    final StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, new CacheConfig(conf), fs, blockSize)
            .withFilePath(hfilePath(outputPath, context.getTaskAttemptID().getTaskID().getId()))
            .withCompression(compressionAlgorithm).build();
    return new HFileRecordWriter(writer);
}

From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java

License:Apache License

/**
 * Create a file output committer
 * @param outputPath the job's output path
 * @param context the task's context
 * @throws IOException
 */
public FileOutputCommitter(Path outputPath, TaskAttemptContext context) throws IOException {
    super(outputPath, context);
    Job job = new Job(context.getConfiguration());
    String outputDirectories = job.getConfiguration().get(MULTIPLE_OUTPUTS, "");
    if (outputDirectories != null) {
        StringTokenizer st = new StringTokenizer(outputDirectories, " ");
        while (st.hasMoreTokens()) {
            pathNames.add(st.nextToken());
        }
    }
    if (outputPath != null) {
        this.outputPath = outputPath;
        outputFileSystem = outputPath.getFileSystem(context.getConfiguration());
        workPath = new Path(outputPath, (FileOutputCommitter.TEMP_DIR_NAME + Path.SEPARATOR + "_"
                + context.getTaskAttemptID().toString())).makeQualified(outputFileSystem);
        for (String p : pathNames) {
            if (outputPath.toString().endsWith(p)) {
                committers.put(p, this);
                fake = false;
                break;
            }
        }
    }
}

From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java

License:Apache License

/**
 * Move the files from the work directory to the job output directory
 * @param context the task context
 */
public void commitTask(TaskAttemptContext context) throws IOException {
    if (!fake || (committers.size() == 0)) {
        TaskAttemptID attemptId = context.getTaskAttemptID();
        if (workPath != null) {
            context.progress();
            if (outputFileSystem.exists(workPath)) {
                // Move the task outputs to their final place
                moveTaskOutputs(context, outputFileSystem, outputPath, workPath);
                // Delete the temporary task-specific output directory
                if (!outputFileSystem.delete(workPath, true)) {
                    LOG.warn("Failed to delete the temporary output" + " directory of task: " + attemptId
                            + " - " + workPath);
                }
                LOG.info("Saved output of task '" + attemptId + "' to " + outputPath);
            }
        }
        //        commitJob(context);
    } else {
        for (FileOutputCommitter c : committers.values()) {
            c.commitTask(context);
        }
    }
}

From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java

License:Apache License

/**
 * Move all of the files from the work directory to the final output
 * @param context the task context
 * @param fs the output file system
 * @param jobOutputDir the final output directory
 * @param taskOutput the work path
 * @throws IOException
 */
private void moveTaskOutputs(TaskAttemptContext context, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    TaskAttemptID attemptId = context.getTaskAttemptID();
    context.progress();
    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, workPath);
        if (!fs.rename(taskOutput, finalOutputPath)) {
            if (!fs.delete(finalOutputPath, true)) {
                throw new IOException("Failed to delete earlier output of task: " + attemptId);
            }
            if (!fs.rename(taskOutput, finalOutputPath)) {
                throw new IOException("Failed to save output of task: " + attemptId);
            }
        }
        LOG.debug("Moved " + taskOutput + " to " + finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, workPath);
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths) {
                moveTaskOutputs(context, fs, jobOutputDir, path.getPath());
            }
        }
    }
}

From source file:com.scaleoutsoftware.soss.hserver.CachingRecordReader.java

License:Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    this.split = (ImageInputSplit) split;
    this.context = context;
    LOG.info("Recording split: " + split);
    fallBackRecordReader.initialize(((ImageInputSplit) split).getFallbackInputSplit(), context);

    try {
        bucketStore = BucketStoreFactory.getBucketStore(this.split.getImageIdString());
        BucketId = ((ImageInputSplit) split).getBucketId();

        sossAvailable = true;

        if (BucketId.isDummyId()) //This split has never been recorded
        {
            BucketId = bucketStore.getNextLocalBucketId();
        }
        LOG.debug("Updating image before recording. " + this.split.getImageIdString());
        updateImageBeforeRecording(context.getTaskAttemptID().getTaskID().toString());
    } catch (StateServerException e) {
        LOG.error("Cannot connect to ScaleOut StateServer.", e);
        sossAvailable = false;
    }
    numberOfKeys = 0;
}

From source file:com.splicemachine.derby.stream.spark.fake.FakeOutputCommitter.java

License:Apache License

@Override
public void abortTask(TaskAttemptContext taskAttemptContext) throws IOException {
    String abortDirectory = taskAttemptContext.getConfiguration().get("abort.directory");
    File file = new File(abortDirectory, taskAttemptContext.getTaskAttemptID().getTaskID().toString());
    file.createNewFile();
}

From source file:com.splicemachine.stream.index.HTableOutputFormat.java

License:Apache License

@Override
public RecordWriter<byte[], Either<Exception, KVPair>> getRecordWriter(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    try {
        DataSetWriterBuilder tableWriter = TableWriterUtils
                .deserializeTableWriter(taskAttemptContext.getConfiguration());
        TxnView childTxn = outputCommitter.getChildTransaction(taskAttemptContext.getTaskAttemptID());
        if (childTxn == null)
            throw new IOException("child transaction lookup failed");
        tableWriter.txn(childTxn);
        return new HTableRecordWriter(tableWriter.buildTableWriter(), outputCommitter);
    } catch (Exception e) {
        throw new IOException(e);
    }
}

From source file:com.splicemachine.stream.output.SMOutputFormat.java

License:Apache License

@Override
public RecordWriter<RowLocation, Either<Exception, ExecRow>> getRecordWriter(
        TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
    try {
        DataSetWriterBuilder dsWriter = TableWriterUtils
                .deserializeTableWriter(taskAttemptContext.getConfiguration());
        TxnView childTxn = outputCommitter.getChildTransaction(taskAttemptContext.getTaskAttemptID());
        if (childTxn == null)
            throw new IOException("child transaction lookup failed");
        dsWriter.txn(outputCommitter.getChildTransaction(taskAttemptContext.getTaskAttemptID()));
        return new SMRecordWriter(dsWriter.buildTableWriter(), outputCommitter);
    } catch (Exception e) {
        throw new IOException(e);
    }
}

From source file:com.splicemachine.stream.output.SpliceOutputCommitter.java

License:Apache License

@Override
public void setupTask(TaskAttemptContext taskContext) throws IOException {

    if (LOG.isDebugEnabled())
        SpliceLogUtils.debug(LOG, "setupTask");
    // Create child additive transaction so we don't read rows inserted by ourselves in this operation
    TxnView txn = SIDriver.driver().lifecycleManager().beginChildTransaction(parentTxn,
            parentTxn.getIsolationLevel(), true, destinationTable);
    ActiveWriteTxn childTxn = new ActiveWriteTxn(txn.getTxnId(), txn.getTxnId(), parentTxn, true,
            parentTxn.getIsolationLevel());
    taskAttemptMap.put(taskContext.getTaskAttemptID(), childTxn);
    if (LOG.isDebugEnabled())
        SpliceLogUtils.debug(LOG, "beginTxn=%s and destinationTable=%s", childTxn, destinationTable);

}