List of usage examples for org.apache.hadoop.mapreduce TaskAttemptContext getTaskAttemptID
public TaskAttemptID getTaskAttemptID();
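Before the project-specific snippets below, a minimal sketch of the pattern most of them share: obtain the TaskAttemptID from the context, walk down to its TaskID, and use the numeric task id as a partition/shard number when naming per-task output. The class and method names here (TaskAttemptIdExample, describe) are illustrative only, not part of Hadoop or of any project listed below.

import java.text.NumberFormat;

import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;

public final class TaskAttemptIdExample {

  // Derive a per-task partition number and a log-friendly attempt string from the
  // TaskAttemptContext handed to a RecordWriter, OutputFormat, or OutputCommitter.
  public static String describe(TaskAttemptContext context) {
    TaskAttemptID attemptId = context.getTaskAttemptID(); // e.g. attempt_201401010000_0001_m_000005_0
    TaskID taskId = attemptId.getTaskID();                // same id without the attempt counter
    int partition = taskId.getId();                       // 0-based task number, handy for file naming

    NumberFormat nf = NumberFormat.getInstance();
    nf.setMinimumIntegerDigits(5);
    nf.setGroupingUsed(false);

    return "attempt=" + attemptId + ", task=" + taskId + ", part-" + nf.format(partition);
  }
}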
From source file:com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java
License:Apache License
/**
 * Generate a unique filename, based on the task id, name, and extension
 *
 * @param context
 *          the task that is calling this
 * @param name
 *          the base filename
 * @param extension
 *          the filename extension
 * @return a string like $name-[mrsct]-$id$extension
 */
public synchronized static String getUniqueFile(TaskAttemptContext context, String name, String extension) {
  TaskID taskId = context.getTaskAttemptID().getTaskID();
  int partition = taskId.getId();
  StringBuilder sb = new StringBuilder();
  sb.append(name);
  sb.append(extension);
  String result = sb.toString().replace("{fileCount}", NUMBER_FORMAT.format(partition));
  return result;
}
From source file:com.ci.backports.hadoop.hbase.ZHFileOutputFormat.java
License:Apache License
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
    throws IOException, InterruptedException {
  // Get the path of the temporary output file
  final Path outputPath = FileOutputFormat.getOutputPath(context);
  final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
  Configuration conf = context.getConfiguration();
  final FileSystem fs = outputdir.getFileSystem(conf);
  // These configs. are from hbase-*.xml
  final long maxsize = conf.getLong("hbase.hregion.max.filesize", HConstants.DEFAULT_MAX_FILE_SIZE);
  final int blocksize = conf.getInt("hfile.min.blocksize.size", HFile.DEFAULT_BLOCKSIZE);
  // Invented config. Add to hbase-*.xml if other than default compression.
  final String compression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());

  return new RecordWriter<ImmutableBytesWritable, KeyValue>() {
    // Map of families to writers and how much has been output on the writer.
    private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
        Bytes.BYTES_COMPARATOR);
    private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
    private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
    private boolean rollRequested = false;

    public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException {
      // null input == user explicitly wants to flush
      if (row == null && kv == null) {
        rollWriters();
        return;
      }
      byte[] rowKey = kv.getRow();
      long length = kv.getLength();
      byte[] family = kv.getFamily();
      WriterLength wl = this.writers.get(family);
      // If this is a new column family, verify that the directory exists
      if (wl == null) {
        fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
      }
      // If any of the HFiles for the column families has reached
      // maxsize, we need to roll all the writers
      if (wl != null && wl.written + length >= maxsize) {
        this.rollRequested = true;
      }
      // This can only happen once a row is finished though
      if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
        rollWriters();
      }
      // create a new writer, if necessary
      if (wl == null || wl.writer == null) {
        wl = getNewWriter(family);
      }
      // we now have the proper writer. full steam ahead
      kv.updateLatestStamp(this.now);
      wl.writer.append(kv);
      wl.written += length;
      // Copy the row so we know when a row transition.
      this.previousRow = rowKey;
    }

    private void rollWriters() throws IOException {
      for (WriterLength wl : this.writers.values()) {
        if (wl.writer != null) {
          LOG.info("Writer=" + wl.writer.getPath()
              + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
          close(wl.writer);
        }
        wl.writer = null;
        wl.written = 0;
      }
      this.rollRequested = false;
    }

    /*
     * Create a new HFile.Writer.
     * @param family
     * @return A WriterLength, containing a new HFile.Writer.
     * @throws IOException
     */
    private WriterLength getNewWriter(byte[] family) throws IOException {
      WriterLength wl = new WriterLength();
      Path familydir = new Path(outputdir, Bytes.toString(family));
      wl.writer = new HFile.Writer(fs, StoreFile.getUniqueFile(fs, familydir), blocksize, compression,
          KeyValue.KEY_COMPARATOR);
      this.writers.put(family, wl);
      return wl;
    }

    private void close(final HFile.Writer w) throws IOException {
      if (w != null) {
        w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
        w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY, Bytes.toBytes(context.getTaskAttemptID().toString()));
        w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
        w.close();
      }
    }

    public void close(TaskAttemptContext c) throws IOException, InterruptedException {
      for (WriterLength wl : this.writers.values()) {
        close(wl.writer);
      }
    }
  };
}
From source file:com.citic.zxyjs.zwlscx.mapreduce.lib.input.HFileOutputFormatBase.java
License:Apache License
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
    throws IOException, InterruptedException {
  // Get the path of the temporary output file
  final Path outputPath = FileOutputFormat.getOutputPath(context);
  final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
  final Path ignoreOutputPath = new Path(outputPath + "_ignore");
  final Configuration conf = context.getConfiguration();
  final FileSystem fs = outputdir.getFileSystem(conf);
  // These configs. are from hbase-*.xml
  final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
  // Invented config. Add to hbase-*.xml if other than default compression.
  final String defaultCompression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
  final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
      false);

  if (fs.exists(ignoreOutputPath)) {
    LOG.info("Deleted " + ignoreOutputPath.toString() + " success.");
    fs.delete(ignoreOutputPath, true);
  }

  // create a map from column family to the compression algorithm
  final Map<byte[], String> compressionMap = createFamilyCompressionMap(conf);
  final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf);
  final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf);

  String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY);
  final HFileDataBlockEncoder encoder;
  if (dataBlockEncodingStr == null) {
    encoder = NoOpDataBlockEncoder.INSTANCE;
  } else {
    try {
      encoder = new HFileDataBlockEncoderImpl(DataBlockEncoding.valueOf(dataBlockEncodingStr));
    } catch (IllegalArgumentException ex) {
      throw new RuntimeException("Invalid data block encoding type configured for the param "
          + DATABLOCK_ENCODING_CONF_KEY + " : " + dataBlockEncodingStr);
    }
  }

  return new RecordWriter<ImmutableBytesWritable, KeyValue>() {
    // Map of families to writers and how much has been output on the writer.
    private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
        Bytes.BYTES_COMPARATOR);
    private final FSDataOutputStream dos = fs.create(ignoreOutputPath);
    private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
    private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
    private boolean rollRequested = false;

    public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException {
      // null input == user explicitly wants to flush
      if (row == null && kv == null) {
        rollWriters();
        return;
      }
      byte[] rowKey = kv.getRow();
      long length = kv.getLength();
      byte[] family = kv.getFamily();
      if (ignore(kv)) {
        byte[] readBuf = rowKey;
        dos.write(readBuf, 0, readBuf.length);
        dos.write(Bytes.toBytes("\n"));
        return;
      }
      WriterLength wl = this.writers.get(family);
      // If this is a new column family, verify that the directory exists
      if (wl == null) {
        Path path = new Path(outputdir, Bytes.toString(family));
        fs.mkdirs(path);
      }
      // If any of the HFiles for the column families has reached
      // maxsize, we need to roll all the writers
      if (wl != null && wl.written + length >= maxsize) {
        this.rollRequested = true;
      }
      // This can only happen once a row is finished though
      if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
        rollWriters();
      }
      // create a new writer, if necessary
      if (wl == null || wl.writer == null) {
        wl = getNewWriter(family, conf);
      }
      // we now have the proper writer. full steam ahead
      kv.updateLatestStamp(this.now);
      wl.writer.append(kv);
      wl.written += length;
      // Copy the row so we know when a row transition.
      this.previousRow = rowKey;
    }

    private void rollWriters() throws IOException {
      for (WriterLength wl : this.writers.values()) {
        if (wl.writer != null) {
          LOG.info("Writer=" + wl.writer.getPath()
              + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
          close(wl.writer);
        }
        wl.writer = null;
        wl.written = 0;
      }
      this.rollRequested = false;
    }

    /*
     * Create a new StoreFile.Writer.
     * @param family
     * @return A WriterLength, containing a new StoreFile.Writer.
     * @throws IOException
     */
    private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
      WriterLength wl = new WriterLength();
      Path familydir = new Path(outputdir, Bytes.toString(family));
      String compression = compressionMap.get(family);
      compression = compression == null ? defaultCompression : compression;
      String bloomTypeStr = bloomTypeMap.get(family);
      BloomType bloomType = BloomType.NONE;
      if (bloomTypeStr != null) {
        bloomType = BloomType.valueOf(bloomTypeStr);
      }
      String blockSizeString = blockSizeMap.get(family);
      int blockSize = blockSizeString == null ? HConstants.DEFAULT_BLOCKSIZE
          : Integer.parseInt(blockSizeString);
      Configuration tempConf = new Configuration(conf);
      tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
      wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs, blockSize)
          .withOutputDir(familydir)
          .withCompression(AbstractHFileWriter.compressionByName(compression))
          .withBloomType(bloomType).withComparator(KeyValue.COMPARATOR).withDataBlockEncoder(encoder)
          .withChecksumType(HStore.getChecksumType(conf))
          .withBytesPerChecksum(HStore.getBytesPerChecksum(conf)).build();
      this.writers.put(family, wl);
      return wl;
    }

    private void close(final StoreFile.Writer w) throws IOException {
      if (w != null) {
        w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
        w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY, Bytes.toBytes(context.getTaskAttemptID().toString()));
        w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
        w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
        w.appendTrackedTimestampsToMetadata();
        w.close();
      }
    }

    public void close(TaskAttemptContext c) throws IOException, InterruptedException {
      dos.flush();
      dos.close();
      for (WriterLength wl : this.writers.values()) {
        close(wl.writer);
      }
    }
  };
}
From source file:com.cloudera.dataflow.spark.ShardNameTemplateHelper.java
License:Open Source License
private static String getOutputFile(TaskAttemptContext context) {
  TaskID taskId = context.getTaskAttemptID().getTaskID();
  int partition = taskId.getId();
  String filePrefix = context.getConfiguration().get(OUTPUT_FILE_PREFIX);
  String fileTemplate = context.getConfiguration().get(OUTPUT_FILE_TEMPLATE);
  String fileSuffix = context.getConfiguration().get(OUTPUT_FILE_SUFFIX);
  return filePrefix + replaceShardNumber(fileTemplate, partition) + fileSuffix;
}
From source file:com.datasalt.pangool.solr.SolrRecordWriter.java
License:Apache License
private String getOutFileName(TaskAttemptContext context, String prefix) {
  TaskID taskId = context.getTaskAttemptID().getTaskID();
  int partition = taskId.getId();
  NumberFormat nf = NumberFormat.getInstance();
  nf.setMinimumIntegerDigits(5);
  nf.setGroupingUsed(false);
  StringBuilder result = new StringBuilder();
  result.append(prefix);
  result.append("-");
  result.append(nf.format(partition));
  return result.toString();
}
From source file:com.datasalt.pangool.solr.SolrRecordWriter.java
License:Apache License
public SolrRecordWriter(int batchSize, boolean outputZipFile, int threadCount, int queueSize,
    String localSolrHome, String zipName, TupleDocumentConverter converter, TaskAttemptContext context) {
  this.localSolrHome = localSolrHome;
  this.zipName = zipName;
  conf = context.getConfiguration();
  this.batchSize = batchSize;
  setLogLevel("org.apache.solr.core", "WARN");
  setLogLevel("org.apache.solr.update", "WARN");
  Logger.getLogger("org.apache.solr.core").setLevel(Level.WARN);
  Logger.getLogger("org.apache.solr.update").setLevel(Level.WARN);
  java.util.logging.Logger.getLogger("org.apache.solr.core").setLevel(java.util.logging.Level.WARNING);
  java.util.logging.Logger.getLogger("org.apache.solr.update").setLevel(java.util.logging.Level.WARNING);
  setLogLevel("org.apache.solr", "WARN");
  Logger.getLogger("org.apache.solr").setLevel(Level.WARN);
  java.util.logging.Logger.getLogger("org.apache.solr").setLevel(java.util.logging.Level.WARNING);
  heartBeater = new HeartBeater(context);
  try {
    heartBeater.needHeartBeat();
    /** The actual file in hdfs that holds the configuration. */
    this.outputZipFile = outputZipFile;
    this.fs = FileSystem.get(conf);
    perm = new Path(FileOutputFormat.getOutputPath(context), getOutFileName(context, "part"));
    // Make a task unique name that contains the actual index output name to
    // make debugging simpler
    // Note: if using JVM reuse, the sequence number will not be reset for a
    // new task using the jvm
    Path temp = conf.getLocalPath("mapred.local.dir",
        "solr_" + conf.get("mapred.task.id") + '.' + sequence.incrementAndGet());
    if (outputZipFile && !perm.getName().endsWith(".zip")) {
      perm = perm.suffix(".zip");
    }
    fs.delete(temp, true); // delete old, if any
    fs.delete(perm, true); // delete old, if any
    local = fs.startLocalOutput(perm, temp);
    solrHome = findSolrConfig(conf);
    // Verify that the solr home has a conf and lib directory
    if (solrHome == null) {
      throw new IOException("Unable to find solr home setting");
    }
    // Setup a solr instance that we can batch writes to
    LOG.info("SolrHome: " + solrHome.toUri());
    String dataDir = new File(local.toString(), "data").getAbsoluteFile().toString();
    // copy the schema to the conf dir
    File confDir = new File(local.toString(), "conf");
    confDir.mkdirs();
    File unpackedSolrHome = new File(solrHome.toString());
    FileUtils.copyDirectory(new File(unpackedSolrHome, "conf"), confDir);
    Properties props = new Properties();
    props.setProperty("solr.data.dir", dataDir);
    props.setProperty("solr.home", solrHome.toString());
    SolrResourceLoader loader = new SolrResourceLoader(solrHome.toString(), null, props);
    LOG.info(String.format(
        "Constructed instance information solr.home %s (%s), instance dir %s, conf dir %s, writing index to temporary directory %s, with permdir %s",
        solrHome, solrHome.toUri(), loader.getInstanceDir(), loader.getConfigDir(), dataDir, perm));
    CoreContainer container = new CoreContainer(loader);
    CoreDescriptor descr = new CoreDescriptor(container, "core1", solrHome.toString());
    descr.setDataDir(dataDir);
    descr.setCoreProperties(props);
    core = container.create(descr);
    container.register(core, false);
    solr = new EmbeddedSolrServer(container, "core1");
    batchWriter = new BatchWriter(solr, batchSize, context.getTaskAttemptID().getTaskID(), threadCount,
        queueSize);
    this.converter = converter;
  } catch (Exception e) {
    e.printStackTrace();
    LOG.error(e);
    throw new IllegalStateException(String.format("Failed to initialize record writer for %s, %s",
        context.getJobName(), conf.get("mapred.task.id")), e);
  } finally {
    heartBeater.cancelHeartBeat();
  }
}
From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.ProxyOutputFormat.java
License:Apache License
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException {
  createOutputFormatIfNeeded(context);

  String outDir = context.getConfiguration().get("mapred.output.dir");
  originalDir = outDir;
  FileOutputCommitter committer = (FileOutputCommitter) super.getOutputCommitter(context);
  baseDir = committer.getWorkPath() + "";
  Configuration conf = new Configuration(context.getConfiguration());
  TaskAttemptContext reContext;
  try {
    reContext = TaskAttemptContextFactory.get(conf, context.getTaskAttemptID());
  } catch (Exception e) {
    throw new IOException(e);
  }
  reContext.getConfiguration().set("mapred.output.dir", baseDir);
  // This is for Hadoop 2.0 :
  reContext.getConfiguration().set("mapreduce.output.fileoutputformat.outputdir", baseDir);
  try {
    return new ProxyOutputCommitter(new Path(originalDir), context,
        outputFormat.getOutputCommitter(reContext));
  } catch (InterruptedException e) {
    throw new RuntimeException(e);
  }
}
From source file:com.david.mos.out.FileOutputFormat.java
License:Apache License
/**
 * Generate a unique filename, based on the task id, name, and extension
 * @param context the task that is calling this
 * @param name the base filename
 * @param extension the filename extension
 * @return a string like $name-[mrsct]-$id$extension
 */
public synchronized static String getUniqueFile(TaskAttemptContext context, String name, String extension) {
  TaskID taskId = context.getTaskAttemptID().getTaskID();
  int partition = taskId.getId();
  StringBuilder result = new StringBuilder();
  result.append(name);
  // TODO enb-r-00000
  System.out.println("###################################" + name);
  // result.append('-');
  // result.append(TaskID.getRepresentingCharacter(taskId.getTaskType()));
  // result.append('-');
  // result.append(NUMBER_FORMAT.format(partition));
  // result.append(extension);
  return result.toString();
}
From source file:com.google.cloud.hadoop.util.HadoopToStringUtil.java
License:Open Source License
public static String toString(TaskAttemptContext input) {
  if (input == null) {
    return "null";
  }
  String result = "TaskAttemptContext::";
  result += " TaskAttemptID:" + input.getTaskAttemptID();
  result += " Status:" + input.getStatus();
  return result;
}
From source file:com.inmobi.conduit.distcp.tools.mapred.lib.TestDynamicInputFormat.java
License:Apache License
@Test
public void testGetSplits() throws Exception {
  DistCpOptions options = getOptions();
  Configuration configuration = new Configuration();
  configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
  CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(
      new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testDynInputFormat/fileList.seq"),
      options);

  JobID jobId = new JobID();
  JobContext jobContext = mock(JobContext.class);
  when(jobContext.getConfiguration()).thenReturn(configuration);
  when(jobContext.getJobID()).thenReturn(jobId);
  DynamicInputFormat<Text, FileStatus> inputFormat = new DynamicInputFormat<Text, FileStatus>();
  List<InputSplit> splits = inputFormat.getSplits(jobContext);

  int nFiles = 0;
  int taskId = 0;

  for (InputSplit split : splits) {
    TaskAttemptID tId = new TaskAttemptID("", 0, true, taskId, 0);
    final TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
    when(taskAttemptContext.getConfiguration()).thenReturn(configuration);
    when(taskAttemptContext.getTaskAttemptID()).thenReturn(tId);
    RecordReader<Text, FileStatus> recordReader = inputFormat.createRecordReader(split, taskAttemptContext);
    recordReader.initialize(splits.get(0), taskAttemptContext);
    float previousProgressValue = 0f;
    while (recordReader.nextKeyValue()) {
      FileStatus fileStatus = recordReader.getCurrentValue();
      String source = fileStatus.getPath().toString();
      System.out.println(source);
      Assert.assertTrue(expectedFilePaths.contains(source));
      final float progress = recordReader.getProgress();
      Assert.assertTrue(progress >= previousProgressValue);
      Assert.assertTrue(progress >= 0.0f);
      Assert.assertTrue(progress <= 1.0f);
      previousProgressValue = progress;
      ++nFiles;
    }
    Assert.assertTrue(recordReader.getProgress() == 1.0f);
    ++taskId;
  }
  Assert.assertEquals(expectedFilePaths.size(), nFiles);
}