Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getTaskAttemptID

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce TaskAttemptContext getTaskAttemptID.

Prototype

public TaskAttemptID getTaskAttemptID();

Document

Get the unique name for this task attempt.
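As a minimal sketch of how the attempt ID decomposes (the TaskAttemptContext named context is assumed to be handed in by the framework; the helper class and method names are illustrative only):

import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;

public class TaskNaming {

    // Illustrative helper: derive a per-task part-file suffix from the attempt ID.
    public static String partSuffix(TaskAttemptContext context) {
        // unique name of this attempt, e.g. attempt_1521499999999_0001_r_000003_0
        TaskAttemptID attemptId = context.getTaskAttemptID();
        // the task this attempt belongs to; getId() is its numeric index
        TaskID taskId = attemptId.getTaskID();
        // task 3, first attempt -> "00003-0"
        return String.format("%05d-%d", taskId.getId(), attemptId.getId());
    }
}

The examples below all rely on this same decomposition: the numeric task ID becomes part of a file or table name that must be unique per task.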

Usage

From source file: org.schedoscope.export.ftp.outputformat.FtpUploadOutputCommitter.java

License: Apache License

@Override
public void commitTask(TaskAttemptContext context) throws IOException {

    super.commitTask(context);

    String fileName = FtpUploadOutputFormat.getOutputName(context);
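    // the numeric task id keeps the remote file name unique per task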
    String remote = endpoint + "/" + filePrefix + context.getTaskAttemptID().getTaskID().getId() + "-"
            + numReducer + FtpUploadOutputFormat.getOutputNameExtension();

    Configuration conf = context.getConfiguration();

    if (keyContent != null && !keyContent.isEmpty()) {
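        // write the configured key material to a temp file for the uploader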

        File keyFile = File.createTempFile(TMP_FILE_PREFIX, TMP_FILE_SUFFIX);
        keyFile.deleteOnExit();
        Files.write(keyContent.getBytes(StandardCharsets.US_ASCII), keyFile);

        uploader = new Uploader(user, new File(keyFile.getCanonicalPath()), pass, conf, passiveMode,
                userIsRoot);
    } else {
        uploader = new Uploader(user, pass, conf, passiveMode, userIsRoot);

    }
    uploader.uploadFile(new Path(outputPath, fileName).toString(), remote);
    uploader.closeFilesystem();
}

From source file: org.schedoscope.export.jdbc.outputformat.JdbcOutputFormat.java

License: Apache License

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {

    Schema outputSchema = SchemaFactory.getSchema(context.getConfiguration());

    String tmpOutputTable = getTablePrefix(outputSchema) + outputSchema.getTable() + "_"
            + context.getTaskAttemptID().getTaskID().getId();
    String createTableQuery = outputSchema.getCreateTableQuery();

    createTableQuery = createTableQuery.replace(outputSchema.getTable(), tmpOutputTable);

    int commitSize = outputSchema.getCommitSize();
    String[] fieldNames = outputSchema.getColumnNames();

    try {
        Connection connection = outputSchema.getConnection();

        JdbcQueryUtils.dropTable(tmpOutputTable, connection);
        JdbcQueryUtils.createTable(createTableQuery, connection);

        PreparedStatement statement = connection
                .prepareStatement(JdbcQueryUtils.createInsertQuery(tmpOutputTable, fieldNames));

        return new JdbcRecordWriter(connection, statement, commitSize);

    } catch (Exception ex) {
        throw new IOException(ex); // keep the original cause and stack trace
    }
}

From source file: org.tensorflow.hadoop.io.TFRecordFileTest.java

License: Open Source License

@Test
public void testInputOutputFormat() throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);

    Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "tfr-test");

    TFRecordFileOutputFormat.setOutputPath(job, outdir);

    TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
    OutputFormat<BytesWritable, NullWritable> outputFormat = new TFRecordFileOutputFormat();
    OutputCommitter committer = outputFormat.getOutputCommitter(context);
    committer.setupJob(job);
    RecordWriter<BytesWritable, NullWritable> writer = outputFormat.getRecordWriter(context);

    // Write Example with random numbers
    Random rand = new Random();
    Map<Long, Long> records = new TreeMap<Long, Long>();
    try {
        for (int i = 0; i < RECORDS; ++i) {
            long randValue = rand.nextLong();
            records.put((long) i, randValue);
            Int64List data = Int64List.newBuilder().addValue(i).addValue(randValue).build();
            Feature feature = Feature.newBuilder().setInt64List(data).build();
            Features features = Features.newBuilder().putFeature("data", feature).build();
            Example example = Example.newBuilder().setFeatures(features).build();
            BytesWritable key = new BytesWritable(example.toByteArray());
            writer.write(key, NullWritable.get());
        }
    } finally {
        writer.close(context);
    }
    committer.commitTask(context);
    committer.commitJob(job);

    // Read and compare
    TFRecordFileInputFormat.setInputPaths(job, outdir);
    InputFormat<BytesWritable, NullWritable> inputFormat = new TFRecordFileInputFormat();
    for (InputSplit split : inputFormat.getSplits(job)) {
        RecordReader<BytesWritable, NullWritable> reader = inputFormat.createRecordReader(split, context);
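        // reuse the same task attempt id for the MapContext that drives the reader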
        MapContext<BytesWritable, NullWritable, BytesWritable, NullWritable> mcontext = new MapContextImpl<BytesWritable, NullWritable, BytesWritable, NullWritable>(
                job.getConfiguration(), context.getTaskAttemptID(), reader, null, null,
                MapReduceTestUtil.createDummyReporter(), split);
        reader.initialize(split, mcontext);
        try {
            while (reader.nextKeyValue()) {
                BytesWritable bytes = reader.getCurrentKey();
                Example example = Example.parseFrom(bytes.getBytes());
                Int64List data = example.getFeatures().getFeatureMap().get("data").getInt64List();
                Long key = data.getValue(0);
                Long value = data.getValue(1);
                assertEquals(records.get(key), value);
                records.remove(key);
            }
        } finally {
            reader.close();
        }
    }
    assertEquals(0, records.size());
}

From source file: parquet.hadoop.ParquetMultiOutputFormat.java

License: Apache License

/**
 * {@inheritDoc}
 */
@Override
public RecordWriter<K, T> getRecordWriter(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    final Configuration conf = getConfiguration(taskAttemptContext);

    CompressionCodecName codec = getCodec(taskAttemptContext);
    String extension = codec.getExtension() + ".parquet";
    TaskID taskId = taskAttemptContext.getTaskAttemptID().getTaskID();
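    // the numeric task id becomes the zero-padded part number in the file name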

    Path workPath = ((ParquetMultiOutputCommitter) getOutputCommitter(taskAttemptContext)).getWorkPath();

    return getRecordWriter(conf, workPath, extension, String.format("%05d", taskId.getId()), codec);
}

From source file: simsql.runtime.RecordOutputFormat.java

License: Apache License

public RecordWriter<WritableKey, WritableValue> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {

    Configuration conf = job.getConfiguration();

    // here's what we do -- if we have a map-only job and a value for
    // lastInputSplit as given to us by RecordInputFormat, then we
    // will get our part number from that file. otherwise, we'll use
    // the one we get from the job.

    // get the part from the job.
    TaskID taskId = job.getTaskAttemptID().getTaskID();
    int part = taskId.getId();
    if (RecordOutputFormat.lastInputSplit != null && taskId.getTaskType() == TaskType.MAP) {

        part = RecordOutputFormat.getPartNumber(RecordOutputFormat.lastInputSplit);
        System.out.println("MAP-ONLY JOB: USING PART NUMBER " + part + " FROM INPUT SPLIT");

        // set it back to null
        RecordOutputFormat.lastInputSplit = null;
    }

    FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(job);
    Path file = new Path(committer.getWorkPath(), RecordOutputFormat.getFileNumber(part));

    /* Path file = getDefaultWorkFile (job, ".tbl"); */
    FileSystem fs = file.getFileSystem(conf);
    FSDataOutputStream fileOut = fs.create(file, false);
    return new OutputFileSerializer(fileOut);
}