List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getTaskAttemptID()
public TaskAttemptID getTaskAttemptID();
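getTaskAttemptID() returns the TaskAttemptID of the running task attempt. In the examples collected below it is used almost exclusively to derive a unique, per-task suffix for output files or staging tables. As a quick orientation, here is a minimal sketch (not taken from any of the listed projects; the class name TaskScopedNames is illustrative) showing how the task number and attempt number can be unpacked from the ID:

import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;

// Illustrative helper: derive a unique per-task name from the attempt ID.
public final class TaskScopedNames {

    private TaskScopedNames() {
    }

    public static String partName(TaskAttemptContext context) {
        TaskAttemptID attemptId = context.getTaskAttemptID();
        // getTaskID().getId() is the task's index within the job;
        // attemptId.getId() is the retry number of this particular attempt.
        return String.format("part-%05d-attempt-%d",
                attemptId.getTaskID().getId(), attemptId.getId());
    }
}

Note that the samples below key their output names on the task ID rather than the full attempt ID, so a retried attempt of the same task targets the same name instead of producing a duplicate output.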
From source file: org.schedoscope.export.ftp.outputformat.FtpUploadOutputCommitter.java
License: Apache License
@Override
public void commitTask(TaskAttemptContext context) throws IOException {
    super.commitTask(context);
    String fileName = FtpUploadOutputFormat.getOutputName(context);
    // Build the remote file name from the task ID so each task uploads a distinct file.
    String remote = endpoint + "/" + filePrefix + context.getTaskAttemptID().getTaskID().getId() + "-"
            + numReducer + FtpUploadOutputFormat.getOutputNameExtension();
    Configuration conf = context.getConfiguration();
    if (keyContent != null && !keyContent.isEmpty()) {
        // Key material is configured: write it to a temporary file and authenticate with it.
        File keyFile = File.createTempFile(TMP_FILE_PREFIX, TMP_FILE_SUFFIX);
        keyFile.deleteOnExit();
        Files.write(keyContent.getBytes(StandardCharsets.US_ASCII), keyFile);
        uploader = new Uploader(user, new File(keyFile.getCanonicalPath()), pass, conf, passiveMode, userIsRoot);
    } else {
        uploader = new Uploader(user, pass, conf, passiveMode, userIsRoot);
    }
    uploader.uploadFile(new Path(outputPath, fileName).toString(), remote);
    uploader.closeFilesystem();
}
From source file: org.schedoscope.export.jdbc.outputformat.JdbcOutputFormat.java
License: Apache License
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
    Schema outputSchema = SchemaFactory.getSchema(context.getConfiguration());
    // Stage rows into a temporary table whose name is suffixed with the task ID.
    String tmpOutputTable = getTablePrefix(outputSchema) + outputSchema.getTable() + "_"
            + context.getTaskAttemptID().getTaskID().getId();
    String createTableQuery = outputSchema.getCreateTableQuery();
    createTableQuery = createTableQuery.replace(outputSchema.getTable(), tmpOutputTable);
    int commitSize = outputSchema.getCommitSize();
    String[] fieldNames = outputSchema.getColumnNames();
    try {
        Connection connection = outputSchema.getConnection();
        JdbcQueryUtils.dropTable(tmpOutputTable, connection);
        JdbcQueryUtils.createTable(createTableQuery, connection);
        PreparedStatement statement = connection
                .prepareStatement(JdbcQueryUtils.createInsertQuery(tmpOutputTable, fieldNames));
        return new JdbcRecordWriter(connection, statement, commitSize);
    } catch (Exception ex) {
        throw new IOException(ex.getMessage());
    }
}
From source file: org.tensorflow.hadoop.io.TFRecordFileTest.java
License: Open Source License
@Test
public void testInputOutputFormat() throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);

    Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "tfr-test");
    TFRecordFileOutputFormat.setOutputPath(job, outdir);

    TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
    OutputFormat<BytesWritable, NullWritable> outputFormat = new TFRecordFileOutputFormat();
    OutputCommitter committer = outputFormat.getOutputCommitter(context);
    committer.setupJob(job);
    RecordWriter<BytesWritable, NullWritable> writer = outputFormat.getRecordWriter(context);

    // Write Example with random numbers
    Random rand = new Random();
    Map<Long, Long> records = new TreeMap<Long, Long>();
    try {
        for (int i = 0; i < RECORDS; ++i) {
            long randValue = rand.nextLong();
            records.put((long) i, randValue);
            Int64List data = Int64List.newBuilder().addValue(i).addValue(randValue).build();
            Feature feature = Feature.newBuilder().setInt64List(data).build();
            Features features = Features.newBuilder().putFeature("data", feature).build();
            Example example = Example.newBuilder().setFeatures(features).build();
            BytesWritable key = new BytesWritable(example.toByteArray());
            writer.write(key, NullWritable.get());
        }
    } finally {
        writer.close(context);
    }
    committer.commitTask(context);
    committer.commitJob(job);

    // Read and compare
    TFRecordFileInputFormat.setInputPaths(job, outdir);
    InputFormat<BytesWritable, NullWritable> inputFormat = new TFRecordFileInputFormat();
    for (InputSplit split : inputFormat.getSplits(job)) {
        RecordReader<BytesWritable, NullWritable> reader = inputFormat.createRecordReader(split, context);
        MapContext<BytesWritable, NullWritable, BytesWritable, NullWritable> mcontext =
                new MapContextImpl<BytesWritable, NullWritable, BytesWritable, NullWritable>(
                        job.getConfiguration(), context.getTaskAttemptID(), reader, null, null,
                        MapReduceTestUtil.createDummyReporter(), split);
        reader.initialize(split, mcontext);
        try {
            while (reader.nextKeyValue()) {
                BytesWritable bytes = reader.getCurrentKey();
                Example example = Example.parseFrom(bytes.getBytes());
                Int64List data = example.getFeatures().getFeatureMap().get("data").getInt64List();
                Long key = data.getValue(0);
                Long value = data.getValue(1);
                assertEquals(records.get(key), value);
                records.remove(key);
            }
        } finally {
            reader.close();
        }
    }
    assertEquals(0, records.size());
}
From source file: parquet.hadoop.ParquetMultiOutputFormat.java
License: Apache License
/**
 * {@inheritDoc}
 */
@Override
public RecordWriter<K, T> getRecordWriter(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    final Configuration conf = getConfiguration(taskAttemptContext);
    CompressionCodecName codec = getCodec(taskAttemptContext);
    String extension = codec.getExtension() + ".parquet";
    TaskID taskId = taskAttemptContext.getTaskAttemptID().getTaskID();
    Path workPath = ((ParquetMultiOutputCommitter) getOutputCommitter(taskAttemptContext)).getWorkPath();
    return getRecordWriter(conf, workPath, extension, String.format("%05d", taskId.getId()), codec);
}
From source file: simsql.runtime.RecordOutputFormat.java
License: Apache License
public RecordWriter<WritableKey, WritableValue> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {

    Configuration conf = job.getConfiguration();

    // here's what we do -- if we have a map-only job and a value for
    // lastInputSplit as given to us by RecordInputFormat, then we
    // will get our part number from that file. otherwise, we'll use
    // the one we get from the job.

    // get the part from the job.
    TaskID taskId = job.getTaskAttemptID().getTaskID();
    int part = taskId.getId();

    if (RecordOutputFormat.lastInputSplit != null && taskId.getTaskType() == TaskType.MAP) {
        part = RecordOutputFormat.getPartNumber(RecordOutputFormat.lastInputSplit);
        System.out.println("MAP-ONLY JOB: USING PART NUMBER " + part + " FROM INPUT SPLIT");

        // set it back to null
        RecordOutputFormat.lastInputSplit = null;
    }

    FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(job);
    Path file = new Path(committer.getWorkPath(), RecordOutputFormat.getFileNumber(part));
    /* Path file = getDefaultWorkFile(job, ".tbl"); */

    FileSystem fs = file.getFileSystem(conf);
    FSDataOutputStream fileOut = fs.create(file, false);
    return new OutputFileSerializer(fileOut);
}