Usage examples for org.apache.hadoop.mapreduce.RecordReader#initialize
public abstract void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException;
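For orientation, here is a minimal sketch of the pattern every example below follows: an InputFormat creates the reader, initialize(split, context) is called with the split and task-attempt context, and records are then consumed via nextKeyValue(). The driver-style main method is illustrative only (not taken from any of the projects listed below); it assumes a standard Hadoop 2.x+ classpath, where TextInputFormat and TaskAttemptContextImpl are part of the mapreduce API.

// Illustrative sketch: create a reader, initialize it, then iterate key/value pairs.
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class RecordReaderInitializeSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    FileInputFormat.setInputPaths(job, new Path(args[0]));

    TextInputFormat format = new TextInputFormat();
    List<InputSplit> splits = format.getSplits(job);
    TaskAttemptContext context =
        new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());

    for (InputSplit split : splits) {
      RecordReader<LongWritable, Text> reader = format.createRecordReader(split, context);
      reader.initialize(split, context); // must be called before nextKeyValue()
      try {
        while (reader.nextKeyValue()) {
          System.out.println(reader.getCurrentKey() + "\t" + reader.getCurrentValue());
        }
      } finally {
        reader.close();
      }
    }
  }
}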
From source file:org.mrgeo.format.AutoFeatureInputFormat.java
License:Apache License
@Override
public RecordReader<LongWritable, Geometry> createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  RecordReader<LongWritable, Geometry> result = new AutoRecordReader();
  result.initialize(split, context);
  return result;
}
From source file:org.mrgeo.format.AutoGeometryInputFormat.java
License:Apache License
@Override
public RecordReader<LongWritable, GeometryWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  RecordReader<LongWritable, GeometryWritable> result = new AutoRecordReader();
  result.initialize(split, context);
  return result;
}
From source file:org.mrgeo.format.CsvInputFormatTest.java
License:Apache License
@Test
@Category(UnitTest.class)
public void testNullIgnore() throws Exception {
  FileSystem fs = new RawLocalFileSystem();
  try {
    int lineCount = 0;

    // Write columns file which defines the columns title and type
    String cstr = "<?xml version='1.0' encoding='UTF-8'?>\n<AllColumns firstLineHeader='false'>\n";
    cstr += " <Column name='name' type='Nominal'/>\n";
    cstr += " <Column name='x' type='Numeric'/>\n";
    cstr += " <Column name='y' type='Numeric'/>\n";
    cstr += "</AllColumns>\n";
    FileOutputStream fos = new FileOutputStream(output + "/nulXY.csv.columns");
    PrintStream ps = new PrintStream(fos);
    ps.print(cstr);
    ps.close();

    // Write csv test data
    fos = new FileOutputStream(output + "/nullXY.csv");
    ps = new PrintStream(fos);
    // populated rows
    for (int ii = 0; ii < 10; ii++) {
      ps.print("ASDF,1.0,1.0\n");
      lineCount++;
    }
    // empty rows
    ps.print("ASDF,,1.0\n");
    ps.print("ASDF,1.0,\n");
    ps.print("ASDF,,\n");
    lineCount += 3;
    // populated rows
    for (int ii = 0; ii < 5; ii++) {
      ps.print("ASDF,1.0,1.0\n");
      lineCount++;
    }
    ps.close();

    System.out.println(output + "nulXY.csv");

    Job j = new Job(new Configuration());
    Configuration c = j.getConfiguration();
    fs.setConf(c);
    Path testFile = new Path(output, "nullXY.csv");
    testFile = fs.makeQualified(testFile);
    InputSplit split;
    long l;
    long start;

    TextInputFormat format = new TextInputFormat();
    split = new FileSplit(testFile, 0, lineCount * 1000, null);
    RecordReader<LongWritable, Text> reader2 = format.createRecordReader(split,
        HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));
    reader2.initialize(split, HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));
    l = 0;
    start = System.currentTimeMillis();
    while (reader2.nextKeyValue()) {
      reader2.getCurrentValue().toString();
      l++;
    }
    Assert.assertEquals(lineCount, l);
    System.out.printf("text line reader with null x,y ignore: %d\n", System.currentTimeMillis() - start);
  } catch (Exception e) {
    e.printStackTrace();
    throw e;
  } finally {
    fs.close();
  }
}
From source file:org.mrgeo.hdfs.ingest.format.IngestImageSplittingInputFormat.java
License:Apache License
@Override
public RecordReader<TileIdWritable, RasterWritable> createRecordReader(final InputSplit split,
    final TaskAttemptContext context) throws IOException, InterruptedException {
  final RecordReader<TileIdWritable, RasterWritable> reader = new IngestImageSplittingRecordReader();
  // FIXME: This seems to be called from AutoFeatureInputFormat.initialize()
  reader.initialize(split, context);
  return reader;
}
From source file:org.mrgeo.hdfs.vector.ShpInputFormatTest.java
License:Apache License
public RecordReader<FeatureIdWritable, Geometry> openReader(Path p) throws IOException, InterruptedException {
  Job j = new Job(new Configuration());
  Configuration c = j.getConfiguration();
  try (FileSystem fs = new RawLocalFileSystem()) {
    fs.setConf(c);
    Path testFile = fs.makeQualified(p);
    c.set("mapred.input.dir", testFile.toString());
    ShpInputFormat format = new ShpInputFormat();
    InputSplit split = format.getSplits(j).get(0);
    TaskAttemptContext context = HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID());
    RecordReader<FeatureIdWritable, Geometry> reader = format.createRecordReader(split, context);
    reader.initialize(split, context);
    return reader;
  }
}
From source file:org.mrgeo.mapreduce.ingestvector.IngestVectorGeometryInputFormat.java
License:Apache License
@Override
public RecordReader<LongWritable, GeometryWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  final RecordReader<LongWritable, GeometryWritable> reader = new IngestVectorRecordReader();
  reader.initialize(split, context);
  return reader;
}
From source file:org.msgpack.hadoop.mapreduce.input.TestMessagePackInputFormat.java
License:Apache License
void checkFormat(Job job) throws Exception {
  TaskAttemptContext attemptContext = new TaskAttemptContext(job.getConfiguration(),
      new TaskAttemptID("123", 0, false, 1, 2));

  MessagePackInputFormat format = new MessagePackInputFormat();
  FileInputFormat.setInputPaths(job, workDir);

  List<InputSplit> splits = format.getSplits(job);
  assertEquals(1, splits.size());

  for (int j = 0; j < splits.size(); j++) {
    RecordReader<LongWritable, MessagePackWritable> reader = format.createRecordReader(splits.get(j),
        attemptContext);
    reader.initialize(splits.get(j), attemptContext);

    int count = 0;
    try {
      while (reader.nextKeyValue()) {
        LongWritable key = reader.getCurrentKey();
        Value val = reader.getCurrentValue().get();
        assertEquals(count, val.asIntegerValue().getLong());
        count++;
      }
    } finally {
      reader.close();
    }
  }
}
From source file:org.tensorflow.hadoop.io.TFRecordFileTest.java
License:Open Source License
@Test
public void testInputOutputFormat() throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf);

  Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "tfr-test");
  TFRecordFileOutputFormat.setOutputPath(job, outdir);

  TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
  OutputFormat<BytesWritable, NullWritable> outputFormat = new TFRecordFileOutputFormat();
  OutputCommitter committer = outputFormat.getOutputCommitter(context);
  committer.setupJob(job);
  RecordWriter<BytesWritable, NullWritable> writer = outputFormat.getRecordWriter(context);

  // Write Example with random numbers
  Random rand = new Random();
  Map<Long, Long> records = new TreeMap<Long, Long>();
  try {
    for (int i = 0; i < RECORDS; ++i) {
      long randValue = rand.nextLong();
      records.put((long) i, randValue);
      Int64List data = Int64List.newBuilder().addValue(i).addValue(randValue).build();
      Feature feature = Feature.newBuilder().setInt64List(data).build();
      Features features = Features.newBuilder().putFeature("data", feature).build();
      Example example = Example.newBuilder().setFeatures(features).build();
      BytesWritable key = new BytesWritable(example.toByteArray());
      writer.write(key, NullWritable.get());
    }
  } finally {
    writer.close(context);
  }
  committer.commitTask(context);
  committer.commitJob(job);

  // Read and compare
  TFRecordFileInputFormat.setInputPaths(job, outdir);
  InputFormat<BytesWritable, NullWritable> inputFormat = new TFRecordFileInputFormat();
  for (InputSplit split : inputFormat.getSplits(job)) {
    RecordReader<BytesWritable, NullWritable> reader = inputFormat.createRecordReader(split, context);
    MapContext<BytesWritable, NullWritable, BytesWritable, NullWritable> mcontext =
        new MapContextImpl<BytesWritable, NullWritable, BytesWritable, NullWritable>(
            job.getConfiguration(), context.getTaskAttemptID(), reader, null, null,
            MapReduceTestUtil.createDummyReporter(), split);
    reader.initialize(split, mcontext);
    try {
      while (reader.nextKeyValue()) {
        BytesWritable bytes = reader.getCurrentKey();
        Example example = Example.parseFrom(bytes.getBytes());
        Int64List data = example.getFeatures().getFeatureMap().get("data").getInt64List();
        Long key = data.getValue(0);
        Long value = data.getValue(1);
        assertEquals(records.get(key), value);
        records.remove(key);
      }
    } finally {
      reader.close();
    }
  }
  assertEquals(0, records.size());
}
From source file:org.warcbase.io.GenericArchiveRecordWritableTest.java
License:Apache License
@Test
public void testArcInputFormat() throws Exception {
  String arcFile = Resources.getResource("arc/example.arc.gz").getPath();

  Configuration conf = new Configuration(false);
  conf.set("fs.defaultFS", "file:///");

  File testFile = new File(arcFile);
  Path path = new Path(testFile.getAbsoluteFile().toURI());
  FileSplit split = new FileSplit(path, 0, testFile.length(), null);

  InputFormat<LongWritable, GenericArchiveRecordWritable> inputFormat =
      ReflectionUtils.newInstance(WacGenericInputFormat.class, conf);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  RecordReader<LongWritable, GenericArchiveRecordWritable> reader =
      inputFormat.createRecordReader(split, context);
  reader.initialize(split, context);

  int cnt = 0;
  while (reader.nextKeyValue()) {
    GenericArchiveRecordWritable record = reader.getCurrentValue();
    cnt++;

    ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
    DataOutputStream dataOut = new DataOutputStream(bytesOut);
    record.write(dataOut);

    GenericArchiveRecordWritable reconstructed = new GenericArchiveRecordWritable();
    reconstructed.setFormat(ArchiveFormat.ARC);
    reconstructed.readFields(new DataInputStream(new ByteArrayInputStream(bytesOut.toByteArray())));

    boolean isArc = (record.getFormat() == ArchiveFormat.ARC);
    assertEquals(isArc, true);
    if (isArc) {
      assertEquals(((ARCRecord) record.getRecord()).getMetaData().getUrl(),
          ((ARCRecord) reconstructed.getRecord()).getMetaData().getUrl());
    }
  }
  assertEquals(300, cnt);
}
From source file:org.warcbase.io.GenericArchiveRecordWritableTest.java
License:Apache License
@Test
public void testWarcInputFormat() throws Exception {
  String warcFile = Resources.getResource("warc/example.warc.gz").getPath();

  Configuration conf = new Configuration(false);
  conf.set("fs.defaultFS", "file:///");

  File testFile = new File(warcFile);
  Path path = new Path(testFile.getAbsoluteFile().toURI());
  FileSplit split = new FileSplit(path, 0, testFile.length(), null);

  InputFormat<LongWritable, GenericArchiveRecordWritable> inputFormat =
      ReflectionUtils.newInstance(WacGenericInputFormat.class, conf);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  RecordReader<LongWritable, GenericArchiveRecordWritable> reader =
      inputFormat.createRecordReader(split, context);
  reader.initialize(split, context);

  int cnt = 0;
  while (reader.nextKeyValue()) {
    GenericArchiveRecordWritable record = reader.getCurrentValue();
    cnt++;

    ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
    DataOutputStream dataOut = new DataOutputStream(bytesOut);
    record.write(dataOut);

    GenericArchiveRecordWritable reconstructed = new GenericArchiveRecordWritable();
    reconstructed.setFormat(ArchiveFormat.WARC);
    reconstructed.readFields(new DataInputStream(new ByteArrayInputStream(bytesOut.toByteArray())));

    boolean isWarc = (record.getFormat() == ArchiveFormat.WARC);
    assertTrue(isWarc);
    if (isWarc) {
      assertEquals(record.getRecord().getHeader().getUrl(),
          reconstructed.getRecord().getHeader().getUrl());
      assertEquals(record.getRecord().getHeader().getContentLength(),
          reconstructed.getRecord().getHeader().getContentLength());
    }
  }
  assertEquals(822, cnt);
}