List of usage examples for the org.apache.hadoop.io MapFile.Writer constructor
@SuppressWarnings("unchecked") public Writer(Configuration conf, Path dirName, SequenceFile.Writer.Option... opts) throws IOException
From source file:org.datavec.hadoop.records.reader.TestMapFileRecordReader.java
License:Apache License
@BeforeClass public static void buildMapFiles() throws IOException { //----- Sequence RR setup ----- Configuration c = new Configuration(); Class<? extends WritableComparable> keyClass = LongWritable.class; Class<? extends Writable> valueClass = SequenceRecordWritable.class; SequenceFile.Writer.Option[] opts = new SequenceFile.Writer.Option[] { MapFile.Writer.keyClass(keyClass), SequenceFile.Writer.valueClass(valueClass) }; tempDirSeq = Files.createTempDir(); seqMapFilePath = new Path("file:///" + tempDirSeq.getAbsolutePath()); MapFile.Writer writer = new MapFile.Writer(c, seqMapFilePath, opts); seqMap = new HashMap<>(); seqMap.put(new LongWritable(0), new SequenceRecordWritable(Arrays.asList( Arrays.<org.datavec.api.writable.Writable>asList(new Text("zero"), new IntWritable(0), new DoubleWritable(0), new NDArrayWritable(Nd4j.valueArrayOf(10, 0.0))), Arrays.<org.datavec.api.writable.Writable>asList(new Text("one"), new IntWritable(1), new DoubleWritable(1.0), new NDArrayWritable(Nd4j.valueArrayOf(10, 1.0))), Arrays.<org.datavec.api.writable.Writable>asList(new Text("two"), new IntWritable(2), new DoubleWritable(2.0), new NDArrayWritable(Nd4j.valueArrayOf(10, 2.0)))))); seqMap.put(new LongWritable(1), new SequenceRecordWritable(Arrays.asList( Arrays.<org.datavec.api.writable.Writable>asList(new Text("Bzero"), new IntWritable(10), new DoubleWritable(10), new NDArrayWritable(Nd4j.valueArrayOf(10, 10.0))), Arrays.<org.datavec.api.writable.Writable>asList(new Text("Bone"), new IntWritable(11), new DoubleWritable(11.0), new NDArrayWritable(Nd4j.valueArrayOf(10, 11.0))), Arrays.<org.datavec.api.writable.Writable>asList(new Text("Btwo"), new IntWritable(12), new DoubleWritable(12.0), new NDArrayWritable(Nd4j.valueArrayOf(10, 12.0)))))); seqMap.put(new LongWritable(2), new SequenceRecordWritable(Arrays.asList( Arrays.<org.datavec.api.writable.Writable>asList(new Text("Czero"), new IntWritable(20), new DoubleWritable(20), new NDArrayWritable(Nd4j.valueArrayOf(10, 20.0))), 
Arrays.<org.datavec.api.writable.Writable>asList(new Text("Cone"), new IntWritable(21), new DoubleWritable(21.0), new NDArrayWritable(Nd4j.valueArrayOf(10, 21.0))), Arrays.<org.datavec.api.writable.Writable>asList(new Text("Ctwo"), new IntWritable(22), new DoubleWritable(22.0), new NDArrayWritable(Nd4j.valueArrayOf(10, 22.0)))))); //Need to write in order for (int i = 0; i <= 2; i++) { LongWritable key = new LongWritable(i); SequenceRecordWritable value = seqMap.get(key); writer.append(key, value);/*from w w w .j a v a 2 s . c om*/ } writer.close(); //----- Standard RR setup ----- valueClass = RecordWritable.class; opts = new SequenceFile.Writer.Option[] { MapFile.Writer.keyClass(keyClass), SequenceFile.Writer.valueClass(valueClass) }; tempDir = Files.createTempDir(); mapFilePath = new Path("file:///" + tempDir.getAbsolutePath()); writer = new MapFile.Writer(c, mapFilePath, opts); recordMap = new HashMap<>(); recordMap.put(new LongWritable(0), new RecordWritable( Arrays.<org.datavec.api.writable.Writable>asList(new Text("zero"), new IntWritable(0), new DoubleWritable(0), new NDArrayWritable(Nd4j.valueArrayOf(10, 0.0))))); recordMap.put(new LongWritable(1), new RecordWritable( Arrays.<org.datavec.api.writable.Writable>asList(new Text("one"), new IntWritable(11), new DoubleWritable(11.0), new NDArrayWritable(Nd4j.valueArrayOf(10, 11.0))))); recordMap.put(new LongWritable(2), new RecordWritable( Arrays.<org.datavec.api.writable.Writable>asList(new Text("two"), new IntWritable(22), new DoubleWritable(22.0), new NDArrayWritable(Nd4j.valueArrayOf(10, 22.0))))); //Need to write in order for (int i = 0; i <= 2; i++) { LongWritable key = new LongWritable(i); RecordWritable value = recordMap.get(key); writer.append(key, value); } writer.close(); }
From source file:org.datavec.hadoop.records.reader.TestMapFileRecordReaderMultipleParts.java
License:Apache License
@BeforeClass public static void buildMapFiles() throws IOException { //----- Sequence RR setup ----- Configuration c = new Configuration(); Class<? extends WritableComparable> keyClass = LongWritable.class; Class<? extends Writable> valueClass = SequenceRecordWritable.class; SequenceFile.Writer.Option[] opts = new SequenceFile.Writer.Option[] { MapFile.Writer.keyClass(keyClass), SequenceFile.Writer.valueClass(valueClass) }; tempDirSeq = Files.createTempDir(); File[] subdirs = new File[3]; Path[] paths = new Path[subdirs.length]; MapFile.Writer[] writers = new MapFile.Writer[subdirs.length]; for (int i = 0; i < subdirs.length; i++) { subdirs[i] = new File(tempDirSeq, "part-r-0000" + i); subdirs[i].mkdir();// ww w . j a v a 2s . c o m paths[i] = new Path("file:///" + subdirs[i].getAbsolutePath()); writers[i] = new MapFile.Writer(c, paths[i], opts); } seqMapFilePath = new Path("file:///" + tempDirSeq.getAbsolutePath()); seqMap = new HashMap<>(); for (int i = 0; i < 9; i++) { seqMap.put(new LongWritable(i), new SequenceRecordWritable(Arrays.asList( Arrays.<org.datavec.api.writable.Writable>asList(new Text(i + "-0"), new IntWritable(3 * i), new DoubleWritable(3 * i)), Arrays.<org.datavec.api.writable.Writable>asList(new Text(i + "-1"), new IntWritable(3 * i + 1), new DoubleWritable(3 * i + 1.0)), Arrays.<org.datavec.api.writable.Writable>asList(new Text(i + "-2"), new IntWritable(3 * i + 2), new DoubleWritable(3 * i + 2.0))))); } //Need to write in order, to different map files separately for (int i = 0; i < seqMap.size(); i++) { int mapFileIdx = i / writers.length; LongWritable key = new LongWritable(i); SequenceRecordWritable value = seqMap.get(key); writers[mapFileIdx].append(key, value); } for (MapFile.Writer m : writers) { m.close(); } //----- Standard RR setup ----- valueClass = RecordWritable.class; opts = new SequenceFile.Writer.Option[] { MapFile.Writer.keyClass(keyClass), SequenceFile.Writer.valueClass(valueClass) }; tempDir = Files.createTempDir(); subdirs = 
new File[3]; paths = new Path[subdirs.length]; writers = new MapFile.Writer[subdirs.length]; for (int i = 0; i < subdirs.length; i++) { subdirs[i] = new File(tempDir, "part-r-0000" + i); subdirs[i].mkdir(); paths[i] = new Path("file:///" + subdirs[i].getAbsolutePath()); writers[i] = new MapFile.Writer(c, paths[i], opts); } mapFilePath = new Path("file:///" + tempDir.getAbsolutePath()); recordMap = new HashMap<>(); for (int i = 0; i < 9; i++) { recordMap.put(new LongWritable(i), new RecordWritable(Arrays.<org.datavec.api.writable.Writable>asList(new Text(String.valueOf(i)), new IntWritable(i), new DoubleWritable(i)))); } //Need to write in order for (int i = 0; i < recordMap.size(); i++) { int mapFileIdx = i / writers.length; LongWritable key = new LongWritable(i); RecordWritable value = recordMap.get(key); writers[mapFileIdx].append(key, value); } for (MapFile.Writer m : writers) { m.close(); } }
From source file:org.datavec.hadoop.records.reader.TestMapFileRecordReaderMultiplePartsSomeEmpty.java
License:Apache License
@BeforeClass public static void buildMapFiles() throws IOException { //----- Sequence RR setup ----- Configuration c = new Configuration(); Class<? extends WritableComparable> keyClass = LongWritable.class; Class<? extends Writable> valueClass = SequenceRecordWritable.class; SequenceFile.Writer.Option[] opts = new SequenceFile.Writer.Option[] { MapFile.Writer.keyClass(keyClass), SequenceFile.Writer.valueClass(valueClass) }; tempDirSeq = Files.createTempDir(); File[] subdirs = new File[3]; Path[] paths = new Path[subdirs.length]; MapFile.Writer[] writers = new MapFile.Writer[subdirs.length]; for (int i = 0; i < subdirs.length; i++) { subdirs[i] = new File(tempDirSeq, "part-r-0000" + i); subdirs[i].mkdir();/* www. j av a 2 s. com*/ paths[i] = new Path("file:///" + subdirs[i].getAbsolutePath()); writers[i] = new MapFile.Writer(c, paths[i], opts); } seqMapFilePath = new Path("file:///" + tempDirSeq.getAbsolutePath()); seqMap = new HashMap<>(); for (int i = 0; i < 6; i++) { seqMap.put(new LongWritable(i), new SequenceRecordWritable(Arrays.asList( Arrays.<org.datavec.api.writable.Writable>asList(new Text(i + "-0"), new IntWritable(3 * i), new DoubleWritable(3 * i)), Arrays.<org.datavec.api.writable.Writable>asList(new Text(i + "-1"), new IntWritable(3 * i + 1), new DoubleWritable(3 * i + 1.0)), Arrays.<org.datavec.api.writable.Writable>asList(new Text(i + "-2"), new IntWritable(3 * i + 2), new DoubleWritable(3 * i + 2.0))))); } //Need to write in order, to different map files separately for (int i = 0; i < seqMap.size(); i++) { int mapFileIdx; if (i < 3) { mapFileIdx = 0; } else { mapFileIdx = 2; } LongWritable key = new LongWritable(i); SequenceRecordWritable value = seqMap.get(key); writers[mapFileIdx].append(key, value); } for (MapFile.Writer m : writers) { m.close(); } //----- Standard RR setup ----- valueClass = RecordWritable.class; opts = new SequenceFile.Writer.Option[] { MapFile.Writer.keyClass(keyClass), SequenceFile.Writer.valueClass(valueClass) }; tempDir = 
Files.createTempDir(); subdirs = new File[3]; paths = new Path[subdirs.length]; writers = new MapFile.Writer[subdirs.length]; for (int i = 0; i < subdirs.length; i++) { subdirs[i] = new File(tempDir, "part-r-0000" + i); subdirs[i].mkdir(); paths[i] = new Path("file:///" + subdirs[i].getAbsolutePath()); writers[i] = new MapFile.Writer(c, paths[i], opts); } mapFilePath = new Path("file:///" + tempDir.getAbsolutePath()); recordMap = new HashMap<>(); for (int i = 0; i < 6; i++) { recordMap.put(new LongWritable(i), new RecordWritable(Arrays.<org.datavec.api.writable.Writable>asList(new Text(String.valueOf(i)), new IntWritable(i), new DoubleWritable(i)))); } //Need to write in order for (int i = 0; i < recordMap.size(); i++) { int mapFileIdx; if (i < 3) { mapFileIdx = 0; } else { mapFileIdx = 2; } LongWritable key = new LongWritable(i); RecordWritable value = recordMap.get(key); writers[mapFileIdx].append(key, value); } for (MapFile.Writer m : writers) { m.close(); } }