Example usage of the org.apache.hadoop.io.MapFile.Writer constructor

List of usage examples for the org.apache.hadoop.io.MapFile.Writer constructor

Introduction

On this page you can find example usages of the org.apache.hadoop.io.MapFile.Writer constructor.

Prototype

@SuppressWarnings("unchecked")
        public Writer(Configuration conf, Path dirName, SequenceFile.Writer.Option... opts) throws IOException 

Source Link

Usage

From source file:org.datavec.hadoop.records.reader.TestMapFileRecordReader.java

License:Apache License

/**
 * Builds the two single-directory MapFile fixtures used by the tests.
 *
 * <p>A fresh temp directory is created for each fixture. The first receives a MapFile of three
 * {@code SequenceRecordWritable} values keyed 0..2; the second receives a MapFile of three
 * {@code RecordWritable} values keyed 0..2. The expected contents are kept in {@code seqMap} and
 * {@code recordMap}, and the MapFile locations in {@code seqMapFilePath} and {@code mapFilePath}.
 *
 * <p>Each writer is opened in a try-with-resources block so that it is closed even when an
 * {@code append} call fails.
 *
 * @throws IOException if a MapFile cannot be created, written or closed
 */
@BeforeClass
public static void buildMapFiles() throws IOException {

    Configuration c = new Configuration();
    Class<? extends WritableComparable> keyClass = LongWritable.class;

    //----- Sequence RR setup -----

    Class<? extends Writable> valueClass = SequenceRecordWritable.class;

    SequenceFile.Writer.Option[] opts = new SequenceFile.Writer.Option[] { MapFile.Writer.keyClass(keyClass),
            SequenceFile.Writer.valueClass(valueClass) };

    tempDirSeq = Files.createTempDir();
    seqMapFilePath = new Path("file:///" + tempDirSeq.getAbsolutePath());

    // Build the expected data before opening the writer, so a failure here cannot leak a writer.
    seqMap = new HashMap<>();
    seqMap.put(new LongWritable(0),
            new SequenceRecordWritable(Arrays.asList(
                    Arrays.<org.datavec.api.writable.Writable>asList(new Text("zero"), new IntWritable(0),
                            new DoubleWritable(0), new NDArrayWritable(Nd4j.valueArrayOf(10, 0.0))),
                    Arrays.<org.datavec.api.writable.Writable>asList(new Text("one"), new IntWritable(1),
                            new DoubleWritable(1.0), new NDArrayWritable(Nd4j.valueArrayOf(10, 1.0))),
                    Arrays.<org.datavec.api.writable.Writable>asList(new Text("two"), new IntWritable(2),
                            new DoubleWritable(2.0), new NDArrayWritable(Nd4j.valueArrayOf(10, 2.0))))));

    seqMap.put(new LongWritable(1),
            new SequenceRecordWritable(Arrays.asList(
                    Arrays.<org.datavec.api.writable.Writable>asList(new Text("Bzero"), new IntWritable(10),
                            new DoubleWritable(10), new NDArrayWritable(Nd4j.valueArrayOf(10, 10.0))),
                    Arrays.<org.datavec.api.writable.Writable>asList(new Text("Bone"), new IntWritable(11),
                            new DoubleWritable(11.0), new NDArrayWritable(Nd4j.valueArrayOf(10, 11.0))),
                    Arrays.<org.datavec.api.writable.Writable>asList(new Text("Btwo"), new IntWritable(12),
                            new DoubleWritable(12.0), new NDArrayWritable(Nd4j.valueArrayOf(10, 12.0))))));

    seqMap.put(new LongWritable(2),
            new SequenceRecordWritable(Arrays.asList(
                    Arrays.<org.datavec.api.writable.Writable>asList(new Text("Czero"), new IntWritable(20),
                            new DoubleWritable(20), new NDArrayWritable(Nd4j.valueArrayOf(10, 20.0))),
                    Arrays.<org.datavec.api.writable.Writable>asList(new Text("Cone"), new IntWritable(21),
                            new DoubleWritable(21.0), new NDArrayWritable(Nd4j.valueArrayOf(10, 21.0))),
                    Arrays.<org.datavec.api.writable.Writable>asList(new Text("Ctwo"), new IntWritable(22),
                            new DoubleWritable(22.0), new NDArrayWritable(Nd4j.valueArrayOf(10, 22.0))))));

    try (MapFile.Writer writer = new MapFile.Writer(c, seqMapFilePath, opts)) {
        //Need to write in order
        for (int i = 0; i <= 2; i++) {
            LongWritable key = new LongWritable(i);
            SequenceRecordWritable value = seqMap.get(key);

            writer.append(key, value);
        }
    }

    //----- Standard RR setup -----

    valueClass = RecordWritable.class;

    opts = new SequenceFile.Writer.Option[] { MapFile.Writer.keyClass(keyClass),
            SequenceFile.Writer.valueClass(valueClass) };

    tempDir = Files.createTempDir();
    mapFilePath = new Path("file:///" + tempDir.getAbsolutePath());

    recordMap = new HashMap<>();
    recordMap.put(new LongWritable(0),
            new RecordWritable(
                    Arrays.<org.datavec.api.writable.Writable>asList(new Text("zero"), new IntWritable(0),
                            new DoubleWritable(0), new NDArrayWritable(Nd4j.valueArrayOf(10, 0.0)))));

    recordMap.put(new LongWritable(1),
            new RecordWritable(
                    Arrays.<org.datavec.api.writable.Writable>asList(new Text("one"), new IntWritable(11),
                            new DoubleWritable(11.0), new NDArrayWritable(Nd4j.valueArrayOf(10, 11.0)))));

    recordMap.put(new LongWritable(2),
            new RecordWritable(
                    Arrays.<org.datavec.api.writable.Writable>asList(new Text("two"), new IntWritable(22),
                            new DoubleWritable(22.0), new NDArrayWritable(Nd4j.valueArrayOf(10, 22.0)))));

    try (MapFile.Writer writer = new MapFile.Writer(c, mapFilePath, opts)) {
        //Need to write in order
        for (int i = 0; i <= 2; i++) {
            LongWritable key = new LongWritable(i);
            RecordWritable value = recordMap.get(key);

            writer.append(key, value);
        }
    }

}

From source file:org.datavec.hadoop.records.reader.TestMapFileRecordReaderMultipleParts.java

License:Apache License

/**
 * Builds two multi-part MapFile fixtures, each split over three {@code part-r-0000N} directories.
 *
 * <p>Nine records keyed 0..8 are written per fixture, three consecutive keys per part. The first
 * fixture holds {@code SequenceRecordWritable} values (root directory {@code tempDirSeq}), the
 * second holds {@code RecordWritable} values (root directory {@code tempDir}). The expected
 * contents are kept in {@code seqMap} and {@code recordMap}, and the root locations in
 * {@code seqMapFilePath} and {@code mapFilePath}.
 *
 * @throws IOException if a part directory or MapFile cannot be created, written or closed
 */
@BeforeClass
public static void buildMapFiles() throws IOException {

    final int numParts = 3;
    final int numRecords = 9;
    // Consecutive keys per part (rounded up), so the part index below never exceeds numParts - 1.
    final int recordsPerPart = (numRecords + numParts - 1) / numParts;

    Configuration c = new Configuration();
    Class<? extends WritableComparable> keyClass = LongWritable.class;

    //----- Sequence RR setup -----

    Class<? extends Writable> valueClass = SequenceRecordWritable.class;

    SequenceFile.Writer.Option[] opts = new SequenceFile.Writer.Option[] { MapFile.Writer.keyClass(keyClass),
            SequenceFile.Writer.valueClass(valueClass) };

    tempDirSeq = Files.createTempDir();
    seqMapFilePath = new Path("file:///" + tempDirSeq.getAbsolutePath());

    seqMap = new HashMap<>();
    for (int i = 0; i < numRecords; i++) {
        seqMap.put(new LongWritable(i),
                new SequenceRecordWritable(Arrays.asList(
                        Arrays.<org.datavec.api.writable.Writable>asList(new Text(i + "-0"),
                                new IntWritable(3 * i), new DoubleWritable(3 * i)),
                        Arrays.<org.datavec.api.writable.Writable>asList(new Text(i + "-1"),
                                new IntWritable(3 * i + 1), new DoubleWritable(3 * i + 1.0)),
                        Arrays.<org.datavec.api.writable.Writable>asList(new Text(i + "-2"),
                                new IntWritable(3 * i + 2), new DoubleWritable(3 * i + 2.0)))));
    }

    MapFile.Writer[] writers = new MapFile.Writer[numParts];
    try {
        for (int i = 0; i < numParts; i++) {
            writers[i] = openPartWriter(c, tempDirSeq, i, opts);
        }

        //Need to write in order, to different map files separately
        for (int i = 0; i < numRecords; i++) {
            LongWritable key = new LongWritable(i);
            SequenceRecordWritable value = seqMap.get(key);

            writers[i / recordsPerPart].append(key, value);
        }
    } finally {
        closeAll(writers);
    }

    //----- Standard RR setup -----

    valueClass = RecordWritable.class;

    opts = new SequenceFile.Writer.Option[] { MapFile.Writer.keyClass(keyClass),
            SequenceFile.Writer.valueClass(valueClass) };

    tempDir = Files.createTempDir();
    mapFilePath = new Path("file:///" + tempDir.getAbsolutePath());

    recordMap = new HashMap<>();
    for (int i = 0; i < numRecords; i++) {
        recordMap.put(new LongWritable(i),
                new RecordWritable(Arrays.<org.datavec.api.writable.Writable>asList(new Text(String.valueOf(i)),
                        new IntWritable(i), new DoubleWritable(i))));
    }

    writers = new MapFile.Writer[numParts];
    try {
        for (int i = 0; i < numParts; i++) {
            writers[i] = openPartWriter(c, tempDir, i, opts);
        }

        //Need to write in order
        for (int i = 0; i < numRecords; i++) {
            LongWritable key = new LongWritable(i);
            RecordWritable value = recordMap.get(key);

            writers[i / recordsPerPart].append(key, value);
        }
    } finally {
        closeAll(writers);
    }

}

/** Creates the {@code part-r-0000<partIdx>} directory under {@code parentDir} and opens a MapFile writer on it. */
private static MapFile.Writer openPartWriter(Configuration conf, File parentDir, int partIdx,
        SequenceFile.Writer.Option[] opts) throws IOException {
    File partDir = new File(parentDir, "part-r-0000" + partIdx);
    if (!partDir.mkdir() && !partDir.isDirectory()) {
        throw new IOException("Could not create directory " + partDir.getAbsolutePath());
    }
    return new MapFile.Writer(conf, new Path("file:///" + partDir.getAbsolutePath()), opts);
}

/** Closes every non-null writer; the first close failure is rethrown with later ones attached as suppressed. */
private static void closeAll(MapFile.Writer[] writers) throws IOException {
    IOException firstFailure = null;
    for (MapFile.Writer w : writers) {
        if (w == null) {
            continue; // slot never opened because an earlier step failed
        }
        try {
            w.close();
        } catch (IOException e) {
            if (firstFailure == null) {
                firstFailure = e;
            } else {
                firstFailure.addSuppressed(e);
            }
        }
    }
    if (firstFailure != null) {
        throw firstFailure;
    }
}

From source file:org.datavec.hadoop.records.reader.TestMapFileRecordReaderMultiplePartsSomeEmpty.java

License:Apache License

/**
 * Builds two multi-part MapFile fixtures in which one of the three parts receives no records.
 *
 * <p>Six records keyed 0..5 are written per fixture: keys 0..2 go to {@code part-r-00000} and
 * keys 3..5 go to {@code part-r-00002}. A writer is still opened and closed on
 * {@code part-r-00001}, but nothing is appended to it. The first fixture holds
 * {@code SequenceRecordWritable} values (root directory {@code tempDirSeq}), the second holds
 * {@code RecordWritable} values (root directory {@code tempDir}). The expected contents are kept
 * in {@code seqMap} and {@code recordMap}, and the root locations in {@code seqMapFilePath} and
 * {@code mapFilePath}.
 *
 * @throws IOException if a part directory or MapFile cannot be created, written or closed
 */
@BeforeClass
public static void buildMapFiles() throws IOException {

    final int numParts = 3;
    final int numRecords = 6;
    // Keys below this bound go to the first part, the rest to the last part; the middle part stays empty.
    final int firstPartRecords = 3;
    final int firstPartIdx = 0;
    final int lastPartIdx = 2;

    Configuration c = new Configuration();
    Class<? extends WritableComparable> keyClass = LongWritable.class;

    //----- Sequence RR setup -----

    Class<? extends Writable> valueClass = SequenceRecordWritable.class;

    SequenceFile.Writer.Option[] opts = new SequenceFile.Writer.Option[] { MapFile.Writer.keyClass(keyClass),
            SequenceFile.Writer.valueClass(valueClass) };

    tempDirSeq = Files.createTempDir();
    seqMapFilePath = new Path("file:///" + tempDirSeq.getAbsolutePath());

    seqMap = new HashMap<>();
    for (int i = 0; i < numRecords; i++) {
        seqMap.put(new LongWritable(i),
                new SequenceRecordWritable(Arrays.asList(
                        Arrays.<org.datavec.api.writable.Writable>asList(new Text(i + "-0"),
                                new IntWritable(3 * i), new DoubleWritable(3 * i)),
                        Arrays.<org.datavec.api.writable.Writable>asList(new Text(i + "-1"),
                                new IntWritable(3 * i + 1), new DoubleWritable(3 * i + 1.0)),
                        Arrays.<org.datavec.api.writable.Writable>asList(new Text(i + "-2"),
                                new IntWritable(3 * i + 2), new DoubleWritable(3 * i + 2.0)))));
    }

    MapFile.Writer[] writers = new MapFile.Writer[numParts];
    try {
        for (int i = 0; i < numParts; i++) {
            writers[i] = openPartWriter(c, tempDirSeq, i, opts);
        }

        //Need to write in order, to different map files separately
        for (int i = 0; i < numRecords; i++) {
            int mapFileIdx = (i < firstPartRecords) ? firstPartIdx : lastPartIdx;

            LongWritable key = new LongWritable(i);
            SequenceRecordWritable value = seqMap.get(key);

            writers[mapFileIdx].append(key, value);
        }
    } finally {
        closeAll(writers);
    }

    //----- Standard RR setup -----

    valueClass = RecordWritable.class;

    opts = new SequenceFile.Writer.Option[] { MapFile.Writer.keyClass(keyClass),
            SequenceFile.Writer.valueClass(valueClass) };

    tempDir = Files.createTempDir();
    mapFilePath = new Path("file:///" + tempDir.getAbsolutePath());

    recordMap = new HashMap<>();
    for (int i = 0; i < numRecords; i++) {
        recordMap.put(new LongWritable(i),
                new RecordWritable(Arrays.<org.datavec.api.writable.Writable>asList(new Text(String.valueOf(i)),
                        new IntWritable(i), new DoubleWritable(i))));
    }

    writers = new MapFile.Writer[numParts];
    try {
        for (int i = 0; i < numParts; i++) {
            writers[i] = openPartWriter(c, tempDir, i, opts);
        }

        //Need to write in order
        for (int i = 0; i < numRecords; i++) {
            int mapFileIdx = (i < firstPartRecords) ? firstPartIdx : lastPartIdx;

            LongWritable key = new LongWritable(i);
            RecordWritable value = recordMap.get(key);

            writers[mapFileIdx].append(key, value);
        }
    } finally {
        closeAll(writers);
    }

}

/** Creates the {@code part-r-0000<partIdx>} directory under {@code parentDir} and opens a MapFile writer on it. */
private static MapFile.Writer openPartWriter(Configuration conf, File parentDir, int partIdx,
        SequenceFile.Writer.Option[] opts) throws IOException {
    File partDir = new File(parentDir, "part-r-0000" + partIdx);
    if (!partDir.mkdir() && !partDir.isDirectory()) {
        throw new IOException("Could not create directory " + partDir.getAbsolutePath());
    }
    return new MapFile.Writer(conf, new Path("file:///" + partDir.getAbsolutePath()), opts);
}

/** Closes every non-null writer; the first close failure is rethrown with later ones attached as suppressed. */
private static void closeAll(MapFile.Writer[] writers) throws IOException {
    IOException firstFailure = null;
    for (MapFile.Writer w : writers) {
        if (w == null) {
            continue; // slot never opened because an earlier step failed
        }
        try {
            w.close();
        } catch (IOException e) {
            if (firstFailure == null) {
                firstFailure = e;
            } else {
                firstFailure.addSuppressed(e);
            }
        }
    }
    if (firstFailure != null) {
        throw firstFailure;
    }
}