Example usage for org.apache.hadoop.io.NullWritable.get()

List of usage examples for org.apache.hadoop.io.NullWritable.get()

Introduction

On this page you can find usage examples for the org.apache.hadoop.io.NullWritable.get() method.

Prototype

public static NullWritable get() 

Source Link

Document

Returns the single instance of this class.

Usage

From source file:org.apache.gobblin.compaction.mapreduce.orc.OrcKeyDedupReducer.java

License:Apache License

/**
 * Initializes the output holders of this reducer: {@code outValue} gets a
 * fresh {@link OrcValue} and {@code outKey} is bound to the
 * {@link NullWritable} singleton.
 */
@Override
protected void initReusableObject() {
    this.outValue = new OrcValue();
    this.outKey = NullWritable.get();
}

From source file:org.apache.gobblin.compaction.mapreduce.orc.OrcValueMapper.java

License:Apache License

/**
 * Forwards one ORC struct to the job output and counts it.
 *
 * <p>When the job has no reduce tasks the struct is written as the key with a
 * {@link NullWritable} value; otherwise it is written as the value under the
 * key produced by {@code getDedupKey}.
 *
 * @param key       ignored input key
 * @param orcStruct the record to forward
 * @param context   the mapper context used for output and counters
 * @throws IOException          if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
protected void map(NullWritable key, OrcStruct orcStruct, Context context)
        throws IOException, InterruptedException {
    final boolean hasReducers = context.getNumReduceTasks() != 0;
    if (hasReducers) {
        // Reduce phase present: ship the struct as the value, keyed for dedup.
        this.outValue.value = orcStruct;
        context.write(getDedupKey(orcStruct), this.outValue);
    } else {
        // Map-only job: the struct itself is the output key.
        this.outKey.key = orcStruct;
        context.write(this.outKey, NullWritable.get());
    }

    context.getCounter(EVENT_COUNTER.RECORD_COUNT).increment(1);
}

From source file:org.apache.gobblin.compaction.mapreduce.OrcCompactionTaskTest.java

License:Apache License

/**
 * Writes the given ORC structs to a new ORC file at {@code path}.
 *
 * <p>The record writer is closed in a {@code finally} block so that it is
 * released even when writing one of the records fails.
 *
 * @param path       destination of the ORC file
 * @param schema     schema set on the ORC writer options
 * @param orcStructs records to write, in iteration order
 * @throws Exception if creating the writer, writing a record, or closing fails
 */
public void writeOrcRecordsInFile(Path path, TypeDescription schema, List<OrcStruct> orcStructs)
        throws Exception {
    Configuration configuration = new Configuration();
    OrcFile.WriterOptions options = OrcFile.writerOptions(configuration).setSchema(schema);

    Writer writer = OrcFile.createWriter(path, options);
    OrcMapreduceRecordWriter recordWriter = new OrcMapreduceRecordWriter(writer);
    try {
        for (OrcStruct orcRecord : orcStructs) {
            recordWriter.write(NullWritable.get(), orcRecord);
        }
    } finally {
        // Always close, otherwise a failed write leaves the writer open.
        recordWriter.close(new TaskAttemptContextImpl(configuration, new TaskAttemptID()));
    }
}

From source file:org.apache.hama.ml.kmeans.KMeansBSP.java

License:Apache License

/**
 * Prepares this peer for the k-means run: reads the initial centers from the
 * sequence file configured under {@code CENTER_IN_PATH}, instantiates the
 * distance measurer, and reads the iteration limit and caching switch from
 * the peer configuration.
 *
 * @param peer the BSP peer whose configuration drives the setup
 * @throws IOException              declared by the interface; closing the reader may throw it
 * @throws InterruptedException     declared by the interface
 * @throws RuntimeException         if the centers file cannot be read or the configured
 *                                  distance measurer class cannot be found
 * @throws IllegalArgumentException if the centers file contains no center
 */
@Override
public final void setup(BSPPeer<VectorWritable, NullWritable, IntWritable, VectorWritable, CenterMessage> peer)
        throws IOException, InterruptedException {
    conf = peer.getConfiguration();

    Path centroids = new Path(peer.getConfiguration().get(CENTER_IN_PATH));
    FileSystem fs = FileSystem.get(peer.getConfiguration());
    final ArrayList<DoubleVector> centers = new ArrayList<DoubleVector>();
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(fs, centroids, peer.getConfiguration());
        VectorWritable key = new VectorWritable();
        NullWritable value = NullWritable.get();
        while (reader.next(key, value)) {
            DoubleVector center = key.getVector();
            centers.add(center);
        }
    } catch (IOException e) {
        // Kept as an unchecked rethrow so existing callers see the same exception type.
        throw new RuntimeException(e);
    } finally {
        if (reader != null) {
            reader.close();
        }
    }

    Preconditions.checkArgument(centers.size() > 0, "Centers file must contain at least a single center!");
    this.centers = centers.toArray(new DoubleVector[centers.size()]);

    String distanceClass = peer.getConfiguration().get(DISTANCE_MEASURE_CLASS);
    if (distanceClass != null) {
        try {
            distanceMeasurer = ReflectionUtils.newInstance(distanceClass);
        } catch (ClassNotFoundException e) {
            // Chain the cause so the original class-loading failure stays visible in the stack trace.
            throw new RuntimeException("Wrong DistanceMeasurer implementation " + distanceClass + " provided",
                    e);
        }
    } else {
        distanceMeasurer = new EuclidianDistance();
    }

    maxIterations = peer.getConfiguration().getInt(MAX_ITERATIONS_KEY, -1);
    // normally we want to rely on OS caching, but if not, we can cache in heap
    if (peer.getConfiguration().getBoolean(CACHING_ENABLED_KEY, false)) {
        cache = new ArrayList<DoubleVector>();
    }
}

From source file:org.apache.hama.ml.kmeans.KMeansBSP.java

License:Apache License

/**
 * Runs one assignment step over this peer's vectors and sends the resulting
 * per-center updates to every peer.
 *
 * <p>Vectors come from the in-memory cache when it is already filled.
 * Otherwise they are read from the peer input, and stored in the cache on the
 * way when caching is enabled.
 *
 * @param peer the BSP peer providing input records and messaging
 * @throws IOException if reading input or sending a message fails
 */
private void assignCenters(
        BSPPeer<VectorWritable, NullWritable, IntWritable, VectorWritable, CenterMessage> peer)
        throws IOException {
    // Every task holds all centers; updated ones have to be broadcast below.
    final DoubleVector[] updatedCenters = new DoubleVector[centers.length];
    final int[] assignedCounts = new int[centers.length];

    if (cache != null && !cache.isEmpty()) {
        // Cache already filled: iterate in memory.
        for (DoubleVector cached : cache) {
            assignCentersInternal(updatedCenters, assignedCounts, cached);
        }
    } else {
        // Either caching is off, or the cache is enabled but still empty and
        // gets filled while the input is read for the first time.
        final boolean fillCache = cache != null;
        final VectorWritable inputKey = new VectorWritable();
        final NullWritable inputValue = NullWritable.get();
        while (peer.readNext(inputKey, inputValue)) {
            final DoubleVector copy = inputKey.getVector().deepCopy();
            if (fillCache) {
                cache.add(copy);
            }
            assignCentersInternal(updatedCenters, assignedCounts, copy);
        }
    }

    // Tell every peer about each center that received a local update.
    for (int idx = 0; idx < updatedCenters.length; idx++) {
        final DoubleVector updated = updatedCenters[idx];
        if (updated == null) {
            continue;
        }
        for (String peerName : peer.getAllPeerNames()) {
            peer.send(peerName, new CenterMessage(idx, assignedCounts[idx], updated));
        }
    }
}

From source file:org.apache.hama.ml.kmeans.KMeansBSP.java

License:Apache License

/**
 * Writes the final assignment of every local vector to its nearest center
 * and, on the first peer only, writes the centers to the path configured
 * under {@code CENTER_OUT_PATH} (if set).
 *
 * <p>The center writer is closed in a {@code finally} block so that it is not
 * leaked when appending a center fails.
 *
 * @param peer the BSP peer providing input records and output
 * @throws IOException if reading, writing or closing fails
 */
private void recalculateAssignmentsAndWrite(
        BSPPeer<VectorWritable, NullWritable, IntWritable, VectorWritable, CenterMessage> peer)
        throws IOException {
    final NullWritable value = NullWritable.get();
    final IntWritable keyWrite = new IntWritable();
    // also use our cache to speed up the final writes if exists
    if (cache == null) {
        final VectorWritable key = new VectorWritable();
        while (peer.readNext(key, value)) {
            final int lowestDistantCenter = getNearestCenter(key.getVector());
            keyWrite.set(lowestDistantCenter);
            peer.write(keyWrite, key);
        }
    } else {
        for (DoubleVector v : cache) {
            final int lowestDistantCenter = getNearestCenter(v);
            keyWrite.set(lowestDistantCenter);
            peer.write(keyWrite, new VectorWritable(v));
        }
    }
    // just on the first task write the centers to filesystem to prevent
    // collisions
    if (peer.getPeerName().equals(peer.getPeerName(0))) {
        String pathString = conf.get(CENTER_OUT_PATH);
        if (pathString != null) {
            final SequenceFile.Writer dataWriter = SequenceFile.createWriter(FileSystem.get(conf), conf,
                    new Path(pathString), VectorWritable.class, NullWritable.class, CompressionType.NONE);
            try {
                for (DoubleVector center : centers) {
                    dataWriter.append(new VectorWritable(center), value);
                }
            } finally {
                // Release the writer even if an append fails.
                dataWriter.close();
            }
        }
    }
}

From source file:org.apache.hama.ml.kmeans.KMeansBSP.java

License:Apache License

/**
 * Reads the cluster centers from the sequence file at {@code centerPath}.
 *
 * <p>The reader is closed in a {@code finally} block so that it is not leaked
 * when reading a record fails.
 *
 * @param conf       configuration used to open the sequence file
 * @param out        not used by this method
 * @param centerPath path of the sequence file holding the centers
 * @param fs         file system the center file lives on
 * @return a map from the zero-based position of a center in the file to the
 *         center vector
 * @throws IOException if the file cannot be opened, read or closed
 */
public static HashMap<Integer, DoubleVector> readClusterCenters(Configuration conf, Path out, Path centerPath,
        FileSystem fs) throws IOException {
    HashMap<Integer, DoubleVector> centerMap = new HashMap<Integer, DoubleVector>();
    SequenceFile.Reader centerReader = new SequenceFile.Reader(fs, centerPath, conf);
    try {
        int index = 0;
        VectorWritable center = new VectorWritable();
        while (centerReader.next(center, NullWritable.get())) {
            centerMap.put(index++, center.getVector());
        }
    } finally {
        centerReader.close();
    }
    return centerMap;
}

From source file:org.apache.hama.ml.kmeans.KMeansBSP.java

License:Apache License

/**
 * Reads input text files and writes it to a sequencefile.
 * /*from  w w  w  .j a  v a2s.  c om*/
 * @param k
 * @param conf
 * @param txtIn
 * @param center
 * @param out
 * @param fs
 * @param hasKey true if first column is required to be the key.
 * @return the path of a sequencefile.
 * @throws IOException
 */
public static Path prepareInputText(int k, Configuration conf, Path txtIn, Path center, Path out, FileSystem fs,
        boolean hasKey) throws IOException {

    Path in;
    if (fs.isFile(txtIn)) {
        in = new Path(txtIn.getParent(), "textinput/in.seq");
    } else {
        in = new Path(txtIn, "textinput/in.seq");
    }

    if (fs.exists(out))
        fs.delete(out, true);

    if (fs.exists(center))
        fs.delete(center, true);

    if (fs.exists(in))
        fs.delete(in, true);

    final NullWritable value = NullWritable.get();

    Writer centerWriter = new SequenceFile.Writer(fs, conf, center, VectorWritable.class, NullWritable.class);

    final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, in, VectorWritable.class,
            NullWritable.class, CompressionType.NONE);

    int i = 0;

    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(txtIn)));
    String line;
    while ((line = br.readLine()) != null) {
        String[] split = line.split("\t");
        int columnLength = split.length;
        int indexPos = 0;
        if (hasKey) {
            columnLength = columnLength - 1;
            indexPos++;
        }

        DenseDoubleVector vec = new DenseDoubleVector(columnLength);
        for (int j = 0; j < columnLength; j++) {
            vec.set(j, Double.parseDouble(split[j + indexPos]));
        }

        VectorWritable vector;
        if (hasKey) {
            NamedDoubleVector named = new NamedDoubleVector(split[0], vec);
            vector = new VectorWritable(named);
        } else {
            vector = new VectorWritable(vec);
        }

        dataWriter.append(vector, value);
        if (k > i) {
            centerWriter.append(vector, value);
        }
        i++;
    }
    br.close();
    centerWriter.close();
    dataWriter.close();
    return in;
}

From source file:org.apache.hama.ml.kmeans.KMeansBSP.java

License:Apache License

/**
 * Creates {@code count} random vectors as input and assigns the first
 * {@code k} vectors as initial centers.
 *
 * <p>Any existing {@code out}, {@code center} and {@code in} paths are deleted
 * first. Both writers are closed in {@code finally} blocks so that they are
 * released even when an append fails.
 *
 * @param count     number of vectors to generate; also the exclusive upper
 *                  bound of each generated component
 * @param k         number of leading vectors that are also written as centers
 * @param dimension number of components per vector
 * @param conf      configuration used to create the sequence files
 * @param in        path of the input sequence file to create
 * @param center    path of the center sequence file to create
 * @param out       output path that is deleted if it exists
 * @param fs        file system all paths are resolved against
 * @throws IOException if deleting, writing or closing fails
 */
public static void prepareInput(int count, int k, int dimension, Configuration conf, Path in, Path center,
        Path out, FileSystem fs) throws IOException {
    if (fs.exists(out)) {
        fs.delete(out, true);
    }

    if (fs.exists(center)) {
        fs.delete(center, true);
    }

    if (fs.exists(in)) {
        fs.delete(in, true);
    }

    final NullWritable value = NullWritable.get();

    final SequenceFile.Writer centerWriter = SequenceFile.createWriter(fs, conf, center, VectorWritable.class,
            NullWritable.class, CompressionType.NONE);
    try {
        final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, in, VectorWritable.class,
                NullWritable.class, CompressionType.NONE);
        try {
            Random r = new Random();
            for (int i = 0; i < count; i++) {
                double[] arr = new double[dimension];
                for (int d = 0; d < dimension; d++) {
                    arr[d] = r.nextInt(count);
                }
                VectorWritable vector = new VectorWritable(new DenseDoubleVector(arr));
                dataWriter.append(vector, value);
                if (k > i) {
                    centerWriter.append(vector, value);
                }
            }
        } finally {
            dataWriter.close();
        }
    } finally {
        centerWriter.close();
    }
}

From source file:org.apache.hama.pipes.TestPipes.java

License:Apache License

/**
 * Checks the first record of every non-directory file under
 * {@code outputPath} against the expected result.
 *
 * <p>Each reader is closed in a {@code finally} block so that a failing
 * assertion or read does not leave the file open.
 *
 * @param conf           configuration used to open the sequence files
 * @param outputPath     directory whose files are verified
 * @param expectedResult value the first record of each file must have
 * @param delta          maximum allowed absolute difference
 * @throws IOException if listing, reading or closing fails
 */
static void verifyOutput(HamaConfiguration conf, Path outputPath, double expectedResult, double delta)
        throws IOException {
    FileStatus[] listStatus = fs.listStatus(outputPath);
    for (FileStatus status : listStatus) {
        if (!status.isDir()) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, status.getPath(), conf);
            try {
                NullWritable key = NullWritable.get();
                DoubleWritable value = new DoubleWritable();
                if (reader.next(key, value)) {
                    LOG.info("Output File: " + status.getPath());
                    LOG.info("key: '" + key + "' value: '" + value + "' expected: '" + expectedResult + "'");
                    assertEquals("Expected value: '" + expectedResult + "' != '" + value + "'", expectedResult,
                            value.get(), delta);
                }
            } finally {
                reader.close();
            }
        }
    }
}