Example usage for org.apache.hadoop.io.IntWritable.set

Introduction

This page lists usage examples for org.apache.hadoop.io.IntWritable.set, collected from open-source projects.

Prototype

public void set(int value) 

Document

Set the value of this IntWritable.
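
A minimal, self-contained illustration of the prototype above (the class name is made up for this page): set() replaces the wrapped int in place, so a single instance can be reused rather than allocating a new object per value.

import org.apache.hadoop.io.IntWritable;

public class IntWritableSetExample {
    public static void main(String[] args) {
        IntWritable n = new IntWritable(); // value defaults to 0
        n.set(42);
        System.out.println(n.get());  // prints 42
        n.set(n.get() + 1);           // mutate in place; no new object is created
        System.out.println(n);        // IntWritable.toString() prints 43
    }
}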

Usage

From source file: edu.umn.cs.spatialHadoop.operations.KNN.java

License: Open Source License

/**
 * A MapReduce version of KNN query.
 * @param inputPath path of the spatially indexed input file
 * @param userOutputPath path of the output file, or null to use a temporary path that is deleted on completion
 * @param params operation parameters, including the query point and k
 * @return the last MapReduce job that was executed
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static <S extends Shape> Job knnMapReduce(Path inputPath, Path userOutputPath, OperationsParams params)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(params, "KNN");
    job.setJarByClass(KNN.class);

    FileSystem inFs = inputPath.getFileSystem(params);
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inputPath);

    job.setMapperClass(KNNMap.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(TextWithDistance.class);

    job.setReducerClass(KNNReduce.class);
    job.setNumReduceTasks(1);

    job.getConfiguration().setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    final Point queryPoint = (Point) params.getShape("point");
    final int k = params.getInt("k", 1);

    final IntWritable additional_blocks_2b_processed = new IntWritable(0);
    long resultCount;
    int iterations = 0;

    Path outputPath = userOutputPath;
    if (outputPath == null) {
        do {
            outputPath = new Path(inputPath.getName() + ".knn_" + (int) (Math.random() * 1000000));
        } while (inFs.exists(outputPath));
    }
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outputPath);

    GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inputPath);
    Configuration templateConf = job.getConfiguration();

    FileSystem outFs = outputPath.getFileSystem(params);
    // Start with the query point to select all partitions overlapping with it
    Shape range_for_this_iteration = new Point(queryPoint.x, queryPoint.y);

    do {
        job = new Job(templateConf);
        // Delete the output of the previous iteration, if any
        if (outputPath != null)
            outFs.delete(outputPath, true);

        LOG.info("Running iteration: " + (++iterations));
        // Set query range for the SpatialInputFormat
        OperationsParams.setShape(job.getConfiguration(), RangeFilter.QueryRange, range_for_this_iteration);

        // Submit the job
        if (params.getBoolean("background", false)) {
            // XXX this is incorrect because if the job needs multiple iterations,
            // it will run only the first one
            job.waitForCompletion(false);
            return job;
        }
        job.waitForCompletion(false);

        // Retrieve answers for this iteration
        Counters counters = job.getCounters();
        Counter resultSizeCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
        resultCount = resultSizeCounter.getValue();

        if (globalIndex != null) {
            Circle range_for_next_iteration;
            if (resultCount < k) {
                LOG.info("Found only " + resultCount + " results");
                // Did not find enough results in the query space
                // Increase the distance by doubling the maximum distance among all
                // partitions that were processed
                final DoubleWritable maximum_distance = new DoubleWritable(0);
                int matched_partitions = globalIndex.rangeQuery(range_for_this_iteration,
                        new ResultCollector<Partition>() {
                            @Override
                            public void collect(Partition p) {
                                double distance = p.getMaxDistanceTo(queryPoint.x, queryPoint.y);
                                if (distance > maximum_distance.get())
                                    maximum_distance.set(distance);
                            }
                        });
                if (matched_partitions == 0) {
                    // The query point is outside the search space
                    // Set the range to include the closest partition
                    globalIndex.knn(queryPoint.x, queryPoint.y, 1, new ResultCollector2<Partition, Double>() {
                        @Override
                        public void collect(Partition r, Double s) {
                            maximum_distance.set(s);
                        }
                    });
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, maximum_distance.get() * 2);
                LOG.info("Expanding to " + maximum_distance.get() * 2);
            } else {
                // Calculate the new test range which is a circle centered at the
                // query point and distance to the k^{th} neighbor

                // Get distance to the kth neighbor
                final DoubleWritable distance_to_kth_neighbor = new DoubleWritable();
                FileStatus[] results = outFs.listStatus(outputPath);
                for (FileStatus result_file : results) {
                    if (result_file.getLen() > 0 && result_file.getPath().getName().startsWith("part-")) {
                        // Read the last line (kth neighbor)
                        Tail.tail(outFs, result_file.getPath(), 1, new TextWithDistance(),
                                new ResultCollector<TextWithDistance>() {

                                    @Override
                                    public void collect(TextWithDistance r) {
                                        distance_to_kth_neighbor.set(r.distance);
                                    }
                                });
                    }
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y,
                        distance_to_kth_neighbor.get());
                LOG.info("Expanding to kth neighbor: " + distance_to_kth_neighbor);
            }

            // Calculate the number of blocks to be processed to check the
            // terminating condition
            additional_blocks_2b_processed.set(0);
            final Shape temp = range_for_this_iteration;
            globalIndex.rangeQuery(range_for_next_iteration, new ResultCollector<Partition>() {
                @Override
                public void collect(Partition p) {
                    if (!(p.isIntersected(temp))) {
                        additional_blocks_2b_processed.set(additional_blocks_2b_processed.get() + 1);
                    }
                }
            });
            range_for_this_iteration = range_for_next_iteration;
        }
    } while (additional_blocks_2b_processed.get() > 0);

    // If output file is not set by user, delete it
    if (userOutputPath == null)
        outFs.delete(outputPath, true);
    TotalIterations.addAndGet(iterations);

    return job;
}
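
The detail worth noting above: Java requires local variables captured by an anonymous class to be final, so the code wraps a mutable counter in a final IntWritable and updates it through set(). A stripped-down sketch of that idiom (the collector interface and all names here are illustrative, not SpatialHadoop's):

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.io.IntWritable;

public class CallbackCounterSketch {
    interface ResultCollector<T> { void collect(T r); }

    static <T> void forEach(List<T> items, ResultCollector<T> out) {
        for (T item : items) out.collect(item);
    }

    public static void main(String[] args) {
        // final reference, mutable content: legal to capture and to update
        final IntWritable matched = new IntWritable(0);
        forEach(Arrays.asList(1, 5, 12, 7), new ResultCollector<Integer>() {
            @Override
            public void collect(Integer r) {
                if (r > 4) // count the items matching a predicate
                    matched.set(matched.get() + 1);
            }
        });
        System.out.println("matched = " + matched.get()); // prints matched = 3
    }
}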

From source file: edu.umn.cs.spatialHadoop.operations.Union.java

License: Open Source License

private static <S extends OGCJTSShape> void unionLocal(Path inPath, Path outPath, final OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException {
    // 1- Split the input path/file to get splits that can be processed independently
    final SpatialInputFormat3<Rectangle, S> inputFormat = new SpatialInputFormat3<Rectangle, S>();
    Job job = Job.getInstance(params);
    SpatialInputFormat3.setInputPaths(job, inPath);
    final List<InputSplit> splits = inputFormat.getSplits(job);
    int parallelism = params.getInt("parallel", Runtime.getRuntime().availableProcessors());

    // 2- Process splits in parallel
    final List<Float> progresses = new Vector<Float>();
    final IntWritable overallProgress = new IntWritable(0);
    List<List<Geometry>> results = Parallel.forEach(splits.size(), new RunnableRange<List<Geometry>>() {
        @Override
        public List<Geometry> run(final int i1, final int i2) {
            final int pi;
            final IntWritable splitsProgress = new IntWritable();
            synchronized (progresses) {
                pi = progresses.size();
                progresses.add(0f);
            }
            final float progressRatio = (i2 - i1) / (float) splits.size();
            Progressable progress = new Progressable.NullProgressable() {
                @Override
                public void progress(float p) {
                    progresses.set(pi, p * ((splitsProgress.get() - i1) / (float) (i2 - i1)) * progressRatio);
                    float sum = 0;
                    for (float f : progresses)
                        sum += f;
                    int newProgress = (int) (sum * 100);
                    if (newProgress > overallProgress.get()) {
                        overallProgress.set(newProgress);
                        LOG.info("Local union progress " + newProgress + "%");
                    }
                }
            };

            final List<Geometry> localUnion = new ArrayList<Geometry>();
            ResultCollector<Geometry> output = new ResultCollector<Geometry>() {
                @Override
                public void collect(Geometry r) {
                    localUnion.add(r);
                }
            };

            final int MaxBatchSize = 100000;
            Geometry[] batch = new Geometry[MaxBatchSize];
            int batchSize = 0;
            for (int i = i1; i < i2; i++) {
                splitsProgress.set(i);
                try {
                    FileSplit fsplit = (FileSplit) splits.get(i);
                    final RecordReader<Rectangle, Iterable<S>> reader = inputFormat.createRecordReader(fsplit,
                            null);
                    if (reader instanceof SpatialRecordReader3) {
                        ((SpatialRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof RTreeRecordReader3) {
                        ((RTreeRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof HDFRecordReader) {
                        ((HDFRecordReader) reader).initialize(fsplit, params);
                    } else {
                        throw new RuntimeException("Unknown record reader");
                    }
                    while (reader.nextKeyValue()) {
                        Iterable<S> shapes = reader.getCurrentValue();
                        for (S s : shapes) {
                            if (s.geom == null)
                                continue;
                            batch[batchSize++] = s.geom;
                            if (batchSize >= MaxBatchSize) {
                                SpatialAlgorithms.multiUnion(batch, progress, output);
                                batchSize = 0;
                            }
                        }
                    }
                    reader.close();
                } catch (IOException e) {
                    LOG.error("Error processing split " + splits.get(i), e);
                } catch (InterruptedException e) {
                    LOG.error("Error processing split " + splits.get(i), e);
                }
            }
            // Union all remaining geometries
            try {
                Geometry[] finalBatch = new Geometry[batchSize];
                System.arraycopy(batch, 0, finalBatch, 0, batchSize);
                SpatialAlgorithms.multiUnion(finalBatch, progress, output);
                return localUnion;
            } catch (IOException e) {
                // Should never happen as the context is passed as null
                throw new RuntimeException("Error in local union", e);
            }
        }
    }, parallelism);

    // Write result to output
    LOG.info("Merge the results of all splits");
    int totalNumGeometries = 0;
    for (List<Geometry> result : results)
        totalNumGeometries += result.size();
    List<Geometry> allInOne = new ArrayList<Geometry>(totalNumGeometries);
    for (List<Geometry> result : results)
        allInOne.addAll(result);

    final S outShape = (S) params.getShape("shape");
    final PrintStream out;
    if (outPath == null || !params.getBoolean("output", true)) {
        // Skip writing the output
        out = new PrintStream(new NullOutputStream());
    } else {
        FileSystem outFS = outPath.getFileSystem(params);
        out = new PrintStream(outFS.create(outPath));
    }

    SpatialAlgorithms.multiUnion(allInOne.toArray(new Geometry[allInOne.size()]),
            new Progressable.NullProgressable() {
                int lastProgress = 0;

                public void progress(float p) {
                    int newProgress = (int) (p * 100);
                    if (newProgress > lastProgress) {
                        LOG.info("Global union progress " + (lastProgress = newProgress) + "%");
                    }
                }
            }, new ResultCollector<Geometry>() {
                Text line = new Text2();

                @Override
                public void collect(Geometry r) {
                    outShape.geom = r;
                    outShape.toText(line);
                    out.println(line);
                }
            });
    out.close();
}
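
Here IntWritable serves as a shared progress gauge: each callback recomputes the overall percentage and calls set() only when the value advances, which throttles logging. A reduced, single-threaded sketch of that throttled-logging idiom (names are illustrative; note IntWritable itself is not synchronized, so truly concurrent writers would need external locking):

import org.apache.hadoop.io.IntWritable;

public class ProgressLogSketch {
    public static void main(String[] args) {
        final IntWritable overallProgress = new IntWritable(0);
        int totalSteps = 1000;
        for (int step = 0; step <= totalSteps; step++) {
            int newProgress = step * 100 / totalSteps; // percent complete
            // Log only when the integer percentage actually advances
            if (newProgress > overallProgress.get()) {
                overallProgress.set(newProgress);
                System.out.println("progress " + newProgress + "%");
            }
        }
    }
}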

From source file: edu.umn.cs.sthadoop.operations.HSPKNNQ.java

License: Open Source License

/**
 * A MapReduce version of KNN query.
 * @param inputPath path of the spatially indexed input file
 * @param userOutputPath path of the output file, or null to use a temporary path that is deleted on completion
 * @param params operation parameters, including the query point and k
 * @return the last MapReduce job that was executed
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static <S extends Shape> Job knnMapReduce(Path inputPath, Path userOutputPath, OperationsParams params)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(params, "PKNN");
    job.setJarByClass(HSPKNNQ.class);

    FileSystem inFs = inputPath.getFileSystem(params);
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inputPath);

    job.setMapperClass(KNNMap.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(TextWithDistance.class);

    job.setReducerClass(KNNReduce.class);
    job.setNumReduceTasks(1);

    job.getConfiguration().setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    final Point queryPoint = (Point) params.getShape("point");
    final int k = params.getInt("k", 1);

    final IntWritable additional_blocks_2b_processed = new IntWritable(0);
    long resultCount;
    int iterations = 0;

    Path outputPath = userOutputPath;
    if (outputPath == null) {
        do {
            outputPath = new Path(inputPath.getName() + ".knn_" + (int) (Math.random() * 1000000));
        } while (inFs.exists(outputPath));
    }
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outputPath);

    GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inputPath);
    Configuration templateConf = job.getConfiguration();

    FileSystem outFs = outputPath.getFileSystem(params);
    // Start with the query point to select all partitions overlapping with it
    Shape range_for_this_iteration = new Point(queryPoint.x, queryPoint.y);

    do {
        job = new Job(templateConf);
        // Delete the output of the previous iteration, if any
        if (outputPath != null)
            outFs.delete(outputPath, true);

        LOG.info("Running iteration: " + (++iterations));
        // Set query range for the SpatialInputFormat
        OperationsParams.setShape(job.getConfiguration(), RangeFilter.QueryRange, range_for_this_iteration);

        // Submit the job
        if (params.getBoolean("background", false)) {
            // XXX this is incorrect because if the job needs multiple iterations,
            // it will run only the first one
            job.waitForCompletion(false);
            return job;
        }
        job.waitForCompletion(false);

        // Retrieve answers for this iteration
        Counters counters = job.getCounters();
        Counter resultSizeCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
        resultCount = resultSizeCounter.getValue();

        if (globalIndex != null) {
            Circle range_for_next_iteration;
            if (resultCount < k) {
                LOG.info("Found only " + resultCount + " results");
                // Did not find enough results in the query space
                // Increase the distance by doubling the maximum distance among all
                // partitions that were processed
                final DoubleWritable maximum_distance = new DoubleWritable(0);
                int matched_partitions = globalIndex.rangeQuery(range_for_this_iteration,
                        new ResultCollector<Partition>() {
                            @Override
                            public void collect(Partition p) {
                                double distance = p.getMaxDistanceTo(queryPoint.x, queryPoint.y);
                                if (distance > maximum_distance.get())
                                    maximum_distance.set(distance);
                            }
                        });
                if (matched_partitions == 0) {
                    // The query point is outside the search space
                    // Set the range to include the closest partition
                    globalIndex.knn(queryPoint.x, queryPoint.y, 1, new ResultCollector2<Partition, Double>() {
                        @Override
                        public void collect(Partition r, Double s) {
                            maximum_distance.set(s);
                        }
                    });
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, maximum_distance.get() * 2);
                LOG.info("Expanding to " + maximum_distance.get() * 2);
            } else {
                // Calculate the new test range which is a circle centered at the
                // query point and distance to the k^{th} neighbor

                // Get distance to the kth neighbor
                final DoubleWritable distance_to_kth_neighbor = new DoubleWritable();
                FileStatus[] results = outFs.listStatus(outputPath);
                for (FileStatus result_file : results) {
                    if (result_file.getLen() > 0 && result_file.getPath().getName().startsWith("part-")) {
                        // Read the last line (kth neighbor)
                        Tail.tail(outFs, result_file.getPath(), 1, new TextWithDistance(),
                                new ResultCollector<TextWithDistance>() {

                                    @Override
                                    public void collect(TextWithDistance r) {
                                        distance_to_kth_neighbor.set(r.distance);
                                    }
                                });
                    }
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y,
                        distance_to_kth_neighbor.get());
                LOG.info("Expanding to kth neighbor: " + distance_to_kth_neighbor);
            }

            // Calculate the number of blocks to be processed to check the
            // terminating condition
            additional_blocks_2b_processed.set(0);
            final Shape temp = range_for_this_iteration;
            globalIndex.rangeQuery(range_for_next_iteration, new ResultCollector<Partition>() {
                @Override
                public void collect(Partition p) {
                    if (!(p.isIntersected(temp))) {
                        additional_blocks_2b_processed.set(additional_blocks_2b_processed.get() + 1);
                    }
                }
            });
            range_for_this_iteration = range_for_next_iteration;
        }
    } while (additional_blocks_2b_processed.get() > 0);

    // If output file is not set by user, delete it
    if (userOutputPath == null)
        outFs.delete(outputPath, true);
    TotalIterations.addAndGet(iterations);

    return job;
}

From source file: edu.umn.cs.sthadoop.trajectory.KNNDTW.java

License: Open Source License

/**
 * A MapReduce version of KNN query.
 * @param inputPath path of the spatially indexed input file
 * @param userOutputPath path of the output file, or null to use a temporary path that is deleted on completion
 * @param params operation parameters, including the query point and k
 * @return the last MapReduce job that was executed
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static <S extends Shape> Job knnMapReduce(Path inputPath, Path userOutputPath, OperationsParams params)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(params, "KNN");
    job.setJarByClass(KNNDTW.class);

    FileSystem inFs = inputPath.getFileSystem(params);
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inputPath);

    job.setMapperClass(KNNMap.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(TextWithDistance.class);

    job.setReducerClass(KNNReduce.class);
    job.setNumReduceTasks(1);

    job.getConfiguration().setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    final Point queryPoint = (Point) params.getShape("point");
    final int k = params.getInt("k", 1);

    final IntWritable additional_blocks_2b_processed = new IntWritable(0);
    long resultCount;
    int iterations = 0;

    Path outputPath = userOutputPath;
    if (outputPath == null) {
        do {
            outputPath = new Path(inputPath.getName() + ".knn_" + (int) (Math.random() * 1000000));
        } while (inFs.exists(outputPath));
    }
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outputPath);

    GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inputPath);
    Configuration templateConf = job.getConfiguration();

    FileSystem outFs = outputPath.getFileSystem(params);
    // Start with the query point to select all partitions overlapping with it
    Shape range_for_this_iteration = new Point(queryPoint.x, queryPoint.y);

    do {
        job = new Job(templateConf);
        // Delete the output of the previous iteration, if any
        if (outputPath != null)
            outFs.delete(outputPath, true);

        LOG.info("Running iteration: " + (++iterations));
        // Set query range for the SpatialInputFormat
        OperationsParams.setShape(job.getConfiguration(), RangeFilter.QueryRange, range_for_this_iteration);

        // Submit the job
        if (params.getBoolean("background", false)) {
            // XXX this is incorrect because if the job needs multiple iterations,
            // it will run only the first one
            job.waitForCompletion(false);
            return job;
        }
        job.waitForCompletion(false);

        // Retrieve answers for this iteration
        Counters counters = job.getCounters();
        Counter resultSizeCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
        resultCount = resultSizeCounter.getValue();

        if (globalIndex != null) {
            Circle range_for_next_iteration;
            if (resultCount < k) {
                LOG.info("Found only " + resultCount + " results");
                // Did not find enough results in the query space
                // Increase the distance by doubling the maximum distance among all
                // partitions that were processed
                final DoubleWritable maximum_distance = new DoubleWritable(0);
                int matched_partitions = globalIndex.rangeQuery(range_for_this_iteration,
                        new ResultCollector<Partition>() {
                            @Override
                            public void collect(Partition p) {
                                double distance = p.getMaxDistanceTo(queryPoint.x, queryPoint.y);
                                if (distance > maximum_distance.get())
                                    maximum_distance.set(distance);
                            }
                        });
                if (matched_partitions == 0) {
                    // The query point is outside the search space
                    // Set the range to include the closest partition
                    globalIndex.knn(queryPoint.x, queryPoint.y, 1, new ResultCollector2<Partition, Double>() {
                        @Override
                        public void collect(Partition r, Double s) {
                            maximum_distance.set(s);
                        }
                    });
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, maximum_distance.get() * 2);
                LOG.info("Expanding to " + maximum_distance.get() * 2);
            } else {
                // Calculate the new test range which is a circle centered at the
                // query point and distance to the k^{th} neighbor

                // Get distance to the kth neighbor
                final DoubleWritable distance_to_kth_neighbor = new DoubleWritable();
                FileStatus[] results = outFs.listStatus(outputPath);
                for (FileStatus result_file : results) {
                    if (result_file.getLen() > 0 && result_file.getPath().getName().startsWith("part-")) {
                        // Read the last line (kth neighbor)
                        Tail.tail(outFs, result_file.getPath(), 1, new TextWithDistance(),
                                new ResultCollector<TextWithDistance>() {

                                    @Override
                                    public void collect(TextWithDistance r) {
                                        distance_to_kth_neighbor.set(r.distance);
                                    }
                                });
                    }
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y,
                        distance_to_kth_neighbor.get());
                LOG.info("Expanding to kth neighbor: " + distance_to_kth_neighbor);
            }

            // Calculate the number of blocks to be processed to check the
            // terminating condition
            additional_blocks_2b_processed.set(0);
            final Shape temp = range_for_this_iteration;
            globalIndex.rangeQuery(range_for_next_iteration, new ResultCollector<Partition>() {
                @Override
                public void collect(Partition p) {
                    if (!(p.isIntersected(temp))) {
                        additional_blocks_2b_processed.set(additional_blocks_2b_processed.get() + 1);
                    }
                }
            });
            range_for_this_iteration = range_for_next_iteration;
        }
    } while (additional_blocks_2b_processed.get() > 0);

    // If output file is not set by user, delete it
    if (userOutputPath == null)
        outFs.delete(outputPath, true);
    TotalIterations.addAndGet(iterations);

    return job;
}

From source file: eu.stratosphere.addons.parquet.SequenceFileSourceTest.java

License: Apache License

private void populateSeqFile(File f, String[] content) throws IOException {
    URI uri = f.toURI();
    Configuration conf = new JobConf();
    FileSystem fs = FileSystem.get(uri, conf);
    Path path = new Path(uri);
    IntWritable key = new IntWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
        for (int i = 0; i < content.length; i++) {
            key.set(i);
            value.set(content[i]);
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}
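
A natural counterpart to populateSeqFile above is reading the records back with the classic SequenceFile.Reader API, again reusing one IntWritable/Text pair that next() fills in place. The reader and writable classes are standard hadoop-common; the helper itself is a sketch, not part of the original test:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SeqFileReadSketch {
    static void dumpSeqFile(FileSystem fs, Path path, Configuration conf) throws IOException {
        IntWritable key = new IntWritable();
        Text value = new Text();
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(fs, path, conf);
            while (reader.next(key, value)) { // next() overwrites key and value in place
                System.out.println(key.get() + "\t" + value);
            }
        } finally {
            IOUtils.closeStream(reader);
        }
    }
}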

From source file: hadoop.examples.hdfs.SequenceFileWriteDemo.java

License: Open Source License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String uri = "hdfs://exoplatform:9000/user/haint/temp.file";
    Path path = new Path(uri);
    FileSystem fs = FileSystem.get(URI.create(uri), conf);

    // Reuse a single key/value pair for all appended records
    IntWritable key = new IntWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
        for (int i = 0; i < 100; i++) {
            key.set(100 - i);
            value.set(DATA[i % DATA.length]);
            System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}

From source file: hivemall.classifier.KernelExpansionPassiveAggressiveUDTF.java

License: Apache License

@Override
public void close() throws HiveException {
    final IntWritable h = new IntWritable(0); // row[0]
    final FloatWritable w0 = new FloatWritable(_w0); // row[1]
    final FloatWritable w1 = new FloatWritable(); // row[2]
    final FloatWritable w2 = new FloatWritable(); // row[3]
    final IntWritable hk = new IntWritable(0); // row[4]
    final FloatWritable w3 = new FloatWritable(); // row[5]
    final Object[] row = new Object[] { h, w0, null, null, null, null };
    forward(row); // 0(f), w0
    row[1] = null;

    row[2] = w1;
    row[3] = w2;
    final Int2FloatMap w2map = _w2;
    for (Int2FloatMap.Entry e : Fastutil.fastIterable(_w1)) {
        int k = e.getIntKey();
        Preconditions.checkArgument(k > 0, HiveException.class);
        h.set(k);
        w1.set(e.getFloatValue());
        w2.set(w2map.get(k));
        forward(row); // h(f), w1, w2
    }
    this._w1 = null;
    this._w2 = null;

    row[0] = null;
    row[2] = null;
    row[3] = null;
    row[4] = hk;
    row[5] = w3;

    _w3.int2FloatEntrySet();
    for (Int2FloatMap.Entry e : Fastutil.fastIterable(_w3)) {
        int k = e.getIntKey();
        Preconditions.checkArgument(k > 0, HiveException.class);
        hk.set(k);
        w3.set(e.getFloatValue());
        forward(row); // hk(f), w3
    }
    this._w3 = null;
}
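
The Hivemall UDTFs above reuse a single row array and mutate the writables inside it between forward() calls, avoiding per-row allocation. A stripped-down sketch of that idiom (emit() stands in for Hive's forward() and is illustrative):

import org.apache.hadoop.io.IntWritable;

public class RowReuseSketch {
    static void emit(Object[] row) {
        // A real consumer (e.g. Hive) must copy any values it wants to keep,
        // because the same objects are overwritten before the next call.
        System.out.println(java.util.Arrays.toString(row));
    }

    public static void main(String[] args) {
        final IntWritable h = new IntWritable();
        final Object[] row = new Object[] { h, null };
        for (int k : new int[] { 3, 7, 9 }) {
            h.set(k);   // overwrite the writable in place
            emit(row);  // prints [3, null], then [7, null], then [9, null]
        }
    }
}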

From source file: hivemall.fm.FactorizationMachineUDTF.java

License: Apache License

private void forwardAsIntFeature(@Nonnull final FactorizationMachineModel model, final int factors)
        throws HiveException {
    final IntWritable f_idx = new IntWritable(0);
    final FloatWritable f_Wi = new FloatWritable(0.f);
    final FloatWritable[] f_Vi = HiveUtils.newFloatArray(factors, 0.f);

    final Object[] forwardObjs = new Object[3];
    forwardObjs[0] = f_idx;
    forwardObjs[1] = f_Wi;
    forwardObjs[2] = null;
    // W0
    f_idx.set(0);
    f_Wi.set(model.getW0());
    // V0 is null
    forward(forwardObjs);

    // Wi, Vif (i starts from 1..P)
    forwardObjs[2] = Arrays.asList(f_Vi);

    for (int i = model.getMinIndex(), maxIdx = model.getMaxIndex(); i <= maxIdx; i++) {
        final float[] vi = model.getV(i);
        if (vi == null) {
            continue;
        }
        f_idx.set(i);
        // set Wi
        final float w = model.getW(i);
        f_Wi.set(w);
        // set Vif
        for (int f = 0; f < factors; f++) {
            float v = vi[f];
            f_Vi[f].set(v);
        }
        forward(forwardObjs);
    }
}

From source file: hivemall.ftvec.conv.QuantifyColumnsUDTF.java

License: Apache License

@Override
public void process(Object[] args) throws HiveException {
    boolean outputRow = boolOI.get(args[0]);
    if (outputRow) {
        final Object[] forwardObjs = this.forwardObjs;
        for (int i = 0, outputSize = args.length - 1; i < outputSize; i++) {
            Object arg = args[i + 1];
            Identifier<String> identifier = identifiers[i];
            if (identifier == null) {
                forwardObjs[i] = arg;
            } else {
                if (arg == null) {
                    forwardObjs[i] = null;
                } else {
                    String k = arg.toString();
                    int id = identifier.valueOf(k);
                    IntWritable o = forwardIntObjs[i];
                    o.set(id);
                    forwardObjs[i] = o;
                }
            }
        }
        forward(forwardObjs);
    } else { // load only
        for (int i = 0, outputSize = args.length - 1; i < outputSize; i++) {
            Identifier<String> identifier = identifiers[i];
            if (identifier != null) {
                Object arg = args[i + 1];
                if (arg != null) {
                    String k = arg.toString();
                    identifier.valueOf(k);
                }
            }
        }
    }

}

From source file: hivemall.knn.similarity.DIMSUMMapperUDTF.java

License: Apache License

private void forwardAsIntFeature(@Nonnull Feature[] row) throws HiveException {
    final int length = row.length;

    Feature[] rowScaled = new Feature[length];
    for (int i = 0; i < length; i++) {
        int j = row[i].getFeatureIndex();

        double norm = Primitives.doubleValue(colNorms.get(j), 0.d);
        if (norm == 0.d) { // avoid zero-division
            norm = 1.d;
        }
        double scaled = row[i].getValue() / Math.min(sqrtGamma, norm);

        rowScaled[i] = new IntFeature(j, scaled);
    }

    final IntWritable jWritable = new IntWritable();
    final IntWritable kWritable = new IntWritable();
    final DoubleWritable bWritable = new DoubleWritable();

    final Object[] forwardObjs = new Object[3];
    forwardObjs[0] = jWritable;
    forwardObjs[1] = kWritable;
    forwardObjs[2] = bWritable;

    for (int ij = 0; ij < length; ij++) {
        int j = rowScaled[ij].getFeatureIndex();
        double jVal = rowScaled[ij].getValue();
        double jProb = Primitives.doubleValue(colProbs.get(j), 0.d);

        if (jVal != 0.d && rnd.nextDouble() < jProb) {
            for (int ik = ij + 1; ik < length; ik++) {
                int k = rowScaled[ik].getFeatureIndex();
                double kVal = rowScaled[ik].getValue();
                double kProb = Primitives.doubleValue(colProbs.get(k), 0.d);

                if (kVal != 0.d && rnd.nextDouble() < kProb) {
                    // compute b_jk
                    bWritable.set(jVal * kVal);

                    if (symmetricOutput) {
                        // (j, k); similarity matrix is symmetric
                        jWritable.set(j);
                        kWritable.set(k);
                        forward(forwardObjs);

                        // (k, j)
                        jWritable.set(k);
                        kWritable.set(j);
                        forward(forwardObjs);
                    } else {
                        if (j < k) {
                            jWritable.set(j);
                            kWritable.set(k);
                        } else {
                            jWritable.set(k);
                            kWritable.set(j);
                        }
                        forward(forwardObjs);
                    }
                }
            }
        }
    }
}