Example usage for org.apache.hadoop.mapreduce RecordReader getCurrentValue

List of usage examples for org.apache.hadoop.mapreduce RecordReader getCurrentValue

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce RecordReader getCurrentValue.

Prototype

public abstract VALUEIN getCurrentValue() throws IOException, InterruptedException;

Source Link

Document

Get the current value.

Usage

From source file:edu.umn.cs.spatialHadoop.visualization.MultilevelPlot.java

License:Open Source License

private static void plotLocal(Path[] inFiles, final Path outPath, final Class<? extends Plotter> plotterClass,
        final OperationsParams params) throws IOException, InterruptedException, ClassNotFoundException {
    final boolean vflip = params.getBoolean("vflip", true);

    OperationsParams mbrParams = new OperationsParams(params);
    mbrParams.setBoolean("background", false);
    final Rectangle inputMBR = params.get("mbr") != null ? params.getShape("mbr").getMBR()
            : FileMBR.fileMBR(inFiles, mbrParams);
    OperationsParams.setShape(params, InputMBR, inputMBR);

    // Retrieve desired output image size and keep aspect ratio if needed
    int tileWidth = params.getInt("tilewidth", 256);
    int tileHeight = params.getInt("tileheight", 256);
    // Adjust width and height if aspect ratio is to be kept
    if (params.getBoolean("keepratio", true)) {
        // Expand input file to a rectangle for compatibility with the pyramid
        // structure
        if (inputMBR.getWidth() > inputMBR.getHeight()) {
            inputMBR.y1 -= (inputMBR.getWidth() - inputMBR.getHeight()) / 2;
            inputMBR.y2 = inputMBR.y1 + inputMBR.getWidth();
        } else {/*from w  w w . j  a  v  a2 s  .co  m*/
            inputMBR.x1 -= (inputMBR.getHeight() - inputMBR.getWidth()) / 2;
            inputMBR.x2 = inputMBR.x1 + inputMBR.getHeight();
        }
    }

    String outFName = outPath.getName();
    int extensionStart = outFName.lastIndexOf('.');
    final String extension = extensionStart == -1 ? ".png" : outFName.substring(extensionStart);

    // Start reading input file
    Vector<InputSplit> splits = new Vector<InputSplit>();
    final SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();
    for (Path inFile : inFiles) {
        FileSystem inFs = inFile.getFileSystem(params);
        if (!OperationsParams.isWildcard(inFile) && inFs.exists(inFile) && !inFs.isDirectory(inFile)) {
            if (SpatialSite.NonHiddenFileFilter.accept(inFile)) {
                // Use the normal input format splitter to add this non-hidden file
                Job job = Job.getInstance(params);
                SpatialInputFormat3.addInputPath(job, inFile);
                splits.addAll(inputFormat.getSplits(job));
            } else {
                // A hidden file, add it immediately as one split
                // This is useful if the input is a hidden file which is automatically
                // skipped by FileInputFormat. We need to plot a hidden file for the case
                // of plotting partition boundaries of a spatial index
                splits.add(new FileSplit(inFile, 0, inFs.getFileStatus(inFile).getLen(), new String[0]));
            }
        } else {
            Job job = Job.getInstance(params);
            SpatialInputFormat3.addInputPath(job, inFile);
            splits.addAll(inputFormat.getSplits(job));
        }
    }

    try {
        Plotter plotter = plotterClass.newInstance();
        plotter.configure(params);

        String[] strLevels = params.get("levels", "7").split("\\.\\.");
        int minLevel, maxLevel;
        if (strLevels.length == 1) {
            minLevel = 0;
            maxLevel = Integer.parseInt(strLevels[0]);
        } else {
            minLevel = Integer.parseInt(strLevels[0]);
            maxLevel = Integer.parseInt(strLevels[1]);
        }

        GridInfo bottomGrid = new GridInfo(inputMBR.x1, inputMBR.y1, inputMBR.x2, inputMBR.y2);
        bottomGrid.rows = bottomGrid.columns = 1 << maxLevel;

        TileIndex key = new TileIndex();

        // All canvases in the pyramid, one per tile
        Map<TileIndex, Canvas> canvases = new HashMap<TileIndex, Canvas>();
        for (InputSplit split : splits) {
            FileSplit fsplit = (FileSplit) split;
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(fsplit, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }

            while (reader.nextKeyValue()) {
                Rectangle partition = reader.getCurrentKey();
                if (!partition.isValid())
                    partition.set(inputMBR);

                Iterable<Shape> shapes = reader.getCurrentValue();

                for (Shape shape : shapes) {
                    Rectangle shapeMBR = shape.getMBR();
                    if (shapeMBR == null)
                        continue;
                    java.awt.Rectangle overlappingCells = bottomGrid.getOverlappingCells(shapeMBR);
                    // Iterate over levels from bottom up
                    for (key.level = maxLevel; key.level >= minLevel; key.level--) {
                        for (key.x = overlappingCells.x; key.x < overlappingCells.x
                                + overlappingCells.width; key.x++) {
                            for (key.y = overlappingCells.y; key.y < overlappingCells.y
                                    + overlappingCells.height; key.y++) {
                                Canvas canvas = canvases.get(key);
                                if (canvas == null) {
                                    Rectangle tileMBR = new Rectangle();
                                    int gridSize = 1 << key.level;
                                    tileMBR.x1 = (inputMBR.x1 * (gridSize - key.x) + inputMBR.x2 * key.x)
                                            / gridSize;
                                    tileMBR.x2 = (inputMBR.x1 * (gridSize - (key.x + 1))
                                            + inputMBR.x2 * (key.x + 1)) / gridSize;
                                    tileMBR.y1 = (inputMBR.y1 * (gridSize - key.y) + inputMBR.y2 * key.y)
                                            / gridSize;
                                    tileMBR.y2 = (inputMBR.y1 * (gridSize - (key.y + 1))
                                            + inputMBR.y2 * (key.y + 1)) / gridSize;
                                    canvas = plotter.createCanvas(tileWidth, tileHeight, tileMBR);
                                    canvases.put(key.clone(), canvas);
                                }
                                plotter.plot(canvas, shape);
                            }
                        }
                        // Update overlappingCells for the higher level
                        int updatedX1 = overlappingCells.x / 2;
                        int updatedY1 = overlappingCells.y / 2;
                        int updatedX2 = (overlappingCells.x + overlappingCells.width - 1) / 2;
                        int updatedY2 = (overlappingCells.y + overlappingCells.height - 1) / 2;
                        overlappingCells.x = updatedX1;
                        overlappingCells.y = updatedY1;
                        overlappingCells.width = updatedX2 - updatedX1 + 1;
                        overlappingCells.height = updatedY2 - updatedY1 + 1;
                    }
                }
            }
            reader.close();
        }

        // Done with all splits. Write output to disk
        LOG.info("Done with plotting. Now writing the output");
        final FileSystem outFS = outPath.getFileSystem(params);

        LOG.info("Writing default empty image");
        // Write a default empty image to be displayed for non-generated tiles
        BufferedImage emptyImg = new BufferedImage(tileWidth, tileHeight, BufferedImage.TYPE_INT_ARGB);
        Graphics2D g = new SimpleGraphics(emptyImg);
        g.setBackground(new Color(0, 0, 0, 0));
        g.clearRect(0, 0, tileWidth, tileHeight);
        g.dispose();

        // Write HTML file to browse the mutlielvel image
        OutputStream out = outFS.create(new Path(outPath, "default.png"));
        ImageIO.write(emptyImg, "png", out);
        out.close();

        // Add an HTML file that visualizes the result using Google Maps
        LOG.info("Writing the HTML viewer file");
        LineReader templateFileReader = new LineReader(
                MultilevelPlot.class.getResourceAsStream("/zoom_view.html"));
        PrintStream htmlOut = new PrintStream(outFS.create(new Path(outPath, "index.html")));
        Text line = new Text();
        while (templateFileReader.readLine(line) > 0) {
            String lineStr = line.toString();
            lineStr = lineStr.replace("#{TILE_WIDTH}", Integer.toString(tileWidth));
            lineStr = lineStr.replace("#{TILE_HEIGHT}", Integer.toString(tileHeight));
            lineStr = lineStr.replace("#{MAX_ZOOM}", Integer.toString(maxLevel));
            lineStr = lineStr.replace("#{MIN_ZOOM}", Integer.toString(minLevel));
            lineStr = lineStr.replace("#{TILE_URL}",
                    "'tile-' + zoom + '-' + coord.x + '-' + coord.y + '" + extension + "'");

            htmlOut.println(lineStr);
        }
        templateFileReader.close();
        htmlOut.close();

        // Write the tiles
        final Entry<TileIndex, Canvas>[] entries = canvases.entrySet().toArray(new Map.Entry[canvases.size()]);
        // Clear the hash map to save memory as it is no longer needed
        canvases.clear();
        int parallelism = params.getInt("parallel", Runtime.getRuntime().availableProcessors());
        Parallel.forEach(entries.length, new RunnableRange<Object>() {
            @Override
            public Object run(int i1, int i2) {
                boolean output = params.getBoolean("output", true);
                try {
                    Plotter plotter = plotterClass.newInstance();
                    plotter.configure(params);
                    for (int i = i1; i < i2; i++) {
                        Map.Entry<TileIndex, Canvas> entry = entries[i];
                        TileIndex key = entry.getKey();
                        if (vflip)
                            key.y = ((1 << key.level) - 1) - key.y;

                        Path imagePath = new Path(outPath, key.getImageFileName() + extension);
                        // Write this tile to an image
                        DataOutputStream outFile = output ? outFS.create(imagePath)
                                : new DataOutputStream(new NullOutputStream());
                        plotter.writeImage(entry.getValue(), outFile, vflip);
                        outFile.close();

                        // Remove entry to allows GC to collect it
                        entries[i] = null;
                    }
                    return null;
                } catch (InstantiationException e) {
                    e.printStackTrace();
                } catch (IllegalAccessException e) {
                    e.printStackTrace();
                } catch (IOException e) {
                    e.printStackTrace();
                }
                return null;
            }
        }, parallelism);
    } catch (InstantiationException e) {
        throw new RuntimeException("Error creating rastierizer", e);
    } catch (IllegalAccessException e) {
        throw new RuntimeException("Error creating rastierizer", e);
    }
}

From source file:edu.umn.cs.spatialHadoop.visualization.SingleLevelPlot.java

License:Open Source License

public static void plotLocal(Path[] inFiles, Path outFile, final Class<? extends Plotter> plotterClass,
        final OperationsParams params) throws IOException, InterruptedException {
    OperationsParams mbrParams = new OperationsParams(params);
    mbrParams.setBoolean("background", false);
    final Rectangle inputMBR = params.get(InputMBR) != null ? params.getShape("mbr").getMBR()
            : FileMBR.fileMBR(inFiles, mbrParams);
    if (params.get(InputMBR) == null)
        OperationsParams.setShape(params, InputMBR, inputMBR);

    // Retrieve desired output image size and keep aspect ratio if needed
    int width = params.getInt("width", 1000);
    int height = params.getInt("height", 1000);
    if (params.getBoolean("keepratio", true)) {
        // Adjust width and height to maintain aspect ratio and store the adjusted
        // values back in params in case the caller needs to retrieve them
        if (inputMBR.getWidth() / inputMBR.getHeight() > (double) width / height)
            params.setInt("height", height = (int) (inputMBR.getHeight() * width / inputMBR.getWidth()));
        else/*from   w  w w  .  j a  v a2  s  .c  om*/
            params.setInt("width", width = (int) (inputMBR.getWidth() * height / inputMBR.getHeight()));
    }
    // Store width and height in final variables to make them accessible in parallel
    final int fwidth = width, fheight = height;

    // Start reading input file
    List<InputSplit> splits = new ArrayList<InputSplit>();
    final SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();
    for (Path inFile : inFiles) {
        FileSystem inFs = inFile.getFileSystem(params);
        if (!OperationsParams.isWildcard(inFile) && inFs.exists(inFile) && !inFs.isDirectory(inFile)) {
            if (SpatialSite.NonHiddenFileFilter.accept(inFile)) {
                // Use the normal input format splitter to add this non-hidden file
                Job job = Job.getInstance(params);
                SpatialInputFormat3.addInputPath(job, inFile);
                splits.addAll(inputFormat.getSplits(job));
            } else {
                // A hidden file, add it immediately as one split
                // This is useful if the input is a hidden file which is automatically
                // skipped by FileInputFormat. We need to plot a hidden file for the case
                // of plotting partition boundaries of a spatial index
                splits.add(new FileSplit(inFile, 0, inFs.getFileStatus(inFile).getLen(), new String[0]));
            }
        } else {
            // Use the normal input format splitter to add this non-hidden file
            Job job = Job.getInstance(params);
            SpatialInputFormat3.addInputPath(job, inFile);
            splits.addAll(inputFormat.getSplits(job));
        }
    }

    // Copy splits to a final array to be used in parallel
    final FileSplit[] fsplits = splits.toArray(new FileSplit[splits.size()]);
    int parallelism = params.getInt("parallel", Runtime.getRuntime().availableProcessors());
    List<Canvas> partialCanvases = Parallel.forEach(fsplits.length, new RunnableRange<Canvas>() {
        @Override
        public Canvas run(int i1, int i2) {
            Plotter plotter;
            try {
                plotter = plotterClass.newInstance();
            } catch (InstantiationException e) {
                throw new RuntimeException("Error creating rastierizer", e);
            } catch (IllegalAccessException e) {
                throw new RuntimeException("Error creating rastierizer", e);
            }
            plotter.configure(params);
            // Create the partial layer that will contain the plot of the assigned partitions
            Canvas partialCanvas = plotter.createCanvas(fwidth, fheight, inputMBR);

            for (int i = i1; i < i2; i++) {
                try {
                    RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplits[i],
                            null);
                    if (reader instanceof SpatialRecordReader3) {
                        ((SpatialRecordReader3) reader).initialize(fsplits[i], params);
                    } else if (reader instanceof RTreeRecordReader3) {
                        ((RTreeRecordReader3) reader).initialize(fsplits[i], params);
                    } else if (reader instanceof HDFRecordReader) {
                        ((HDFRecordReader) reader).initialize(fsplits[i], params);
                    } else {
                        throw new RuntimeException("Unknown record reader");
                    }

                    while (reader.nextKeyValue()) {
                        Rectangle partition = reader.getCurrentKey();
                        if (!partition.isValid())
                            partition.set(inputMBR);

                        Iterable<Shape> shapes = reader.getCurrentValue();
                        // Run the plot step
                        plotter.plot(partialCanvas, plotter.isSmooth() ? plotter.smooth(shapes) : shapes);
                    }
                    reader.close();
                } catch (IOException e) {
                    throw new RuntimeException("Error reading the file ", e);
                } catch (InterruptedException e) {
                    throw new RuntimeException("Interrupt error ", e);
                }
            }
            return partialCanvas;
        }
    }, parallelism);
    boolean merge = params.getBoolean("merge", true);
    Plotter plotter;
    try {
        plotter = plotterClass.newInstance();
        plotter.configure(params);
    } catch (InstantiationException e) {
        throw new RuntimeException("Error creating plotter", e);
    } catch (IllegalAccessException e) {
        throw new RuntimeException("Error creating plotter", e);
    }

    // Whether we should vertically flip the final image or not
    boolean vflip = params.getBoolean("vflip", true);
    if (merge) {
        LOG.info("Merging " + partialCanvases.size() + " partial canvases");
        // Create the final canvas that will contain the final image
        Canvas finalCanvas = plotter.createCanvas(fwidth, fheight, inputMBR);
        for (Canvas partialCanvas : partialCanvases)
            plotter.merge(finalCanvas, partialCanvas);

        // Finally, write the resulting image to the given output path
        LOG.info("Writing final image");
        FileSystem outFs = outFile.getFileSystem(params);
        FSDataOutputStream outputFile = outFs.create(outFile);

        plotter.writeImage(finalCanvas, outputFile, vflip);
        outputFile.close();
    } else {
        // No merge
        LOG.info("Writing partial images");
        FileSystem outFs = outFile.getFileSystem(params);
        for (int i = 0; i < partialCanvases.size(); i++) {
            Path filename = new Path(outFile, String.format("part-%05d.png", i));
            FSDataOutputStream outputFile = outFs.create(filename);

            plotter.writeImage(partialCanvases.get(i), outputFile, vflip);
            outputFile.close();
        }
    }
}

From source file:edu.umn.cs.sthadoop.operations.HSPKNNQ.java

License:Open Source License

private static <S extends Shape> long knnLocal(Path inFile, Path outPath, OperationsParams params)
        throws IOException, InterruptedException {
    int iterations = 0;
    FileSystem fs = inFile.getFileSystem(params);
    Point queryPoint = (Point) OperationsParams.getShape(params, "point");
    int k = params.getInt("k", 1);
    // Top-k objects are retained in this object
    PriorityQueue<ShapeWithDistance<S>> knn = new KNNObjects<ShapeWithDistance<S>>(k);

    SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();

    final GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inFile);
    double kthDistance = Double.MAX_VALUE;
    if (gIndex != null) {
        // There is a global index, use it
        PriorityQueue<ShapeWithDistance<Partition>> partitionsToProcess = new PriorityQueue<HSPKNNQ.ShapeWithDistance<Partition>>() {
            {/*w  ww .  j av a 2s.c  o m*/
                initialize(gIndex.size());
            }

            @Override
            protected boolean lessThan(Object a, Object b) {
                return ((ShapeWithDistance<Partition>) a).distance < ((ShapeWithDistance<Partition>) b).distance;
            }
        };
        for (Partition p : gIndex) {
            double distance = p.getMinDistanceTo(queryPoint.x, queryPoint.y);
            partitionsToProcess.insert(new ShapeWithDistance<Partition>(p.clone(), distance));
        }

        while (partitionsToProcess.size() > 0 && partitionsToProcess.top().distance <= kthDistance) {

            ShapeWithDistance<Partition> partitionToProcess = partitionsToProcess.pop();
            // Process this partition
            Path partitionPath = new Path(inFile, partitionToProcess.shape.filename);
            long length = fs.getFileStatus(partitionPath).getLen();
            FileSplit fsplit = new FileSplit(partitionPath, 0, length, new String[0]);
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(fsplit, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }
            iterations++;

            while (reader.nextKeyValue()) {
                Iterable<Shape> shapes = reader.getCurrentValue();
                for (Shape shape : shapes) {
                    double distance = shape.distanceTo(queryPoint.x, queryPoint.y);
                    if (distance <= kthDistance)
                        knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance));
                }
            }
            reader.close();

            if (knn.size() >= k)
                kthDistance = knn.top().distance;
        }
    } else {
        // No global index, have to scan the whole file
        Job job = new Job(params);
        SpatialInputFormat3.addInputPath(job, inFile);
        List<InputSplit> splits = inputFormat.getSplits(job);

        for (InputSplit split : splits) {
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(split, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(split, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(split, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(split, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }
            iterations++;

            while (reader.nextKeyValue()) {
                Iterable<Shape> shapes = reader.getCurrentValue();
                for (Shape shape : shapes) {
                    double distance = shape.distanceTo(queryPoint.x, queryPoint.y);
                    knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance));
                }
            }

            reader.close();
        }
        if (knn.size() >= k)
            kthDistance = knn.top().distance;
    }
    long resultCount = knn.size();
    if (outPath != null && params.getBoolean("output", true)) {
        FileSystem outFS = outPath.getFileSystem(params);
        PrintStream ps = new PrintStream(outFS.create(outPath));
        Vector<ShapeWithDistance<S>> resultsOrdered = new Vector<ShapeWithDistance<S>>((int) resultCount);
        resultsOrdered.setSize((int) resultCount);
        while (knn.size() > 0) {
            ShapeWithDistance<S> nextAnswer = knn.pop();
            resultsOrdered.set(knn.size(), nextAnswer);
        }

        Text text = new Text();
        for (ShapeWithDistance<S> answer : resultsOrdered) {
            text.clear();
            TextSerializerHelper.serializeDouble(answer.distance, text, ',');
            answer.shape.toText(text);
            ps.println(text);
        }
        ps.close();
    }
    TotalIterations.addAndGet(iterations);
    return resultCount;

}

From source file:edu.umn.cs.sthadoop.operations.STRangeQuery.java

License:Open Source License

/**
 * Runs a range query on the local machine (no MapReduce) and the output is
 * streamed to the provided result collector. The query might run in
 * parallel which makes it necessary to design the result collector to
 * accept parallel calls to the method/*w  w w  .  j av  a2s  .co m*/
 * {@link ResultCollector#collect(Object)}. You can use
 * {@link ResultCollectorSynchronizer} to synchronize calls to your
 * ResultCollector if you cannot design yours to be thread safe.
 * 
 * @param inPath
 * @param queryRange
 * @param shape
 * @param params
 * @param output
 * @return
 * @throws IOException
 * @throws InterruptedException
 */
public static <S extends Shape> long rangeQueryLocal(Path inPath, final Shape queryRange, final S shape,
        final OperationsParams params, final ResultCollector<S> output)
        throws IOException, InterruptedException {
    // Set MBR of query shape in job configuration to work with the spatial
    // filter
    OperationsParams.setShape(params, SpatialInputFormat3.InputQueryRange, queryRange.getMBR());
    // 1- Split the input path/file to get splits that can be processed
    // independently
    final SpatialInputFormat3<Rectangle, S> inputFormat = new SpatialInputFormat3<Rectangle, S>();
    Job job = Job.getInstance(params);
    SpatialInputFormat3.setInputPaths(job, inPath);
    final List<InputSplit> splits = inputFormat.getSplits(job);

    // 2- Process splits in parallel
    List<Long> results = Parallel.forEach(splits.size(), new RunnableRange<Long>() {
        @Override
        public Long run(int i1, int i2) {
            long results = 0;
            for (int i = i1; i < i2; i++) {
                try {
                    FileSplit fsplit = (FileSplit) splits.get(i);
                    final RecordReader<Rectangle, Iterable<S>> reader = inputFormat.createRecordReader(fsplit,
                            null);
                    if (reader instanceof SpatialRecordReader3) {
                        ((SpatialRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof RTreeRecordReader3) {
                        ((RTreeRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof HDFRecordReader) {
                        ((HDFRecordReader) reader).initialize(fsplit, params);
                    } else {
                        throw new RuntimeException("Unknown record reader");
                    }
                    while (reader.nextKeyValue()) {
                        Iterable<S> shapes = reader.getCurrentValue();
                        for (Shape s : shapes) {
                            results++;
                            if (output != null)
                                output.collect((S) s);
                        }
                    }
                    reader.close();
                } catch (IOException e) {
                    LOG.error("Error processing split " + splits.get(i), e);
                } catch (InterruptedException e) {
                    LOG.error("Error processing split " + splits.get(i), e);
                }
            }
            return results;
        }
    });
    long totalResultSize = 0;
    for (long result : results)
        totalResultSize += result;
    return totalResultSize;
}

From source file:edu.umn.cs.sthadoop.trajectory.KNNDTW.java

License:Open Source License

private static <S extends Shape> long knnLocal(Path inFile, Path outPath, OperationsParams params)
        throws IOException, InterruptedException {
    int iterations = 0;
    FileSystem fs = inFile.getFileSystem(params);
    Point queryPoint = (Point) OperationsParams.getShape(params, "point");
    int k = params.getInt("k", 1);
    // Top-k objects are retained in this object
    PriorityQueue<ShapeWithDistance<S>> knn = new KNNObjects<ShapeWithDistance<S>>(k);

    SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();

    final GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inFile);
    double kthDistance = Double.MAX_VALUE;
    if (gIndex != null) {
        // There is a global index, use it
        PriorityQueue<ShapeWithDistance<Partition>> partitionsToProcess = new PriorityQueue<KNNDTW.ShapeWithDistance<Partition>>() {
            {/*from  www . ja  v a  2 s .com*/
                initialize(gIndex.size());
            }

            @Override
            protected boolean lessThan(Object a, Object b) {
                return ((ShapeWithDistance<Partition>) a).distance < ((ShapeWithDistance<Partition>) b).distance;
            }
        };
        for (Partition p : gIndex) {
            double distance = p.getMinDistanceTo(queryPoint.x, queryPoint.y);
            partitionsToProcess.insert(new ShapeWithDistance<Partition>(p.clone(), distance));
        }

        while (partitionsToProcess.size() > 0 && partitionsToProcess.top().distance <= kthDistance) {

            ShapeWithDistance<Partition> partitionToProcess = partitionsToProcess.pop();
            // Process this partition
            Path partitionPath = new Path(inFile, partitionToProcess.shape.filename);
            long length = fs.getFileStatus(partitionPath).getLen();
            FileSplit fsplit = new FileSplit(partitionPath, 0, length, new String[0]);
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(fsplit, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }
            iterations++;

            while (reader.nextKeyValue()) {
                Iterable<Shape> shapes = reader.getCurrentValue();
                for (Shape shape : shapes) {
                    double distance = shape.distanceTo(queryPoint.x, queryPoint.y);
                    if (distance <= kthDistance)
                        knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance));
                }
            }
            reader.close();

            if (knn.size() >= k)
                kthDistance = knn.top().distance;
        }
    } else {
        // No global index, have to scan the whole file
        Job job = new Job(params);
        SpatialInputFormat3.addInputPath(job, inFile);
        List<InputSplit> splits = inputFormat.getSplits(job);

        for (InputSplit split : splits) {
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(split, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(split, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(split, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(split, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }
            iterations++;

            while (reader.nextKeyValue()) {
                Iterable<Shape> shapes = reader.getCurrentValue();
                for (Shape shape : shapes) {
                    double distance = shape.distanceTo(queryPoint.x, queryPoint.y);
                    knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance));
                }
            }

            reader.close();
        }
        if (knn.size() >= k)
            kthDistance = knn.top().distance;
    }
    long resultCount = knn.size();
    if (outPath != null && params.getBoolean("output", true)) {
        FileSystem outFS = outPath.getFileSystem(params);
        PrintStream ps = new PrintStream(outFS.create(outPath));
        Vector<ShapeWithDistance<S>> resultsOrdered = new Vector<ShapeWithDistance<S>>((int) resultCount);
        resultsOrdered.setSize((int) resultCount);
        while (knn.size() > 0) {
            ShapeWithDistance<S> nextAnswer = knn.pop();
            resultsOrdered.set(knn.size(), nextAnswer);
        }

        Text text = new Text();
        for (ShapeWithDistance<S> answer : resultsOrdered) {
            text.clear();
            TextSerializerHelper.serializeDouble(answer.distance, text, ',');
            answer.shape.toText(text);
            ps.println(text);
        }
        ps.close();
    }
    TotalIterations.addAndGet(iterations);
    return resultCount;

}

From source file:eu.scape_project.tb.wc.archd.test.ARCTest.java

License:Apache License

/**
 * Test of nextKeyValue method, of class ArcRecordReader.
 */// w w w  . j  a  v a2s.c  o  m
public void testNextKeyValue() throws Exception {
    RecordReader<Text, ArcRecord> recordReader = myArcF.createRecordReader(split, tac);
    recordReader.initialize(split, tac);
    int start = 1;
    while (recordReader.nextKeyValue()) {
        Text currKey = recordReader.getCurrentKey();
        ArcRecord currValue = recordReader.getCurrentValue();

        String currMIMEType = currValue.getMimeType();
        String currType = currValue.getType();
        String currURL = currValue.getUrl();
        InputStream currStream = currValue.getContents();
        String currContent;
        String myContentString;
        int myContentStringIndex;
        Date currDate = currValue.getDate();
        int currHTTPrc = currValue.getHttpReturnCode();
        int currLength = currValue.getLength();

        System.out.println("KEY " + start + ": " + currKey + " MIME Type: " + currMIMEType + " Type: "
                + currType + " URL: " + currURL + " Date: " + currDate.toString() + " HTTPrc: " + currHTTPrc
                + " Length: " + currLength);

        // check example record 1 (first one and the header of the ARC file)
        if (start == 1) {
            //"myContentString" is arbitrary sting snipped of which we know that it exists in the content stream and of which we know the position in the stream.
            //We will search for the string int the content we read and compare it to the values we know.                
            currContent = content2String(currStream);
            myContentString = "defaultgz_orderxml";
            myContentStringIndex = currContent.indexOf(myContentString);
            //System.out.println("Search for: " + myContentString + "=> Index is: " + myContentStringIndex);

            assertEquals("ID not equal", "20130522085320/filedesc://3-2-20130522085320-00000-prepc2.arc",
                    currKey.toString());
            assertEquals("MIME Type not equal", "text/plain", currMIMEType);
            assertEquals("Response type not equal", "response", currType);
            assertEquals("URL not equal", "filedesc://3-2-20130522085320-00000-prepc2.arc", currURL);
            assertTrue("Date not correct", currDate.toString().startsWith("Wed May 22 08:53:20"));
            assertEquals("HTTPrc not equal", -1, currHTTPrc);
            assertEquals("Record length not equal", 1190, currLength);
            assertEquals("Content seems not to be correct", 531, myContentStringIndex);
        }
        start++;
    }
}

From source file:eu.scape_project.tb.wc.archd.test.WARCTest.java

License:Apache License

/**
 * Test of nextKeyValue method, of class ArcRecordReader.
 *///from w ww  .  j  av  a  2 s.  c om
public void testNextKeyValue() throws Exception {
    RecordReader<Text, ArcRecord> recordReader = myArcF.createRecordReader(split, tac);
    recordReader.initialize(split, tac);
    int start = 1;
    while (recordReader.nextKeyValue()) {
        Text currKey = recordReader.getCurrentKey();
        ArcRecord currValue = recordReader.getCurrentValue();

        String currMIMEType = currValue.getMimeType();
        String currType = currValue.getType();
        String currURL = currValue.getUrl();
        InputStream currStream = currValue.getContents();
        String currContent;
        String myContentString;
        int myContentStringIndex;
        Date currDate = currValue.getDate();
        int currHTTPrc = currValue.getHttpReturnCode();
        int currLength = currValue.getLength();

        System.out.println("KEY " + start + ": " + currKey + " MIME Type: " + currMIMEType + " Type: "
                + currType + " URL: " + currURL + " Date: " + currDate.toString() + " HTTPrc: " + currHTTPrc
                + " Length: " + currLength);

        // check example record 1 (first one and the header of the WARC file)
        if (start == 1) {
            //"myContentString" is arbitrary sting snipped of which we know that it exists in the content stream and of which we know the position in the stream.
            //We will search for the string int the content we read and compare it to the values we know.                
            currContent = content2String(currStream);
            myContentString = "isPartOf: basic";
            myContentStringIndex = currContent.indexOf(myContentString);
            //System.out.println("Search for: " + myContentString + "=> Index is: " + myContentStringIndex);

            assertEquals("ID not equal", "<urn:uuid:18cfb53d-1c89-4cc6-863f-e5535d430c95>", currKey.toString());
            assertEquals("MIME Type not equal", "application/warc-fields", currMIMEType);
            assertEquals("Response type not equal", "warcinfo", currType);
            assertEquals("URL not equal", null, currURL);
            assertTrue("Date not correct", currDate.toString().startsWith("Wed May 22 12:27:40"));
            assertEquals("HTTPrc not equal", -1, currHTTPrc);
            assertEquals("Record length not equal", 374, currLength);
            assertEquals("Content mismatch", 202, myContentStringIndex);
        }
        start++;
    }
}

From source file:gobblin.runtime.mapreduce.GobblinWorkUnitsInputFormatTest.java

License:Apache License

@Test
public void testRecordReader() throws Exception {

    List<String> paths = Lists.newArrayList("/path1", "/path2");
    GobblinWorkUnitsInputFormat.GobblinSplit split = new GobblinWorkUnitsInputFormat.GobblinSplit(paths);

    GobblinWorkUnitsInputFormat inputFormat = new GobblinWorkUnitsInputFormat();
    RecordReader<LongWritable, Text> recordReader = inputFormat.createRecordReader(split,
            new TaskAttemptContextImpl(new Configuration(), new TaskAttemptID("a", 1, TaskType.MAP, 1, 1)));

    recordReader.nextKeyValue();//from   ww w. j  a  va 2  s. c o m
    Assert.assertEquals(recordReader.getCurrentKey().get(), 0);
    Assert.assertEquals(recordReader.getCurrentValue().toString(), "/path1");

    recordReader.nextKeyValue();
    Assert.assertEquals(recordReader.getCurrentKey().get(), 1);
    Assert.assertEquals(recordReader.getCurrentValue().toString(), "/path2");

    Assert.assertFalse(recordReader.nextKeyValue());

}

From source file:io.druid.data.input.orc.DruidOrcInputFormatTest.java

License:Apache License

@Test
public void testRead() throws IOException, InterruptedException {
    InputFormat inputFormat = ReflectionUtils.newInstance(OrcNewInputFormat.class, job.getConfiguration());

    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
    RecordReader reader = inputFormat.createRecordReader(split, context);
    OrcHadoopInputRowParser parser = (OrcHadoopInputRowParser) config.getParser();

    reader.initialize(split, context);//from  w  w w .j av a  2s .co m

    reader.nextKeyValue();

    OrcStruct data = (OrcStruct) reader.getCurrentValue();

    MapBasedInputRow row = (MapBasedInputRow) parser.parse(data);

    Assert.assertTrue(row.getEvent().keySet().size() == 4);
    Assert.assertEquals(new DateTime(timestamp), row.getTimestamp());
    Assert.assertEquals(parser.getParseSpec().getDimensionsSpec().getDimensionNames(), row.getDimensions());
    Assert.assertEquals(col1, row.getEvent().get("col1"));
    Assert.assertEquals(Arrays.asList(col2), row.getDimension("col2"));

    reader.close();
}

From source file:io.druid.data.input.parquet.DruidParquetInputFormatTest.java

License:Apache License

@Test
public void test() throws IOException, InterruptedException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);/*w  w  w. j a v a  2  s  .  c om*/

    HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig
            .fromFile(new File("example/wikipedia_hadoop_parquet_job.json"));

    config.intoConfiguration(job);

    File testFile = new File("example/wikipedia_list.parquet");
    Path path = new Path(testFile.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);

    InputFormat inputFormat = ReflectionUtils.newInstance(DruidParquetInputFormat.class,
            job.getConfiguration());

    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
    RecordReader reader = inputFormat.createRecordReader(split, context);

    reader.initialize(split, context);

    reader.nextKeyValue();

    GenericRecord data = (GenericRecord) reader.getCurrentValue();

    // field not read, should return null
    assertEquals(data.get("added"), null);

    assertEquals(data.get("page"), new Utf8("Gypsy Danger"));

    reader.close();
}