Example usage for org.apache.hadoop.mapreduce RecordReader close

Introduction

This page collects example usages of the org.apache.hadoop.mapreduce RecordReader close() method from open-source projects.

Prototype

public abstract void close() throws IOException;

Document

Close the record reader.
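
Every example below follows the same lifecycle: create the reader from an InputFormat, initialize it with the split and task context, iterate with nextKeyValue(), and call close() when done so the underlying streams are released. The sketch below shows that lifecycle with close() guarded by a finally block; it assumes a plain TextInputFormat, and countRecords, split, and context are illustrative names rather than anything defined on this page.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class ReadAndClose {
    // Counts the records in one split and always closes the reader,
    // even if nextKeyValue() or initialize() throws.
    static long countRecords(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        RecordReader<LongWritable, Text> reader = new TextInputFormat().createRecordReader(split, context);
        try {
            reader.initialize(split, context);
            long records = 0;
            while (reader.nextKeyValue()) {
                records++;
            }
            return records;
        } finally {
            reader.close(); // releases open file handles and decompressors
        }
    }
}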

Usage

From source file:edu.umn.cs.spatialHadoop.visualization.MultilevelPlot.java

License:Open Source License

private static void plotLocal(Path[] inFiles, final Path outPath, final Class<? extends Plotter> plotterClass,
        final OperationsParams params) throws IOException, InterruptedException, ClassNotFoundException {
    final boolean vflip = params.getBoolean("vflip", true);

    OperationsParams mbrParams = new OperationsParams(params);
    mbrParams.setBoolean("background", false);
    final Rectangle inputMBR = params.get("mbr") != null ? params.getShape("mbr").getMBR()
            : FileMBR.fileMBR(inFiles, mbrParams);
    OperationsParams.setShape(params, InputMBR, inputMBR);

    // Retrieve desired output image size and keep aspect ratio if needed
    int tileWidth = params.getInt("tilewidth", 256);
    int tileHeight = params.getInt("tileheight", 256);
    // Adjust width and height if aspect ratio is to be kept
    if (params.getBoolean("keepratio", true)) {
        // Expand input file to a rectangle for compatibility with the pyramid
        // structure
        if (inputMBR.getWidth() > inputMBR.getHeight()) {
            inputMBR.y1 -= (inputMBR.getWidth() - inputMBR.getHeight()) / 2;
            inputMBR.y2 = inputMBR.y1 + inputMBR.getWidth();
        } else {
            inputMBR.x1 -= (inputMBR.getHeight() - inputMBR.getWidth()) / 2;
            inputMBR.x2 = inputMBR.x1 + inputMBR.getHeight();
        }
    }

    String outFName = outPath.getName();
    int extensionStart = outFName.lastIndexOf('.');
    final String extension = extensionStart == -1 ? ".png" : outFName.substring(extensionStart);

    // Start reading input file
    Vector<InputSplit> splits = new Vector<InputSplit>();
    final SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();
    for (Path inFile : inFiles) {
        FileSystem inFs = inFile.getFileSystem(params);
        if (!OperationsParams.isWildcard(inFile) && inFs.exists(inFile) && !inFs.isDirectory(inFile)) {
            if (SpatialSite.NonHiddenFileFilter.accept(inFile)) {
                // Use the normal input format splitter to add this non-hidden file
                Job job = Job.getInstance(params);
                SpatialInputFormat3.addInputPath(job, inFile);
                splits.addAll(inputFormat.getSplits(job));
            } else {
                // A hidden file, add it immediately as one split
                // This is useful if the input is a hidden file which is automatically
                // skipped by FileInputFormat. We need to plot a hidden file for the case
                // of plotting partition boundaries of a spatial index
                splits.add(new FileSplit(inFile, 0, inFs.getFileStatus(inFile).getLen(), new String[0]));
            }
        } else {
            Job job = Job.getInstance(params);
            SpatialInputFormat3.addInputPath(job, inFile);
            splits.addAll(inputFormat.getSplits(job));
        }
    }

    try {
        Plotter plotter = plotterClass.newInstance();
        plotter.configure(params);

        String[] strLevels = params.get("levels", "7").split("\\.\\.");
        int minLevel, maxLevel;
        if (strLevels.length == 1) {
            minLevel = 0;
            maxLevel = Integer.parseInt(strLevels[0]);
        } else {
            minLevel = Integer.parseInt(strLevels[0]);
            maxLevel = Integer.parseInt(strLevels[1]);
        }

        GridInfo bottomGrid = new GridInfo(inputMBR.x1, inputMBR.y1, inputMBR.x2, inputMBR.y2);
        bottomGrid.rows = bottomGrid.columns = 1 << maxLevel;

        TileIndex key = new TileIndex();

        // All canvases in the pyramid, one per tile
        Map<TileIndex, Canvas> canvases = new HashMap<TileIndex, Canvas>();
        for (InputSplit split : splits) {
            FileSplit fsplit = (FileSplit) split;
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(fsplit, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }

            while (reader.nextKeyValue()) {
                Rectangle partition = reader.getCurrentKey();
                if (!partition.isValid())
                    partition.set(inputMBR);

                Iterable<Shape> shapes = reader.getCurrentValue();

                for (Shape shape : shapes) {
                    Rectangle shapeMBR = shape.getMBR();
                    if (shapeMBR == null)
                        continue;
                    java.awt.Rectangle overlappingCells = bottomGrid.getOverlappingCells(shapeMBR);
                    // Iterate over levels from bottom up
                    for (key.level = maxLevel; key.level >= minLevel; key.level--) {
                        for (key.x = overlappingCells.x; key.x < overlappingCells.x
                                + overlappingCells.width; key.x++) {
                            for (key.y = overlappingCells.y; key.y < overlappingCells.y
                                    + overlappingCells.height; key.y++) {
                                Canvas canvas = canvases.get(key);
                                if (canvas == null) {
                                    Rectangle tileMBR = new Rectangle();
                                    int gridSize = 1 << key.level;
                                    tileMBR.x1 = (inputMBR.x1 * (gridSize - key.x) + inputMBR.x2 * key.x)
                                            / gridSize;
                                    tileMBR.x2 = (inputMBR.x1 * (gridSize - (key.x + 1))
                                            + inputMBR.x2 * (key.x + 1)) / gridSize;
                                    tileMBR.y1 = (inputMBR.y1 * (gridSize - key.y) + inputMBR.y2 * key.y)
                                            / gridSize;
                                    tileMBR.y2 = (inputMBR.y1 * (gridSize - (key.y + 1))
                                            + inputMBR.y2 * (key.y + 1)) / gridSize;
                                    canvas = plotter.createCanvas(tileWidth, tileHeight, tileMBR);
                                    canvases.put(key.clone(), canvas);
                                }
                                plotter.plot(canvas, shape);
                            }
                        }
                        // Update overlappingCells for the higher level
                        int updatedX1 = overlappingCells.x / 2;
                        int updatedY1 = overlappingCells.y / 2;
                        int updatedX2 = (overlappingCells.x + overlappingCells.width - 1) / 2;
                        int updatedY2 = (overlappingCells.y + overlappingCells.height - 1) / 2;
                        overlappingCells.x = updatedX1;
                        overlappingCells.y = updatedY1;
                        overlappingCells.width = updatedX2 - updatedX1 + 1;
                        overlappingCells.height = updatedY2 - updatedY1 + 1;
                    }
                }
            }
            reader.close();
        }

        // Done with all splits. Write output to disk
        LOG.info("Done with plotting. Now writing the output");
        final FileSystem outFS = outPath.getFileSystem(params);

        LOG.info("Writing default empty image");
        // Write a default empty image to be displayed for non-generated tiles
        BufferedImage emptyImg = new BufferedImage(tileWidth, tileHeight, BufferedImage.TYPE_INT_ARGB);
        Graphics2D g = new SimpleGraphics(emptyImg);
        g.setBackground(new Color(0, 0, 0, 0));
        g.clearRect(0, 0, tileWidth, tileHeight);
        g.dispose();

        // Write HTML file to browse the multilevel image
        OutputStream out = outFS.create(new Path(outPath, "default.png"));
        ImageIO.write(emptyImg, "png", out);
        out.close();

        // Add an HTML file that visualizes the result using Google Maps
        LOG.info("Writing the HTML viewer file");
        LineReader templateFileReader = new LineReader(
                MultilevelPlot.class.getResourceAsStream("/zoom_view.html"));
        PrintStream htmlOut = new PrintStream(outFS.create(new Path(outPath, "index.html")));
        Text line = new Text();
        while (templateFileReader.readLine(line) > 0) {
            String lineStr = line.toString();
            lineStr = lineStr.replace("#{TILE_WIDTH}", Integer.toString(tileWidth));
            lineStr = lineStr.replace("#{TILE_HEIGHT}", Integer.toString(tileHeight));
            lineStr = lineStr.replace("#{MAX_ZOOM}", Integer.toString(maxLevel));
            lineStr = lineStr.replace("#{MIN_ZOOM}", Integer.toString(minLevel));
            lineStr = lineStr.replace("#{TILE_URL}",
                    "'tile-' + zoom + '-' + coord.x + '-' + coord.y + '" + extension + "'");

            htmlOut.println(lineStr);
        }
        templateFileReader.close();
        htmlOut.close();

        // Write the tiles
        final Entry<TileIndex, Canvas>[] entries = canvases.entrySet().toArray(new Map.Entry[canvases.size()]);
        // Clear the hash map to save memory as it is no longer needed
        canvases.clear();
        int parallelism = params.getInt("parallel", Runtime.getRuntime().availableProcessors());
        Parallel.forEach(entries.length, new RunnableRange<Object>() {
            @Override
            public Object run(int i1, int i2) {
                boolean output = params.getBoolean("output", true);
                try {
                    Plotter plotter = plotterClass.newInstance();
                    plotter.configure(params);
                    for (int i = i1; i < i2; i++) {
                        Map.Entry<TileIndex, Canvas> entry = entries[i];
                        TileIndex key = entry.getKey();
                        if (vflip)
                            key.y = ((1 << key.level) - 1) - key.y;

                        Path imagePath = new Path(outPath, key.getImageFileName() + extension);
                        // Write this tile to an image
                        DataOutputStream outFile = output ? outFS.create(imagePath)
                                : new DataOutputStream(new NullOutputStream());
                        plotter.writeImage(entry.getValue(), outFile, vflip);
                        outFile.close();

                        // Remove entry to allow GC to collect it
                        entries[i] = null;
                    }
                    return null;
                } catch (InstantiationException e) {
                    e.printStackTrace();
                } catch (IllegalAccessException e) {
                    e.printStackTrace();
                } catch (IOException e) {
                    e.printStackTrace();
                }
                return null;
            }
        }, parallelism);
    } catch (InstantiationException e) {
        throw new RuntimeException("Error creating rastierizer", e);
    } catch (IllegalAccessException e) {
        throw new RuntimeException("Error creating rastierizer", e);
    }
}

From source file:edu.umn.cs.spatialHadoop.visualization.SingleLevelPlot.java

License:Open Source License

public static void plotLocal(Path[] inFiles, Path outFile, final Class<? extends Plotter> plotterClass,
        final OperationsParams params) throws IOException, InterruptedException {
    OperationsParams mbrParams = new OperationsParams(params);
    mbrParams.setBoolean("background", false);
    final Rectangle inputMBR = params.get(InputMBR) != null ? params.getShape("mbr").getMBR()
            : FileMBR.fileMBR(inFiles, mbrParams);
    if (params.get(InputMBR) == null)
        OperationsParams.setShape(params, InputMBR, inputMBR);

    // Retrieve desired output image size and keep aspect ratio if needed
    int width = params.getInt("width", 1000);
    int height = params.getInt("height", 1000);
    if (params.getBoolean("keepratio", true)) {
        // Adjust width and height to maintain aspect ratio and store the adjusted
        // values back in params in case the caller needs to retrieve them
        if (inputMBR.getWidth() / inputMBR.getHeight() > (double) width / height)
            params.setInt("height", height = (int) (inputMBR.getHeight() * width / inputMBR.getWidth()));
        else
            params.setInt("width", width = (int) (inputMBR.getWidth() * height / inputMBR.getHeight()));
    }
    // Store width and height in final variables to make them accessible in parallel
    final int fwidth = width, fheight = height;

    // Start reading input file
    List<InputSplit> splits = new ArrayList<InputSplit>();
    final SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();
    for (Path inFile : inFiles) {
        FileSystem inFs = inFile.getFileSystem(params);
        if (!OperationsParams.isWildcard(inFile) && inFs.exists(inFile) && !inFs.isDirectory(inFile)) {
            if (SpatialSite.NonHiddenFileFilter.accept(inFile)) {
                // Use the normal input format splitter to add this non-hidden file
                Job job = Job.getInstance(params);
                SpatialInputFormat3.addInputPath(job, inFile);
                splits.addAll(inputFormat.getSplits(job));
            } else {
                // A hidden file, add it immediately as one split
                // This is useful if the input is a hidden file which is automatically
                // skipped by FileInputFormat. We need to plot a hidden file for the case
                // of plotting partition boundaries of a spatial index
                splits.add(new FileSplit(inFile, 0, inFs.getFileStatus(inFile).getLen(), new String[0]));
            }
        } else {
            // Use the normal input format splitter to add this non-hidden file
            Job job = Job.getInstance(params);
            SpatialInputFormat3.addInputPath(job, inFile);
            splits.addAll(inputFormat.getSplits(job));
        }
    }

    // Copy splits to a final array to be used in parallel
    final FileSplit[] fsplits = splits.toArray(new FileSplit[splits.size()]);
    int parallelism = params.getInt("parallel", Runtime.getRuntime().availableProcessors());
    List<Canvas> partialCanvases = Parallel.forEach(fsplits.length, new RunnableRange<Canvas>() {
        @Override
        public Canvas run(int i1, int i2) {
            Plotter plotter;
            try {
                plotter = plotterClass.newInstance();
            } catch (InstantiationException e) {
                throw new RuntimeException("Error creating rastierizer", e);
            } catch (IllegalAccessException e) {
                throw new RuntimeException("Error creating rastierizer", e);
            }
            plotter.configure(params);
            // Create the partial layer that will contain the plot of the assigned partitions
            Canvas partialCanvas = plotter.createCanvas(fwidth, fheight, inputMBR);

            for (int i = i1; i < i2; i++) {
                try {
                    RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplits[i],
                            null);
                    if (reader instanceof SpatialRecordReader3) {
                        ((SpatialRecordReader3) reader).initialize(fsplits[i], params);
                    } else if (reader instanceof RTreeRecordReader3) {
                        ((RTreeRecordReader3) reader).initialize(fsplits[i], params);
                    } else if (reader instanceof HDFRecordReader) {
                        ((HDFRecordReader) reader).initialize(fsplits[i], params);
                    } else {
                        throw new RuntimeException("Unknown record reader");
                    }

                    while (reader.nextKeyValue()) {
                        Rectangle partition = reader.getCurrentKey();
                        if (!partition.isValid())
                            partition.set(inputMBR);

                        Iterable<Shape> shapes = reader.getCurrentValue();
                        // Run the plot step
                        plotter.plot(partialCanvas, plotter.isSmooth() ? plotter.smooth(shapes) : shapes);
                    }
                    reader.close();
                } catch (IOException e) {
                    throw new RuntimeException("Error reading the file ", e);
                } catch (InterruptedException e) {
                    throw new RuntimeException("Interrupt error ", e);
                }
            }
            return partialCanvas;
        }
    }, parallelism);
    boolean merge = params.getBoolean("merge", true);
    Plotter plotter;
    try {
        plotter = plotterClass.newInstance();
        plotter.configure(params);
    } catch (InstantiationException e) {
        throw new RuntimeException("Error creating plotter", e);
    } catch (IllegalAccessException e) {
        throw new RuntimeException("Error creating plotter", e);
    }

    // Whether we should vertically flip the final image or not
    boolean vflip = params.getBoolean("vflip", true);
    if (merge) {
        LOG.info("Merging " + partialCanvases.size() + " partial canvases");
        // Create the final canvas that will contain the final image
        Canvas finalCanvas = plotter.createCanvas(fwidth, fheight, inputMBR);
        for (Canvas partialCanvas : partialCanvases)
            plotter.merge(finalCanvas, partialCanvas);

        // Finally, write the resulting image to the given output path
        LOG.info("Writing final image");
        FileSystem outFs = outFile.getFileSystem(params);
        FSDataOutputStream outputFile = outFs.create(outFile);

        plotter.writeImage(finalCanvas, outputFile, vflip);
        outputFile.close();
    } else {
        // No merge
        LOG.info("Writing partial images");
        FileSystem outFs = outFile.getFileSystem(params);
        for (int i = 0; i < partialCanvases.size(); i++) {
            Path filename = new Path(outFile, String.format("part-%05d.png", i));
            FSDataOutputStream outputFile = outFs.create(filename);

            plotter.writeImage(partialCanvases.get(i), outputFile, vflip);
            outputFile.close();
        }
    }
}

From source file:edu.umn.cs.sthadoop.operations.HSPKNNQ.java

License:Open Source License

private static <S extends Shape> long knnLocal(Path inFile, Path outPath, OperationsParams params)
        throws IOException, InterruptedException {
    int iterations = 0;
    FileSystem fs = inFile.getFileSystem(params);
    Point queryPoint = (Point) OperationsParams.getShape(params, "point");
    int k = params.getInt("k", 1);
    // Top-k objects are retained in this object
    PriorityQueue<ShapeWithDistance<S>> knn = new KNNObjects<ShapeWithDistance<S>>(k);

    SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();

    final GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inFile);
    double kthDistance = Double.MAX_VALUE;
    if (gIndex != null) {
        // There is a global index, use it
        PriorityQueue<ShapeWithDistance<Partition>> partitionsToProcess = new PriorityQueue<HSPKNNQ.ShapeWithDistance<Partition>>() {
            {
                initialize(gIndex.size());
            }

            @Override
            protected boolean lessThan(Object a, Object b) {
                return ((ShapeWithDistance<Partition>) a).distance < ((ShapeWithDistance<Partition>) b).distance;
            }
        };
        for (Partition p : gIndex) {
            double distance = p.getMinDistanceTo(queryPoint.x, queryPoint.y);
            partitionsToProcess.insert(new ShapeWithDistance<Partition>(p.clone(), distance));
        }

        while (partitionsToProcess.size() > 0 && partitionsToProcess.top().distance <= kthDistance) {

            ShapeWithDistance<Partition> partitionToProcess = partitionsToProcess.pop();
            // Process this partition
            Path partitionPath = new Path(inFile, partitionToProcess.shape.filename);
            long length = fs.getFileStatus(partitionPath).getLen();
            FileSplit fsplit = new FileSplit(partitionPath, 0, length, new String[0]);
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(fsplit, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }
            iterations++;

            while (reader.nextKeyValue()) {
                Iterable<Shape> shapes = reader.getCurrentValue();
                for (Shape shape : shapes) {
                    double distance = shape.distanceTo(queryPoint.x, queryPoint.y);
                    if (distance <= kthDistance)
                        knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance));
                }
            }
            reader.close();

            if (knn.size() >= k)
                kthDistance = knn.top().distance;
        }
    } else {
        // No global index, have to scan the whole file
        Job job = new Job(params);
        SpatialInputFormat3.addInputPath(job, inFile);
        List<InputSplit> splits = inputFormat.getSplits(job);

        for (InputSplit split : splits) {
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(split, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(split, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(split, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(split, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }
            iterations++;

            while (reader.nextKeyValue()) {
                Iterable<Shape> shapes = reader.getCurrentValue();
                for (Shape shape : shapes) {
                    double distance = shape.distanceTo(queryPoint.x, queryPoint.y);
                    knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance));
                }
            }

            reader.close();
        }
        if (knn.size() >= k)
            kthDistance = knn.top().distance;
    }
    long resultCount = knn.size();
    if (outPath != null && params.getBoolean("output", true)) {
        FileSystem outFS = outPath.getFileSystem(params);
        PrintStream ps = new PrintStream(outFS.create(outPath));
        Vector<ShapeWithDistance<S>> resultsOrdered = new Vector<ShapeWithDistance<S>>((int) resultCount);
        resultsOrdered.setSize((int) resultCount);
        while (knn.size() > 0) {
            ShapeWithDistance<S> nextAnswer = knn.pop();
            resultsOrdered.set(knn.size(), nextAnswer);
        }

        Text text = new Text();
        for (ShapeWithDistance<S> answer : resultsOrdered) {
            text.clear();
            TextSerializerHelper.serializeDouble(answer.distance, text, ',');
            answer.shape.toText(text);
            ps.println(text);
        }
        ps.close();
    }
    TotalIterations.addAndGet(iterations);
    return resultCount;

}

From source file:edu.umn.cs.sthadoop.operations.STRangeQuery.java

License:Open Source License

/**
 * Runs a range query on the local machine (no MapReduce) and the output is
 * streamed to the provided result collector. The query might run in
 * parallel which makes it necessary to design the result collector to
 * accept parallel calls to the method
 * {@link ResultCollector#collect(Object)}. You can use
 * {@link ResultCollectorSynchronizer} to synchronize calls to your
 * ResultCollector if you cannot design yours to be thread safe.
 * 
 * @param inPath
 * @param queryRange
 * @param shape
 * @param params
 * @param output
 * @return
 * @throws IOException
 * @throws InterruptedException
 */
public static <S extends Shape> long rangeQueryLocal(Path inPath, final Shape queryRange, final S shape,
        final OperationsParams params, final ResultCollector<S> output)
        throws IOException, InterruptedException {
    // Set MBR of query shape in job configuration to work with the spatial
    // filter
    OperationsParams.setShape(params, SpatialInputFormat3.InputQueryRange, queryRange.getMBR());
    // 1- Split the input path/file to get splits that can be processed
    // independently
    final SpatialInputFormat3<Rectangle, S> inputFormat = new SpatialInputFormat3<Rectangle, S>();
    Job job = Job.getInstance(params);
    SpatialInputFormat3.setInputPaths(job, inPath);
    final List<InputSplit> splits = inputFormat.getSplits(job);

    // 2- Process splits in parallel
    List<Long> results = Parallel.forEach(splits.size(), new RunnableRange<Long>() {
        @Override
        public Long run(int i1, int i2) {
            long results = 0;
            for (int i = i1; i < i2; i++) {
                try {
                    FileSplit fsplit = (FileSplit) splits.get(i);
                    final RecordReader<Rectangle, Iterable<S>> reader = inputFormat.createRecordReader(fsplit,
                            null);
                    if (reader instanceof SpatialRecordReader3) {
                        ((SpatialRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof RTreeRecordReader3) {
                        ((RTreeRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof HDFRecordReader) {
                        ((HDFRecordReader) reader).initialize(fsplit, params);
                    } else {
                        throw new RuntimeException("Unknown record reader");
                    }
                    while (reader.nextKeyValue()) {
                        Iterable<S> shapes = reader.getCurrentValue();
                        for (Shape s : shapes) {
                            results++;
                            if (output != null)
                                output.collect((S) s);
                        }
                    }
                    reader.close();
                } catch (IOException e) {
                    LOG.error("Error processing split " + splits.get(i), e);
                } catch (InterruptedException e) {
                    LOG.error("Error processing split " + splits.get(i), e);
                }
            }
            return results;
        }
    });
    long totalResultSize = 0;
    for (long result : results)
        totalResultSize += result;
    return totalResultSize;
}
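
The Javadoc above warns that rangeQueryLocal may invoke the result collector from several threads at once. Besides the ResultCollectorSynchronizer it mentions, a collector can also be wrapped so that concurrent collect calls are serialized. A minimal sketch, assuming ResultCollector<S> declares only the collect method referenced in that Javadoc:

// Hypothetical wrapper that serializes collect() calls arriving from the
// parallel split readers, so the wrapped collector sees one call at a time.
class SynchronizedResultCollector<S> implements ResultCollector<S> {
    private final ResultCollector<S> delegate;

    SynchronizedResultCollector(ResultCollector<S> delegate) {
        this.delegate = delegate;
    }

    @Override
    public synchronized void collect(S result) {
        delegate.collect(result);
    }
}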

From source file:edu.umn.cs.sthadoop.trajectory.KNNDTW.java

License:Open Source License

private static <S extends Shape> long knnLocal(Path inFile, Path outPath, OperationsParams params)
        throws IOException, InterruptedException {
    int iterations = 0;
    FileSystem fs = inFile.getFileSystem(params);
    Point queryPoint = (Point) OperationsParams.getShape(params, "point");
    int k = params.getInt("k", 1);
    // Top-k objects are retained in this object
    PriorityQueue<ShapeWithDistance<S>> knn = new KNNObjects<ShapeWithDistance<S>>(k);

    SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();

    final GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inFile);
    double kthDistance = Double.MAX_VALUE;
    if (gIndex != null) {
        // There is a global index, use it
        PriorityQueue<ShapeWithDistance<Partition>> partitionsToProcess = new PriorityQueue<KNNDTW.ShapeWithDistance<Partition>>() {
            {
                initialize(gIndex.size());
            }

            @Override
            protected boolean lessThan(Object a, Object b) {
                return ((ShapeWithDistance<Partition>) a).distance < ((ShapeWithDistance<Partition>) b).distance;
            }
        };
        for (Partition p : gIndex) {
            double distance = p.getMinDistanceTo(queryPoint.x, queryPoint.y);
            partitionsToProcess.insert(new ShapeWithDistance<Partition>(p.clone(), distance));
        }

        while (partitionsToProcess.size() > 0 && partitionsToProcess.top().distance <= kthDistance) {

            ShapeWithDistance<Partition> partitionToProcess = partitionsToProcess.pop();
            // Process this partition
            Path partitionPath = new Path(inFile, partitionToProcess.shape.filename);
            long length = fs.getFileStatus(partitionPath).getLen();
            FileSplit fsplit = new FileSplit(partitionPath, 0, length, new String[0]);
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(fsplit, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }
            iterations++;

            while (reader.nextKeyValue()) {
                Iterable<Shape> shapes = reader.getCurrentValue();
                for (Shape shape : shapes) {
                    double distance = shape.distanceTo(queryPoint.x, queryPoint.y);
                    if (distance <= kthDistance)
                        knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance));
                }
            }
            reader.close();

            if (knn.size() >= k)
                kthDistance = knn.top().distance;
        }
    } else {
        // No global index, have to scan the whole file
        Job job = new Job(params);
        SpatialInputFormat3.addInputPath(job, inFile);
        List<InputSplit> splits = inputFormat.getSplits(job);

        for (InputSplit split : splits) {
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(split, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(split, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(split, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(split, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }
            iterations++;

            while (reader.nextKeyValue()) {
                Iterable<Shape> shapes = reader.getCurrentValue();
                for (Shape shape : shapes) {
                    double distance = shape.distanceTo(queryPoint.x, queryPoint.y);
                    knn.insert(new ShapeWithDistance<S>((S) shape.clone(), distance));
                }
            }

            reader.close();
        }
        if (knn.size() >= k)
            kthDistance = knn.top().distance;
    }
    long resultCount = knn.size();
    if (outPath != null && params.getBoolean("output", true)) {
        FileSystem outFS = outPath.getFileSystem(params);
        PrintStream ps = new PrintStream(outFS.create(outPath));
        Vector<ShapeWithDistance<S>> resultsOrdered = new Vector<ShapeWithDistance<S>>((int) resultCount);
        resultsOrdered.setSize((int) resultCount);
        while (knn.size() > 0) {
            ShapeWithDistance<S> nextAnswer = knn.pop();
            resultsOrdered.set(knn.size(), nextAnswer);
        }

        Text text = new Text();
        for (ShapeWithDistance<S> answer : resultsOrdered) {
            text.clear();
            TextSerializerHelper.serializeDouble(answer.distance, text, ',');
            answer.shape.toText(text);
            ps.println(text);
        }
        ps.close();
    }
    TotalIterations.addAndGet(iterations);
    return resultCount;

}

From source file:io.druid.data.input.orc.DruidOrcInputFormatTest.java

License:Apache License

@Test
public void testRead() throws IOException, InterruptedException {
    InputFormat inputFormat = ReflectionUtils.newInstance(OrcNewInputFormat.class, job.getConfiguration());

    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
    RecordReader reader = inputFormat.createRecordReader(split, context);
    OrcHadoopInputRowParser parser = (OrcHadoopInputRowParser) config.getParser();

    reader.initialize(split, context);

    reader.nextKeyValue();

    OrcStruct data = (OrcStruct) reader.getCurrentValue();

    MapBasedInputRow row = (MapBasedInputRow) parser.parse(data);

    Assert.assertTrue(row.getEvent().keySet().size() == 4);
    Assert.assertEquals(new DateTime(timestamp), row.getTimestamp());
    Assert.assertEquals(parser.getParseSpec().getDimensionsSpec().getDimensionNames(), row.getDimensions());
    Assert.assertEquals(col1, row.getEvent().get("col1"));
    Assert.assertEquals(Arrays.asList(col2), row.getDimension("col2"));

    reader.close();
}

From source file:io.druid.data.input.parquet.DruidParquetInputFormatTest.java

License:Apache License

@Test
public void test() throws IOException, InterruptedException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);

    HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig
            .fromFile(new File("example/wikipedia_hadoop_parquet_job.json"));

    config.intoConfiguration(job);

    File testFile = new File("example/wikipedia_list.parquet");
    Path path = new Path(testFile.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);

    InputFormat inputFormat = ReflectionUtils.newInstance(DruidParquetInputFormat.class,
            job.getConfiguration());

    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
    RecordReader reader = inputFormat.createRecordReader(split, context);

    reader.initialize(split, context);

    reader.nextKeyValue();

    GenericRecord data = (GenericRecord) reader.getCurrentValue();

    // field not read, should return null
    assertEquals(data.get("added"), null);

    assertEquals(data.get("page"), new Utf8("Gypsy Danger"));

    reader.close();
}

From source file:io.ssc.trackthetrackers.extraction.hadoop.util.Compaction.java

License:Open Source License

public static void main(String[] args) throws IOException, InterruptedException {

    if (args.length != 2) {
        System.out.println("Usage: <input folder> <output file>");
        System.exit(-1);
    }

    String inputPath = args[0];
    String outputFile = args[1];

    Configuration conf = new Configuration();

    FileSystem fs = FileSystem.get(conf);

    FileStatus[] input = fs.listStatus(new Path(inputPath), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.toString().endsWith(".parquet");
        }
    });

    Path output = new Path(outputFile);

    fs.delete(output, true);

    ProtoParquetInputFormat<ParsedPageProtos.ParsedPageOrBuilder> inputFormat = new ProtoParquetInputFormat<ParsedPageProtos.ParsedPageOrBuilder>();
    inputFormat.setReadSupportClass(new JobConf(conf), ProtoReadSupport.class);

    Job job = new Job(conf);
    ProtoParquetOutputFormat<ParsedPageProtos.ParsedPage> outputFormat = new ProtoParquetOutputFormat<ParsedPageProtos.ParsedPage>(
            ParsedPageProtos.ParsedPage.class);
    ProtoParquetOutputFormat.setProtobufClass(job, ParsedPageProtos.ParsedPage.class);
    ProtoParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
    ProtoParquetOutputFormat.setEnableDictionary(job, true);

    RecordWriter<Void, ParsedPageProtos.ParsedPage> recordWriter = outputFormat.getRecordWriter(conf, output,
            CompressionCodecName.SNAPPY);

    List<ParquetInputSplit> splits = new ArrayList<ParquetInputSplit>();

    for (FileStatus fileStatus : input) {
        System.out.println(fileStatus.getPath().toString());
        splits.addAll(inputFormat.getSplits(conf, ParquetFileReader.readFooters(conf, fileStatus)));
    }

    int splitIndex = 0;
    for (ParquetInputSplit split : splits) {

        System.out.println("Processing split: " + split.getPath().toString() + "(" + splitIndex + " of "
                + splits.size() + ")");

        TaskAttemptID taskAttemptID = new TaskAttemptID(new TaskID("identifier", splitIndex, true, splitIndex),
                splitIndex);
        TaskAttemptContext ctx = new org.apache.hadoop.mapreduce.TaskAttemptContext(conf, taskAttemptID);

        RecordReader<Void, ParsedPageProtos.ParsedPageOrBuilder> reader = inputFormat.createRecordReader(split,
                ctx);
        reader.initialize(split, ctx);

        while (reader.nextKeyValue()) {

            ParsedPageProtos.ParsedPageOrBuilder record = reader.getCurrentValue();

            ParsedPageProtos.ParsedPage.Builder builder = ParsedPageProtos.ParsedPage.newBuilder();

            builder.setUrl(record.getUrl());
            builder.setArchiveTime(record.getArchiveTime());

            builder.addAllScripts(record.getScriptsList());
            builder.addAllIframes(record.getIframesList());
            builder.addAllLinks(record.getLinksList());
            builder.addAllImages(record.getImagesList());

            recordWriter.write(null, builder.build());
        }

        if (reader != null) {
            reader.close();
        }

        splitIndex++;
    }

    TaskAttemptID taskAttemptID = new TaskAttemptID(new TaskID("identifier", 1, true, 1), 1);
    TaskAttemptContext ctx = new org.apache.hadoop.mapreduce.TaskAttemptContext(conf, taskAttemptID);

    if (recordWriter != null) {
        recordWriter.close(ctx);
    }

}

From source file:org.apache.avro.mapreduce.TestAvroKeyInputFormat.java

License:Apache License

/**
 * Verifies that a non-null record reader can be created, and the key/value types are
 * as expected.
 */
@Test
public void testCreateRecordReader() throws IOException, InterruptedException {
    // Set up the job configuration.
    Job job = new Job();
    AvroJob.setInputKeySchema(job, Schema.create(Schema.Type.STRING));
    Configuration conf = job.getConfiguration();

    FileSplit inputSplit = createMock(FileSplit.class);
    TaskAttemptContext context = createMock(TaskAttemptContext.class);
    expect(context.getConfiguration()).andReturn(conf).anyTimes();

    replay(inputSplit);
    replay(context);

    AvroKeyInputFormat inputFormat = new AvroKeyInputFormat();
    @SuppressWarnings("unchecked")
    RecordReader<AvroKey<Object>, NullWritable> recordReader = inputFormat.createRecordReader(inputSplit,
            context);
    assertNotNull(recordReader);
    recordReader.close();

    verify(inputSplit);
    verify(context);
}

From source file:org.apache.avro.mapreduce.TestAvroKeyRecordReader.java

License:Apache License

/**
 * Verifies that avro records can be read and progress is reported correctly.
 */
@Test
public void testReadRecords() throws IOException, InterruptedException {
    // Create the test avro file input with two records:
    //   1. "first"
    //   2. "second"
    final SeekableInput avroFileInput = new SeekableFileInput(
            AvroFiles.createFile(new File(mTempDir.getRoot(), "myStringfile.avro"),
                    Schema.create(Schema.Type.STRING), "first", "second"));

    // Create the record reader.
    Schema readerSchema = Schema.create(Schema.Type.STRING);
    RecordReader<AvroKey<CharSequence>, NullWritable> recordReader = new AvroKeyRecordReader<CharSequence>(
            readerSchema) {
        @Override
        protected SeekableInput createSeekableInput(Configuration conf, Path path) throws IOException {
            return avroFileInput;
        }
    };

    // Set up the job configuration.
    Configuration conf = new Configuration();

    // Create a mock input split for this record reader.
    FileSplit inputSplit = createMock(FileSplit.class);
    expect(inputSplit.getPath()).andReturn(new Path("/path/to/an/avro/file")).anyTimes();
    expect(inputSplit.getStart()).andReturn(0L).anyTimes();
    expect(inputSplit.getLength()).andReturn(avroFileInput.length()).anyTimes();

    // Create a mock task attempt context for this record reader.
    TaskAttemptContext context = createMock(TaskAttemptContext.class);
    expect(context.getConfiguration()).andReturn(conf).anyTimes();

    // Initialize the record reader.
    replay(inputSplit);
    replay(context);
    recordReader.initialize(inputSplit, context);

    assertEquals("Progress should be zero before any records are read", 0.0f, recordReader.getProgress(), 0.0f);

    // Some variables to hold the records.
    AvroKey<CharSequence> key;
    NullWritable value;

    // Read the first record.
    assertTrue("Expected at least one record", recordReader.nextKeyValue());
    key = recordReader.getCurrentKey();
    value = recordReader.getCurrentValue();

    assertNotNull("First record had null key", key);
    assertNotNull("First record had null value", value);

    CharSequence firstString = key.datum();
    assertEquals("first", firstString.toString());

    assertTrue("getCurrentKey() returned different keys for the same record",
            key == recordReader.getCurrentKey());
    assertTrue("getCurrentValue() returned different values for the same record",
            value == recordReader.getCurrentValue());

    // Read the second record.
    assertTrue("Expected to read a second record", recordReader.nextKeyValue());
    key = recordReader.getCurrentKey();
    value = recordReader.getCurrentValue();

    assertNotNull("Second record had null key", key);
    assertNotNull("Second record had null value", value);

    CharSequence secondString = key.datum();
    assertEquals("second", secondString.toString());

    assertEquals("Progress should be complete (2 out of 2 records processed)", 1.0f, recordReader.getProgress(),
            0.0f);

    // There should be no more records.
    assertFalse("Expected only 2 records", recordReader.nextKeyValue());

    // Close the record reader.
    recordReader.close();

    // Verify the expected calls on the mocks.
    verify(inputSplit);
    verify(context);
}