Example usage for org.apache.hadoop.io MapFile DATA_FILE_NAME

List of usage examples for org.apache.hadoop.io MapFile DATA_FILE_NAME

Introduction

On this page you can find example usages of org.apache.hadoop.io MapFile DATA_FILE_NAME.

Prototype

public static final String DATA_FILE_NAME = "data"

To view the source code for org.apache.hadoop.io MapFile DATA_FILE_NAME, click the Source Link.

Document

The name of the data file.
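
A MapFile is a directory holding two SequenceFiles: a data file containing the sorted key/value records (its name is this constant, "data") and an index file. The following minimal sketch shows where the constant points; it assumes a writable path /tmp/example.map on the default file system.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Text;

public class MapFileDataFileExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path dir = new Path("/tmp/example.map"); // hypothetical output directory

        // MapFile.Writer creates the directory with a "data" file and an
        // "index" file inside it; keys must be appended in sorted order.
        MapFile.Writer writer = new MapFile.Writer(conf, fs, dir.toString(),
                IntWritable.class, Text.class);
        try {
            for (int i = 0; i < 10; i++) {
                writer.append(new IntWritable(i), new Text("value-" + i));
            }
        } finally {
            writer.close();
        }

        // Resolve the data file through the constant rather than the
        // literal string "data".
        Path dataFile = new Path(dir, MapFile.DATA_FILE_NAME);
        System.out.println(dataFile + " has length "
                + fs.getFileStatus(dataFile).getLen());
    }
}

Resolving the path through the constant, as in the last lines above, is exactly what the usage examples below do when they need the size or status of a MapFile's data file.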

Usage

From source file: org.apache.accumulo.server.util.MapFilePerformanceTest.java

License: Apache License

This benchmark creates batches of map files at various block sizes, sums the size of each batch's data files via MapFile.DATA_FILE_NAME, and runs timed read tests against them.

public static void main(final String[] args) throws IOException, InterruptedException {

    Configuration conf = CachedConfiguration.getInstance();
    FileSystem fs = FileSystem.get(conf);

    final ArrayList<Key> keys = new ArrayList<Key>();

    int[] blocksizes = { 10000 };
    int[] numMapFiles = { 1, 2, 3, 5, 7 };

    ExecutorService tp = Executors.newFixedThreadPool(10);

    Runnable selectKeysTask = new Runnable() {

        public void run() {
            try {
                selectRandomKeys(args[0], .002, keys);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

    };

    tp.submit(selectKeysTask);

    final Map<Integer, Map<Integer, String[]>> tests = new HashMap<Integer, Map<Integer, String[]>>();

    for (final int num : numMapFiles) {
        for (final int blocksize : blocksizes) {

            Runnable r = new Runnable() {
                public void run() {
                    System.out.println("Thread " + Thread.currentThread().getName()
                            + " creating map files blocksize = " + blocksize + " num = " + num);
                    String[] filenames;
                    try {
                        filenames = createMapFiles(args[0],
                                args[1] + "/" + MyMapFile.EXTENSION + "_" + blocksize, blocksize, num);

                        synchronized (tests) {
                            Map<Integer, String[]> map = tests.get(num);
                            if (map == null) {
                                map = new HashMap<Integer, String[]>();
                                tests.put(num, map);
                            }

                            map.put(blocksize, filenames);
                        }
                    } catch (IOException e) {
                        e.printStackTrace();
                        throw new RuntimeException(e);
                    }
                    System.out.println(
                            "Thread " + Thread.currentThread().getName() + " finished creating map files");

                }
            };

            tp.execute(r);
        }
    }

    tp.shutdown();
    while (!tp.isTerminated()) {
        tp.awaitTermination(1, TimeUnit.DAYS);
    }

    for (int num : numMapFiles) {
        for (int blocksize : blocksizes) {
            String[] filenames = tests.get(num).get(blocksize);

            long len = 0;
            for (String filename : filenames) {
                len += fs.getFileStatus(new Path(filename + "/" + MapFile.DATA_FILE_NAME)).getLen();
            }
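            // the same test is run twice; the repeat presumably measures warm-cache performance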
            runTest(String.format("bs = %,12d fs = %,12d nmf = %d ", blocksize, len, num), filenames, keys);
            runTest(String.format("bs = %,12d fs = %,12d nmf = %d ", blocksize, len, num), filenames, keys);
        }
    }
}

From source file: org.apache.hama.bsp.SequenceFileInputFormat.java

License: Apache License

When listing input files, this input format detects MapFile directories and replaces each with the data file inside it, so the job reads the underlying SequenceFile.

@Override
protected FileStatus[] listStatus(BSPJob job) throws IOException {

    FileStatus[] files = super.listStatus(job);
    int len = files.length;
    for (int i = 0; i < len; ++i) {
        FileStatus file = files[i];
        if (file.isDir()) { // it's a MapFile
            Path p = file.getPath();
            FileSystem fs = p.getFileSystem(job.getConfiguration());
            // use the data file
            files[i] = fs.getFileStatus(new Path(p, MapFile.DATA_FILE_NAME));
        }
    }
    return files;
}
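
The pattern above, substituting the data file for the MapFile directory returned by listStatus, recurs in the remaining examples: a MapFile looks like a directory to a file listing, but only its data file can be handed to a SequenceFile reader.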

From source file: org.mrgeo.hdfs.input.image.HdfsMrsPyramidInputFormat.java

License: Apache License

This input format builds a TiledInputSplit for each MapFile partition of an image pyramid that can intersect the requested bounds, pointing each split at the partition's data file.

@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
    long start = System.currentTimeMillis();

    Configuration conf = context.getConfiguration();

    // In order to be used in MrGeo, this InputFormat must return instances
    // of TiledInputSplit. To do that, we need to determine the start and end
    // tile id's for each split. First we read the splits file and get the
    // partition info, then we break the partition into blocks, which become the
    // actual splits used.
    ImageInputFormatContext ifContext = ImageInputFormatContext.load(conf);
    final int zoom = ifContext.getZoomLevel();
    final int tilesize = ifContext.getTileSize();

    HdfsMrsImageDataProvider dp = createHdfsMrsImageDataProvider(context.getConfiguration());
    Path inputWithZoom = new Path(dp.getResourcePath(true), "" + zoom);

    // This appears to never be used
    //  org.mrgeo.hdfs.tile.FileSplit splitfile = createFileSplit();
    //  splitfile.readSplits(inputWithZoom);

    MrsPyramidMetadataReader metadataReader = dp.getMetadataReader();
    MrsPyramidMetadata metadata = metadataReader.read();

    org.mrgeo.hdfs.tile.FileSplit fsplit = createFileSplit();
    fsplit.readSplits(inputWithZoom);

    org.mrgeo.hdfs.tile.FileSplit.FileSplitInfo[] splits = (org.mrgeo.hdfs.tile.FileSplit.FileSplitInfo[]) fsplit
            .getSplits();

    List<InputSplit> result = new ArrayList<>(splits.length);

    final Bounds requestedBounds = ifContext.getBounds();
    for (org.mrgeo.hdfs.tile.FileSplit.FileSplitInfo split : splits) {
        final Path part = new Path(inputWithZoom, split.getName());
        final Path dataFile = new Path(part, MapFile.DATA_FILE_NAME);

        final long endTileId = split.getEndId();
        final long startTileId = split.getStartId();

        if (requestedBounds != null) {
            // Do not include splits that can't possibly intersect the requested bounds. This
            // is an HDFS-specific efficiency to avoid needlessly processing splits.
            final Tile startTile = TMSUtils.tileid(startTileId, zoom);
            final Bounds startTileBounds = TMSUtils.tileBounds(startTile, zoom, tilesize);
            final Tile endTile = TMSUtils.tileid(endTileId, zoom);
            final Bounds endTileBounds = TMSUtils.tileBounds(endTile, zoom, tilesize);

            if (startTileBounds.s > requestedBounds.n || endTileBounds.n < requestedBounds.s) {
                // Ignore the split because it's either completely above or completely below
                // the requested bounds.
            } else {
                result.add(new TiledInputSplit(new FileSplit(dataFile, 0, 0, null), startTileId, endTileId,
                        zoom, metadata.getTilesize()));
            }
        } else {
            // If no bounds were specified by the caller, then we include
            // all splits.
            result.add(new TiledInputSplit(new FileSplit(dataFile, 0, 0, null), startTileId, endTileId, zoom,
                    metadata.getTilesize()));
        }
    }

    // The following code is useful for debugging. The gaps can be compared against the
    // contents of the actual index file for the partition to see if there are any gaps
    // in areas where there actually is tile information.
    //    long lastEndTile = -1;
    //    for (InputSplit split: result)
    //    {
    //      if (lastEndTile >= 0)
    //      {
    //        long startTileId = ((TiledInputSplit)split).getStartId();
    //        if (startTileId > lastEndTile + 1)
    //        {
    //          log.error("Gap in splits: " + lastEndTile + " - " + startTileId);
    //        }
    //        lastEndTile = ((TiledInputSplit)split).getEndId();
    //      }
    //    }

    long end = System.currentTimeMillis();
    log.info("Time to generate splits: " + (end - start) + " ms");

    return result;
}
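
Note that the wrapped FileSplit is created with offset 0, length 0, and no host list; the start and end tile ids carried by the TiledInputSplit presumably drive the actual reads rather than a byte range.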

From source file: org.oclc.firefly.hadoop.backup.BackupInputFormat.java

License: Apache License

@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> files = super.listStatus(job);
    int len = files.size();

    for (int i = 0; i < len; ++i) {
        FileStatus file = files.get(i);
        if (file.isDir()) {
            Path p = file.getPath();
            FileSystem fs = p.getFileSystem(job.getConfiguration());
            files.set(i, fs.getFileStatus(new Path(p, MapFile.DATA_FILE_NAME)));
        }
    }

    return files;
}

From source file: stewi.mapred.LenientSequenceFileInputFormat.java

License: Apache License

@Override
protected FileStatus[] listStatus(JobConf job) throws IOException {
    FileStatus[] files = super.listStatus(job);
    for (int i = 0; i < files.length; i++) {
        FileStatus file = files[i];
        if (file.isDirectory()) { // it's a MapFile
            Path dataFile = new Path(file.getPath(), MapFile.DATA_FILE_NAME);
            FileSystem fs = file.getPath().getFileSystem(job);
            // use the data file
            files[i] = fs.getFileStatus(dataFile);
        }
    }
    return files;
}