List of usage examples for org.apache.hadoop.io MapFile DATA_FILE_NAME
String DATA_FILE_NAME — the name ("data") of the data file inside every MapFile directory; the companion index file is named by MapFile.INDEX_FILE_NAME.
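A MapFile on HDFS is a directory holding two SequenceFiles: the data file named by MapFile.DATA_FILE_NAME and its index. The examples below resolve that data file to measure its size, to build input splits, or to hand it to a SequenceFile reader. As a minimal sketch of the idea (the path and the Text key/value types are assumptions, not taken from the examples), the data file can be opened directly as a SequenceFile:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class MapFileDataFileDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        Path mapFileDir = new Path("/tmp/example.map");               // hypothetical MapFile directory
        Path dataFile = new Path(mapFileDir, MapFile.DATA_FILE_NAME); // resolves to /tmp/example.map/data

        // The data file is itself a SequenceFile, so it can be read directly.
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, dataFile, conf);
        try {
            // Assumes the MapFile was written with Text keys and Text values.
            Text key = new Text();
            Text value = new Text();
            while (reader.next(key, value)) {
                System.out.println(key + "\t" + value);
            }
        } finally {
            reader.close();
        }
    }
}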
From source file: org.apache.accumulo.server.util.MapFilePerformanceTest.java
License: Apache License
public static void main(final String[] args) throws IOException, InterruptedException {
    Configuration conf = CachedConfiguration.getInstance();
    FileSystem fs = FileSystem.get(conf);

    final ArrayList<Key> keys = new ArrayList<Key>();

    int blocksizes[] = new int[] { 10000 };
    int numMapFiles[] = new int[] { 1, 2, 3, 5, 7 };

    ExecutorService tp = Executors.newFixedThreadPool(10);

    Runnable selectKeysTask = new Runnable() {
        public void run() {
            try {
                selectRandomKeys(args[0], .002, keys);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    };
    tp.submit(selectKeysTask);

    final Map<Integer, Map<Integer, String[]>> tests = new HashMap<Integer, Map<Integer, String[]>>();

    for (final int num : numMapFiles) {
        for (final int blocksize : blocksizes) {
            Runnable r = new Runnable() {
                public void run() {
                    System.out.println("Thread " + Thread.currentThread().getName()
                            + " creating map files blocksize = " + blocksize + " num = " + num);

                    String[] filenames;
                    try {
                        filenames = createMapFiles(args[0],
                                args[1] + "/" + MyMapFile.EXTENSION + "_" + blocksize, blocksize, num);

                        synchronized (tests) {
                            Map<Integer, String[]> map = tests.get(num);
                            if (map == null) {
                                map = new HashMap<Integer, String[]>();
                                tests.put(num, map);
                            }
                            map.put(blocksize, filenames);
                        }
                    } catch (IOException e) {
                        e.printStackTrace();
                        throw new RuntimeException(e);
                    }

                    System.out.println("Thread " + Thread.currentThread().getName()
                            + " finished creating map files");
                }
            };
            tp.execute(r);
        }
    }

    tp.shutdown();
    while (!tp.isTerminated()) {
        tp.awaitTermination(1, TimeUnit.DAYS);
    }

    for (int num : numMapFiles) {
        for (int blocksize : blocksizes) {
            String[] filenames = tests.get(num).get(blocksize);

            long len = 0;
            for (String filename : filenames) {
                len += fs.getFileStatus(new Path(filename + "/" + MapFile.DATA_FILE_NAME)).getLen();
            }

            runTest(String.format("bs = %,12d fs = %,12d nmf = %d ", blocksize, len, num), filenames, keys);
            runTest(String.format("bs = %,12d fs = %,12d nmf = %d ", blocksize, len, num), filenames, keys);
        }
    }
}
From source file: org.apache.hama.bsp.SequenceFileInputFormat.java
License: Apache License
@Override
protected FileStatus[] listStatus(BSPJob job) throws IOException {
    FileStatus[] files = super.listStatus(job);
    int len = files.length;
    for (int i = 0; i < len; ++i) {
        FileStatus file = files[i];
        if (file.isDir()) { // it's a MapFile
            Path p = file.getPath();
            FileSystem fs = p.getFileSystem(job.getConfiguration());
            // use the data file
            files[i] = fs.getFileStatus(new Path(p, MapFile.DATA_FILE_NAME));
        }
    }
    return files;
}
From source file: org.mrgeo.hdfs.input.image.HdfsMrsPyramidInputFormat.java
License: Apache License
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
    long start = System.currentTimeMillis();

    Configuration conf = context.getConfiguration();

    // In order to be used in MrGeo, this InputFormat must return instances
    // of TiledInputSplit. To do that, we need to determine the start and end
    // tile id's for each split. First we read the splits file and get the
    // partition info, then we break the partition into blocks, which become the
    // actual splits used.
    ImageInputFormatContext ifContext = ImageInputFormatContext.load(conf);
    final int zoom = ifContext.getZoomLevel();
    final int tilesize = ifContext.getTileSize();

    HdfsMrsImageDataProvider dp = createHdfsMrsImageDataProvider(context.getConfiguration());
    Path inputWithZoom = new Path(dp.getResourcePath(true), "" + zoom);

    // This appears to never be used
    // org.mrgeo.hdfs.tile.FileSplit splitfile = createFileSplit();
    // splitfile.readSplits(inputWithZoom);

    MrsPyramidMetadataReader metadataReader = dp.getMetadataReader();
    MrsPyramidMetadata metadata = metadataReader.read();

    org.mrgeo.hdfs.tile.FileSplit fsplit = createFileSplit();
    fsplit.readSplits(inputWithZoom);

    org.mrgeo.hdfs.tile.FileSplit.FileSplitInfo[] splits =
            (org.mrgeo.hdfs.tile.FileSplit.FileSplitInfo[]) fsplit.getSplits();

    List<InputSplit> result = new ArrayList<>(splits.length);

    final Bounds requestedBounds = ifContext.getBounds();
    for (org.mrgeo.hdfs.tile.FileSplit.FileSplitInfo split : splits) {
        final Path part = new Path(inputWithZoom, split.getName());
        final Path dataFile = new Path(part, MapFile.DATA_FILE_NAME);

        final long endTileId = split.getEndId();
        final long startTileId = split.getStartId();

        if (requestedBounds != null) {
            // Do not include splits that can't possibly intersect the requested bounds. This
            // is an HDFS-specific efficiency to avoid needlessly processing splits.
            final Tile startTile = TMSUtils.tileid(startTileId, zoom);
            final Bounds startTileBounds = TMSUtils.tileBounds(startTile, zoom, tilesize);
            final Tile endTile = TMSUtils.tileid(endTileId, zoom);
            final Bounds endTileBounds = TMSUtils.tileBounds(endTile, zoom, tilesize);

            if (startTileBounds.s > requestedBounds.n || endTileBounds.n < requestedBounds.s) {
                // Ignore the split because it's either completely above or completely below
                // the requested bounds.
            } else {
                result.add(new TiledInputSplit(new FileSplit(dataFile, 0, 0, null), startTileId, endTileId,
                        zoom, metadata.getTilesize()));
            }
        } else {
            // If no bounds were specified by the caller, then we include all splits.
            result.add(new TiledInputSplit(new FileSplit(dataFile, 0, 0, null), startTileId, endTileId,
                    zoom, metadata.getTilesize()));
        }
    }

    // The following code is useful for debugging. The gaps can be compared against the
    // contents of the actual index file for the partition to see if there are any gaps
    // in areas where there actually is tile information.
    // long lastEndTile = -1;
    // for (InputSplit split : result)
    // {
    //     if (lastEndTile >= 0)
    //     {
    //         long startTileId = ((TiledInputSplit) split).getStartId();
    //         if (startTileId > lastEndTile + 1)
    //         {
    //             log.error("Gap in splits: " + lastEndTile + " - " + startTileId);
    //         }
    //         lastEndTile = ((TiledInputSplit) split).getEndId();
    //     }
    // }

    long end = System.currentTimeMillis();
    log.info("Time to generate splits: " + (end - start) + " ms");

    return result;
}
From source file: org.oclc.firefly.hadoop.backup.BackupInputFormat.java
License: Apache License
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> files = super.listStatus(job);
    int len = files.size();
    for (int i = 0; i < len; ++i) {
        FileStatus file = files.get(i);
        if (file.isDir()) {
            Path p = file.getPath();
            FileSystem fs = p.getFileSystem(job.getConfiguration());
            files.set(i, fs.getFileStatus(new Path(p, MapFile.DATA_FILE_NAME)));
        }
    }
    return files;
}
From source file: stewi.mapred.LenientSequenceFileInputFormat.java
License: Apache License
@Override
protected FileStatus[] listStatus(JobConf job) throws IOException {
    FileStatus[] files = super.listStatus(job);
    for (int i = 0; i < files.length; i++) {
        FileStatus file = files[i];
        if (file.isDirectory()) { // it's a MapFile
            Path dataFile = new Path(file.getPath(), MapFile.DATA_FILE_NAME);
            FileSystem fs = file.getPath().getFileSystem(job);
            // use the data file
            files[i] = fs.getFileStatus(dataFile);
        }
    }
    return files;
}
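The Hama, BackupInputFormat, and LenientSequenceFileInputFormat examples above all apply the same idiom: when listStatus returns a directory, treat it as a MapFile and substitute the FileStatus of its data file so downstream code sees a plain SequenceFile. Stripped of the surrounding InputFormat machinery, the pattern reduces to a helper like this sketch (the class and method names are hypothetical, not taken from any of the projects above):

import java.io.IOException;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MapFile;

public final class MapFileStatusUtil {
    private MapFileStatusUtil() {
    }

    // If the status refers to a directory, assume it is a MapFile and return the
    // status of its "data" SequenceFile; otherwise return the status unchanged.
    public static FileStatus resolveDataFile(FileSystem fs, FileStatus status) throws IOException {
        if (status.isDirectory()) {
            return fs.getFileStatus(new Path(status.getPath(), MapFile.DATA_FILE_NAME));
        }
        return status;
    }
}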