List of usage examples for org.apache.hadoop.fs.FileSystem.getLocal
public static LocalFileSystem getLocal(Configuration conf) throws IOException
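The signature above returns a checksummed LocalFileSystem bound to the given Configuration. Before the project-specific examples below, here is a minimal sketch of the basic call pattern, assuming the standard Hadoop client libraries are on the classpath; the class name GetLocalDemo and the path /tmp/getlocal-demo.txt are made up for illustration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;

public class GetLocalDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // getLocal returns the checksummed local file system
        LocalFileSystem localFs = FileSystem.getLocal(conf);
        // Hypothetical demo path on the local disk
        Path p = new Path("/tmp/getlocal-demo.txt");
        try (FSDataOutputStream out = localFs.create(p, true)) {
            out.writeUTF("hello");
        }
        try (FSDataInputStream in = localFs.open(p)) {
            System.out.println(in.readUTF());
        }
    }
}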
From source file:edu.uci.ics.pregelix.example.util.TestCluster.java
License:Apache License
private void startHDFS() throws IOException {
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));

    // Clean up the local build directory before starting the mini cluster
    FileSystem lfs = FileSystem.getLocal(new Configuration());
    lfs.delete(new Path("build"), true);

    System.setProperty("hadoop.log.dir", "logs");
    dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
    FileSystem dfs = FileSystem.get(conf);

    // Copy the test data sets into HDFS
    Path src = new Path(DATA_PATH);
    Path dest = new Path(HDFS_PATH);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    src = new Path(DATA_PATH2);
    dest = new Path(HDFS_PATH2);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    src = new Path(DATA_PATH3);
    dest = new Path(HDFS_PATH3);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    src = new Path(DATA_PATH4);
    dest = new Path(HDFS_PATH4);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    src = new Path(DATA_PATH5);
    dest = new Path(HDFS_PATH5);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    // Write the effective configuration to a file for client use
    DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
    conf.writeXml(confOutput);
    confOutput.flush();
    confOutput.close();
}
From source file:edu.umd.cloud9.util.SequenceFileUtils.java
License:Apache License
public static List<Writable> readLocalFile(Path path) {
    List<Writable> list = new ArrayList<Writable>();
    try {
        Configuration config = new Configuration();
        FileSystem fs = FileSystem.getLocal(config);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, config);

        WritableComparable key = (WritableComparable) reader.getKeyClass().newInstance();
        Writable value = (Writable) reader.getValueClass().newInstance();

        while (reader.next(key, value)) {
            list.add(value);
            key = (WritableComparable) reader.getKeyClass().newInstance();
            value = (Writable) reader.getValueClass().newInstance();
        }
        reader.close();
    } catch (Exception e) {
        e.printStackTrace();
    }

    return list;
}
From source file:edu.umd.cloud9.util.SequenceFileUtils.java
License:Apache License
public static <K extends WritableComparable, V extends Writable> List<KeyValuePair<K, V>> readLocalFileInPairs(
        Path path) {
    List<KeyValuePair<K, V>> list = new ArrayList<KeyValuePair<K, V>>();
    try {
        Configuration config = new Configuration();
        FileSystem fs = FileSystem.getLocal(config);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, config);

        K key = (K) reader.getKeyClass().newInstance();
        V value = (V) reader.getValueClass().newInstance();

        while (reader.next(key, value)) {
            list.add(new KeyValuePair<K, V>(key, value));
            key = (K) reader.getKeyClass().newInstance();
            value = (V) reader.getValueClass().newInstance();
        }
        reader.close();
    } catch (Exception e) {
        e.printStackTrace();
    }

    Collections.sort(list, new Comparator<KeyValuePair<K, V>>() {
        @SuppressWarnings("unchecked")
        public int compare(KeyValuePair<K, V> e1, KeyValuePair<K, V> e2) {
            return e1.getKey().compareTo(e2.getKey());
        }
    });

    return list;
}
From source file:edu.umn.cs.spatialHadoop.core.Partitioner.java
License:Open Source License
/**
 * Retrieves the partitioner configured for a given job.
 * @param conf the job configuration
 * @return the configured partitioner, or null if none is set or it cannot be loaded
 */
public static Partitioner getPartitioner(Configuration conf) {
    Class<? extends Partitioner> klass = conf.getClass(PartitionerClass, Partitioner.class)
            .asSubclass(Partitioner.class);
    if (klass == null)
        return null;
    try {
        Partitioner partitioner = klass.newInstance();

        String partitionerFile = conf.get(PartitionerValue);
        if (partitionerFile != null) {
            Path[] cacheFiles = DistributedCache.getLocalCacheFiles(conf);
            for (Path cacheFile : cacheFiles) {
                if (cacheFile.getName().contains(partitionerFile)) {
                    FSDataInputStream in = FileSystem.getLocal(conf).open(cacheFile);
                    partitioner.readFields(in);
                    in.close();
                }
            }
        }
        return partitioner;
    } catch (InstantiationException e) {
        Log.warn("Error instantiating partitioner", e);
        return null;
    } catch (IllegalAccessException e) {
        Log.warn("Error instantiating partitioner", e);
        return null;
    } catch (IOException e) {
        Log.warn("Error retrieving partitioner value", e);
        return null;
    }
}
From source file:edu.umn.cs.spatialHadoop.core.SpatialSite.java
License:Open Source License
/**
 * Retrieves cells that were stored earlier using
 * {@link #setCells(Configuration, CellInfo[])}.
 * This function opens the corresponding file from DistributedCache and
 * parses the cells from it.
 * @param conf the job configuration
 * @return the array of cells, or null if no cells were stored
 * @throws IOException
 */
public static CellInfo[] getCells(Configuration conf) throws IOException {
    CellInfo[] cells = null;
    String cells_file = conf.get(OUTPUT_CELLS);
    if (cells_file != null) {
        Path[] cacheFiles = DistributedCache.getLocalCacheFiles(conf);
        for (Path cacheFile : cacheFiles) {
            if (cacheFile.getName().contains(cells_file)) {
                FSDataInputStream in = FileSystem.getLocal(conf).open(cacheFile);

                int cellCount = in.readInt();
                cells = new CellInfo[cellCount];
                for (int i = 0; i < cellCount; i++) {
                    cells[i] = new CellInfo();
                    cells[i].readFields(in);
                }

                in.close();
            }
        }
    }
    return cells;
}
From source file:edu.umn.cs.spatialHadoop.io.RandomCompressedOutputStream.java
License:Open Source License
public static void main(String[] args) throws IOException {
    long t1 = System.currentTimeMillis();
    DataOutputStream out = new DataOutputStream(
            new RandomCompressedOutputStream(new BufferedOutputStream(new FileOutputStream("test.gzp"))));
    for (int i = 0; i < 10000000; i++) {
        out.writeInt(i);
    }
    out.close();
    long t2 = System.currentTimeMillis();
    System.out.println("Total time for writing the file: " + (t2 - t1) / 1000.0 + " secs");

    FileSystem localFs = FileSystem.getLocal(new Configuration());
    t1 = System.currentTimeMillis();
    InputStream in = new RandomCompressedInputStream(localFs, new Path("test.gzp"));
    FSDataInputStream din = new FSDataInputStream(in);
    long[] pos = new long[1000];
    Random rand = new Random();
    for (int i = 0; i < pos.length; i++) {
        pos[i] = rand.nextInt(10000000) * 4L;
    }
    Arrays.sort(pos);
    for (int i = 0; i < pos.length; i++) {
        //din.seek(pos[i]);
        din.skip(pos[i] - din.getPos());
        din.readInt();
        //System.out.println("Number is "+din.readInt());
    }
    t2 = System.currentTimeMillis();
    System.out.println("Total time for reading the file: " + (t2 - t1) / 1000.0 + " secs");
    din.close();
}
From source file:edu.umn.cs.spatialHadoop.nasa.StockQuadTree.java
License:Open Source License
/**
 * Constructs an aggregate quad tree for an input HDF file on a selected
 * dataset identified by its name in the file.
 * @param conf The system configuration which can contain user-defined parameters.
 * @param inFile The path of the input HDF file to read
 * @param datasetName The name of the dataset to index in the HDF file
 * @param outFile The path to the index file to write
 * @throws IOException If an error happens while reading the input or writing the output
 */
public static void build(Configuration conf, Path inFile, String datasetName, Path outFile) throws IOException {
    FileSystem inFs = inFile.getFileSystem(conf);
    if (inFs instanceof HTTPFileSystem) {
        // HDF files are really bad to read over HTTP due to seeks
        inFile = new Path(FileUtil.copyFile(conf, inFile));
        inFs = FileSystem.getLocal(conf);
    }
    HDFFile hdfFile = null;
    try {
        hdfFile = new HDFFile(inFs.open(inFile));
        DDVGroup dataGroup = hdfFile.findGroupByName(datasetName);
        if (dataGroup == null)
            throw new RuntimeException("Cannot find dataset '" + datasetName + "' in file " + inFile);
        boolean fillValueFound = false;
        short fillValue = 0;
        short[] values = null;
        for (DataDescriptor dd : dataGroup.getContents()) {
            if (dd instanceof DDNumericDataGroup) {
                DDNumericDataGroup numericDataGroup = (DDNumericDataGroup) dd;
                values = (short[]) numericDataGroup.getAsTypedArray();
            } else if (dd instanceof DDVDataHeader) {
                DDVDataHeader vheader = (DDVDataHeader) dd;
                if (vheader.getName().equals("_FillValue")) {
                    fillValue = (short) (int) (Integer) vheader.getEntryAt(0);
                    fillValueFound = true;
                }
            }
        }
        // Retrieve meta data
        String archiveMetadata = (String) hdfFile.findHeaderByName("ArchiveMetadata.0").getEntryAt(0);
        String coreMetadata = (String) hdfFile.findHeaderByName("CoreMetadata.0").getEntryAt(0);
        NASADataset nasaDataset = new NASADataset(coreMetadata, archiveMetadata);
        if (values instanceof short[]) {
            FileSystem outFs = outFile.getFileSystem(conf);
            DataOutputStream out = new DataOutputStream(
                    new RandomCompressedOutputStream(outFs.create(outFile, false)));
            build(nasaDataset, (short[]) values, fillValue, out);
            out.close();
        } else {
            throw new RuntimeException("Indexing of values of type " + "'" + Array.get(values, 0).getClass()
                    + "' is not supported");
        }
    } finally {
        if (hdfFile != null)
            hdfFile.close();
    }
}
From source file:edu.umn.cs.spatialHadoop.nasa.HDFRecordReader.java
License:Open Source License
public void initialize(InputSplit split, Configuration conf) throws IOException {
    this.conf = conf;
    String datasetName = conf.get("dataset");
    if (datasetName == null)
        throw new RuntimeException("Dataset name should be provided");
    if (split instanceof CombineFileSplit) {
        CombineFileSplit csplits = (CombineFileSplit) split;
        splits = new Vector<FileSplit>(csplits.getNumPaths());
        for (int i = 0; i < csplits.getNumPaths(); i++) {
            FileSplit fsplit = new FileSplit(csplits.getPath(i), csplits.getOffset(i), csplits.getLength(i),
                    csplits.getLocations());
            splits.add(fsplit);
        }
        this.initialize(splits.remove(splits.size() - 1), conf);
        return;
    }
    inFile = ((FileSplit) split).getPath();
    fs = inFile.getFileSystem(conf);
    if (fs instanceof HTTPFileSystem) {
        // For performance reasons, we don't open HDF files from HTTP
        inFile = new Path(FileUtil.copyFile(conf, inFile));
        fs = FileSystem.getLocal(conf);
        this.deleteOnEnd = true;
    }
    hdfFile = new HDFFile(fs.open(inFile));

    // Retrieve meta data
    String archiveMetadata = (String) hdfFile.findHeaderByName("ArchiveMetadata.0").getEntryAt(0);
    String coreMetadata = (String) hdfFile.findHeaderByName("CoreMetadata.0").getEntryAt(0);
    nasaDataset = new NASADataset(coreMetadata, archiveMetadata);

    // Retrieve the data array
    DDVGroup dataGroup = hdfFile.findGroupByName(datasetName);
    boolean fillValueFound = false;
    int resolution = 0;
    // Retrieve metadata
    int fillValuee = 0;
    for (DataDescriptor dd : dataGroup.getContents()) {
        if (dd instanceof DDVDataHeader) {
            DDVDataHeader vheader = (DDVDataHeader) dd;
            if (vheader.getName().equals("_FillValue")) {
                Object fillValue = vheader.getEntryAt(0);
                if (fillValue instanceof Integer)
                    fillValuee = (Integer) fillValue;
                else if (fillValue instanceof Short)
                    fillValuee = (Short) fillValue;
                else if (fillValue instanceof Byte)
                    fillValuee = (Byte) fillValue;
                else
                    throw new RuntimeException("Unsupported type: " + fillValue.getClass());
                fillValueFound = true;
            } else if (vheader.getName().equals("valid_range")) {
                Object minValue = vheader.getEntryAt(0);
                if (minValue instanceof Integer)
                    nasaDataset.minValue = (Integer) minValue;
                else if (minValue instanceof Byte)
                    nasaDataset.minValue = (Byte) minValue;
                Object maxValue = vheader.getEntryAt(1);
                if (maxValue instanceof Integer)
                    nasaDataset.maxValue = (Integer) maxValue;
                else if (maxValue instanceof Byte)
                    nasaDataset.maxValue = (Byte) maxValue;
            }
        }
    }

    // Retrieve data
    for (DataDescriptor dd : dataGroup.getContents()) {
        if (dd instanceof DDNumericDataGroup) {
            DDNumericDataGroup numericDataGroup = (DDNumericDataGroup) dd;
            valueSize = numericDataGroup.getDataSize();
            resolution = numericDataGroup.getDimensions()[0];
            unparsedDataArray = new byte[valueSize * resolution * resolution];
            if (fillValueFound) {
                fillValueBytes = new byte[valueSize];
                HDFConstants.writeAt(fillValueBytes, 0, fillValuee, valueSize);
                for (int i = 0; i < unparsedDataArray.length; i++)
                    unparsedDataArray[i] = fillValueBytes[i % valueSize];
            }
            numericDataGroup.getAsByteArray(unparsedDataArray, 0, unparsedDataArray.length);
        }
    }

    nasaDataset.resolution = resolution;
    if (!fillValueFound) {
        skipFillValue = false;
    } else {
        skipFillValue = conf.getBoolean("skipfill", true);
        // Whether we need to recover fill values or not
        boolean recoverFillValues = conf.getBoolean("recoverholes", true);
        if (recoverFillValues)
            recoverFillValues(conf);
    }
    this.nasaShape = (S) OperationsParams.getShape(conf, "shape", new NASARectangle());
    this.nasaShape.setTimestamp(nasaDataset.time);
    this.value = new NASAIterator();
}
From source file:edu.umn.cs.spatialHadoop.nasa.HDFRecordReader.java
License:Open Source License
/**
 * Recover fill values in the array {@link Values}.
 * @param conf
 * @throws IOException
 * @throws Exception
 */
private void recoverFillValues(Configuration conf) throws IOException {
    // For now, we can only recover values of type short
    HDFFile waterMaskFile = null;
    try {
        // Read water mask
        Path wmPath = new Path(
                conf.get(WATER_MASK_PATH, "http://e4ftl01.cr.usgs.gov/MOLT/MOD44W.005/2000.02.24/"));
        final String tileIdentifier = String.format("h%02dv%02d", nasaDataset.h, nasaDataset.v);
        FileSystem wmFs = wmPath.getFileSystem(conf);
        FileStatus[] wmFile = wmFs.listStatus(wmPath, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().contains(tileIdentifier);
            }
        });
        if (wmFile.length == 0) {
            LOG.warn("Could not find water mask for tile '" + tileIdentifier + "'");
            return;
        }
        Path wmFileToLoad = wmFile[0].getPath();
        if (wmFs instanceof HTTPFileSystem) {
            wmFileToLoad = new Path(FileUtil.copyFile(conf, wmFileToLoad));
            wmFs = FileSystem.getLocal(conf);
        }
        waterMaskFile = new HDFFile(wmFs.open(wmFileToLoad));
        DDVGroup waterMaskGroup = waterMaskFile.findGroupByName("water_mask");
        if (waterMaskGroup == null) {
            LOG.warn("Water mask dataset 'water_mask' not found in file " + wmFile[0]);
            return;
        }
        byte[] waterMask = null;
        for (DataDescriptor dd : waterMaskGroup.getContents()) {
            if (dd instanceof DDNumericDataGroup) {
                DDNumericDataGroup numericDataGroup = (DDNumericDataGroup) dd;
                waterMask = (byte[]) numericDataGroup.getAsByteArray();
            }
        }

        // Convert the waterMask to a BinArray of the right size
        int size = 4800 / nasaDataset.resolution;
        BitArray waterMaskBits = convertWaterMaskToBits(ByteBuffer.wrap(waterMask), size);

        short fillValueShort = (short) HDFConstants.readAsInteger(fillValueBytes, 0, fillValueBytes.length);
        recoverXYShorts(ByteBuffer.wrap(unparsedDataArray), fillValueShort, waterMaskBits);
    } finally {
        if (waterMaskFile != null)
            waterMaskFile.close();
    }
}
From source file:edu.umn.cs.spatialHadoop.nasa.HDFRecordReader3.java
License:Open Source License
/**
 * Recover fill values in the array {@link Values}.
 * @param conf
 * @throws IOException
 * @throws Exception
 */
private void recoverFillValues(Configuration conf) throws IOException {
    HDFFile waterMaskFile = null;
    try {
        // Read water mask
        Path wmPath = new Path(
                conf.get(WATER_MASK_PATH, "http://e4ftl01.cr.usgs.gov/MOLT/MOD44W.005/2000.02.24/"));
        final String tileIdentifier = String.format("h%02dv%02d", nasaDataset.h, nasaDataset.v);
        FileSystem wmFs = wmPath.getFileSystem(conf);
        FileStatus[] wmFile = wmFs.listStatus(wmPath, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().contains(tileIdentifier);
            }
        });
        if (wmFile.length == 0) {
            LOG.warn("Could not find water mask for tile '" + tileIdentifier + "'");
            return;
        }
        Path wmFileToLoad = wmFile[0].getPath();
        if (wmFs instanceof HTTPFileSystem) {
            wmFileToLoad = new Path(FileUtil.copyFile(conf, wmFileToLoad));
            wmFs = FileSystem.getLocal(conf);
        }
        waterMaskFile = new HDFFile(wmFs.open(wmFileToLoad));
        DDVGroup waterMaskGroup = waterMaskFile.findGroupByName("water_mask");
        if (waterMaskGroup == null) {
            LOG.warn("Water mask dataset 'water_mask' not found in file " + wmFile[0]);
            return;
        }
        byte[] waterMask = null;
        for (DataDescriptor dd : waterMaskGroup.getContents()) {
            if (dd instanceof DDNumericDataGroup) {
                DDNumericDataGroup numericDataGroup = (DDNumericDataGroup) dd;
                waterMask = (byte[]) numericDataGroup.getAsAnArray();
            }
        }

        // Stores which values have been recovered by copying a single value
        // without interpolation in the x-direction
        byte[] valueStatus = new byte[dataArray.length];

        recoverXDirection(waterMask, valueStatus);
        recoverYDirection(waterMask, valueStatus);
    } finally {
        if (waterMaskFile != null)
            waterMaskFile.close();
    }
}