Example usage for org.apache.hadoop.fs FileSystem getLocal

List of usage examples for org.apache.hadoop.fs FileSystem getLocal

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.getLocal.

Prototype

public static LocalFileSystem getLocal(Configuration conf) throws IOException 

Document

Get the local FileSystem.
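
A minimal, self-contained sketch of the call (the file name, contents, and class name below are illustrative only; the point is that the returned LocalFileSystem reads and writes paths on the local disk, no cluster required):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;

public class GetLocalExample {
    public static void main(String[] args) throws IOException {
        // Obtain a FileSystem backed by the local disk.
        LocalFileSystem lfs = FileSystem.getLocal(new Configuration());

        // Write and read back a small file under the current working directory.
        Path path = new Path("getlocal-example.txt");
        FSDataOutputStream out = lfs.create(path, true);
        out.writeUTF("hello local filesystem");
        out.close();

        FSDataInputStream in = lfs.open(path);
        System.out.println(in.readUTF());
        in.close();

        lfs.delete(path, false);
    }
}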

Usage

From source file:edu.uci.ics.pregelix.example.util.TestCluster.java

License:Apache License

private void startHDFS() throws IOException {
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
    FileSystem lfs = FileSystem.getLocal(new Configuration());
    lfs.delete(new Path("build"), true);
    System.setProperty("hadoop.log.dir", "logs");
    dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
    FileSystem dfs = FileSystem.get(conf);
    Path src = new Path(DATA_PATH);
    Path dest = new Path(HDFS_PATH);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    src = new Path(DATA_PATH2);
    dest = new Path(HDFS_PATH2);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    src = new Path(DATA_PATH3);
    dest = new Path(HDFS_PATH3);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    src = new Path(DATA_PATH4);
    dest = new Path(HDFS_PATH4);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    src = new Path(DATA_PATH5);
    dest = new Path(HDFS_PATH5);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
    conf.writeXml(confOutput);
    confOutput.flush();
    confOutput.close();
}

From source file:edu.umd.cloud9.util.SequenceFileUtils.java

License:Apache License

public static List<Writable> readLocalFile(Path path) {
    List<Writable> list = new ArrayList<Writable>();

    try {
        Configuration config = new Configuration();
        FileSystem fs = FileSystem.getLocal(config);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, config);

        WritableComparable key = (WritableComparable) reader.getKeyClass().newInstance();
        Writable value = (Writable) reader.getValueClass().newInstance();

        while (reader.next(key, value)) {
            list.add(value);

            key = (WritableComparable) reader.getKeyClass().newInstance();
            value = (Writable) reader.getValueClass().newInstance();
        }
        reader.close();
    } catch (Exception e) {
        e.printStackTrace();
    }

    return list;
}

From source file:edu.umd.cloud9.util.SequenceFileUtils.java

License:Apache License

public static <K extends WritableComparable, V extends Writable> List<KeyValuePair<K, V>> readLocalFileInPairs(
        Path path) {

    List<KeyValuePair<K, V>> list = new ArrayList<KeyValuePair<K, V>>();

    try {

        Configuration config = new Configuration();
        FileSystem fs = FileSystem.getLocal(config);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, config);

        K key = (K) reader.getKeyClass().newInstance();
        V value = (V) reader.getValueClass().newInstance();

        while (reader.next(key, value)) {
            list.add(new KeyValuePair<K, V>(key, value));

            key = (K) reader.getKeyClass().newInstance();
            value = (V) reader.getValueClass().newInstance();
        }
        reader.close();
    } catch (Exception e) {
        e.printStackTrace();
    }

    Collections.sort(list, new Comparator<KeyValuePair<K, V>>() {
        @SuppressWarnings("unchecked")
        public int compare(KeyValuePair<K, V> e1, KeyValuePair<K, V> e2) {
            return e1.getKey().compareTo(e2.getKey());
        }
    });

    return list;
}
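
Note that both SequenceFileUtils methods above use the SequenceFile.Reader(fs, path, config) constructor, which is deprecated in Hadoop 2.x and later in favor of the Option-based constructor. A minimal sketch of the replacement, assuming a Hadoop 2.x classpath (the input path is whatever SequenceFile you want to dump):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

public class DumpSequenceFile {
    public static void main(String[] args) throws IOException {
        Configuration config = new Configuration();
        Path path = new Path(args[0]);
        // Option-based constructor; replaces the deprecated (fs, path, conf) form.
        SequenceFile.Reader reader = new SequenceFile.Reader(config, SequenceFile.Reader.file(path));
        try {
            Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), config);
            Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), config);
            while (reader.next(key, value)) {
                System.out.println(key + "\t" + value);
            }
        } finally {
            reader.close();
        }
    }
}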

From source file:edu.umn.cs.spatialHadoop.core.Partitioner.java

License:Open Source License

/**
 * Retrieves the partitioner that was previously stored in the given job's
 * configuration and distributed cache.
 * @param conf the job configuration
 * @return the configured partitioner, or null if it cannot be loaded
 */
public static Partitioner getPartitioner(Configuration conf) {
    Class<? extends Partitioner> klass = conf.getClass(PartitionerClass, Partitioner.class)
            .asSubclass(Partitioner.class);
    if (klass == null)
        return null;
    try {
        Partitioner partitioner = klass.newInstance();

        String partitionerFile = conf.get(PartitionerValue);
        if (partitionerFile != null) {
            Path[] cacheFiles = DistributedCache.getLocalCacheFiles(conf);
            for (Path cacheFile : cacheFiles) {
                if (cacheFile.getName().contains(partitionerFile)) {
                    FSDataInputStream in = FileSystem.getLocal(conf).open(cacheFile);
                    partitioner.readFields(in);
                    in.close();
                }
            }
        }
        return partitioner;
    } catch (InstantiationException e) {
        Log.warn("Error instantiating partitioner", e);
        return null;
    } catch (IllegalAccessException e) {
        Log.warn("Error instantiating partitioner", e);
        return null;
    } catch (IOException e) {
        Log.warn("Error retrieving partitioner value", e);
        return null;
    }
}

From source file:edu.umn.cs.spatialHadoop.core.SpatialSite.java

License:Open Source License

/**
 * Retrieves cells that were stored earlier using
 * {@link #setCells(Configuration, CellInfo[])}.
 * This function opens the corresponding file from the DistributedCache
 * and parses the cells from it.
 * @param conf the job configuration
 * @return the stored cells, or null if no cells were stored
 * @throws IOException
 */
public static CellInfo[] getCells(Configuration conf) throws IOException {
    CellInfo[] cells = null;
    String cells_file = conf.get(OUTPUT_CELLS);
    if (cells_file != null) {
        Path[] cacheFiles = DistributedCache.getLocalCacheFiles(conf);
        for (Path cacheFile : cacheFiles) {
            if (cacheFile.getName().contains(cells_file)) {
                FSDataInputStream in = FileSystem.getLocal(conf).open(cacheFile);

                int cellCount = in.readInt();
                cells = new CellInfo[cellCount];
                for (int i = 0; i < cellCount; i++) {
                    cells[i] = new CellInfo();
                    cells[i].readFields(in);
                }

                in.close();
            }
        }
    }
    return cells;
}
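
The javadoc above references setCells(Configuration, CellInfo[]). A hedged sketch of what that write side has to do to match the read loop above (inferred from the reader, not copied from SpatialSite; the file name is hypothetical):

// Write the cell count followed by each cell, then publish the file name
// in the configuration and add the file to the distributed cache.
Path cellsFile = new Path("cells_" + System.currentTimeMillis());
FSDataOutputStream out = FileSystem.get(conf).create(cellsFile, true);
out.writeInt(cells.length);
for (CellInfo cell : cells) {
    cell.write(out);
}
out.close();
conf.set(OUTPUT_CELLS, cellsFile.getName());
DistributedCache.addCacheFile(cellsFile.toUri(), conf);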

From source file:edu.umn.cs.spatialHadoop.io.RandomCompressedOutputStream.java

License:Open Source License

public static void main(String[] args) throws IOException {
    long t1 = System.currentTimeMillis();
    DataOutputStream out = new DataOutputStream(
            new RandomCompressedOutputStream(new BufferedOutputStream(new FileOutputStream("test.gzp"))));
    for (int i = 0; i < 10000000; i++) {
        out.writeInt(i);
    }
    out.close();
    long t2 = System.currentTimeMillis();
    System.out.println("Total time for writing the file: " + (t2 - t1) / 1000.0 + " secs");

    FileSystem localFs = FileSystem.getLocal(new Configuration());
    t1 = System.currentTimeMillis();
    InputStream in = new RandomCompressedInputStream(localFs, new Path("test.gzp"));
    FSDataInputStream din = new FSDataInputStream(in);
    long[] pos = new long[1000];
    Random rand = new Random();
    for (int i = 0; i < pos.length; i++) {
        pos[i] = rand.nextInt(10000000) * 4L;
    }
    Arrays.sort(pos);
    for (int i = 0; i < pos.length; i++) {
        //din.seek(pos[i]);
        din.skip(pos[i] - din.getPos());
        din.readInt();
        //System.out.println("Number is "+din.readInt());
    }
    t2 = System.currentTimeMillis();
    System.out.println("Total time for reading the file: " + (t2 - t1) / 1000.0 + " secs");
    din.close();
}

From source file:edu.umn.cs.spatialHadoop.nasa.StockQuadTree.java

License:Open Source License

/**
 * Constructs an aggregate quad tree for an input HDF file on a selected
 * dataset identified by its name in the file.
 * @param conf The system configuration which can contain user-defined parameters.
 * @param inFile The path of the input HDF file to read
 * @param datasetName The name of the dataset to index in the HDF file
 * @param outFile The path to the index file to write
 * @throws IOException If an error happens while reading the input or writing the output
 */
public static void build(Configuration conf, Path inFile, String datasetName, Path outFile) throws IOException {
    FileSystem inFs = inFile.getFileSystem(conf);
    if (inFs instanceof HTTPFileSystem) {
        // HDF files are really bad to read over HTTP due to seeks
        inFile = new Path(FileUtil.copyFile(conf, inFile));
        inFs = FileSystem.getLocal(conf);
    }
    HDFFile hdfFile = null;
    try {
        hdfFile = new HDFFile(inFs.open(inFile));
        DDVGroup dataGroup = hdfFile.findGroupByName(datasetName);

        if (dataGroup == null)
            throw new RuntimeException("Cannot find dataset '" + datasetName + "' in file " + inFile);

        boolean fillValueFound = false;
        short fillValue = 0;
        short[] values = null;
        for (DataDescriptor dd : dataGroup.getContents()) {
            if (dd instanceof DDNumericDataGroup) {
                DDNumericDataGroup numericDataGroup = (DDNumericDataGroup) dd;
                values = (short[]) numericDataGroup.getAsTypedArray();
            } else if (dd instanceof DDVDataHeader) {
                DDVDataHeader vheader = (DDVDataHeader) dd;
                if (vheader.getName().equals("_FillValue")) {
                    fillValue = (short) (int) (Integer) vheader.getEntryAt(0);
                    fillValueFound = true;
                }
            }
        }

        // Retrieve meta data
        String archiveMetadata = (String) hdfFile.findHeaderByName("ArchiveMetadata.0").getEntryAt(0);
        String coreMetadata = (String) hdfFile.findHeaderByName("CoreMetadata.0").getEntryAt(0);
        NASADataset nasaDataset = new NASADataset(coreMetadata, archiveMetadata);

        if (values instanceof short[]) {
            FileSystem outFs = outFile.getFileSystem(conf);
            DataOutputStream out = new DataOutputStream(
                    new RandomCompressedOutputStream(outFs.create(outFile, false)));
            build(nasaDataset, (short[]) values, fillValue, out);
            out.close();
        } else {
            throw new RuntimeException("Indexing of values of type " + "'" + Array.get(values, 0).getClass()
                    + "' is not supported");
        }
    } finally {
        if (hdfFile != null)
            hdfFile.close();
    }
}

From source file:edu.umn.cs.spatialHadoop.nasa.HDFRecordReader.java

License:Open Source License

public void initialize(InputSplit split, Configuration conf) throws IOException {
    this.conf = conf;
    String datasetName = conf.get("dataset");
    if (datasetName == null)
        throw new RuntimeException("Dataset name should be provided");
    if (split instanceof CombineFileSplit) {
        CombineFileSplit csplits = (CombineFileSplit) split;
        splits = new Vector<FileSplit>(csplits.getNumPaths());
        for (int i = 0; i < csplits.getNumPaths(); i++) {
            FileSplit fsplit = new FileSplit(csplits.getPath(i), csplits.getOffset(i), csplits.getLength(i),
                    csplits.getLocations());
            splits.add(fsplit);
        }
        this.initialize(splits.remove(splits.size() - 1), conf);
        return;
    }
    inFile = ((FileSplit) split).getPath();
    fs = inFile.getFileSystem(conf);
    if (fs instanceof HTTPFileSystem) {
        // For performance reasons, we don't open HDF files from HTTP
        inFile = new Path(FileUtil.copyFile(conf, inFile));
        fs = FileSystem.getLocal(conf);
        this.deleteOnEnd = true;
    }
    hdfFile = new HDFFile(fs.open(inFile));

    // Retrieve meta data
    String archiveMetadata = (String) hdfFile.findHeaderByName("ArchiveMetadata.0").getEntryAt(0);
    String coreMetadata = (String) hdfFile.findHeaderByName("CoreMetadata.0").getEntryAt(0);
    nasaDataset = new NASADataset(coreMetadata, archiveMetadata);

    // Retrieve the data array
    DDVGroup dataGroup = hdfFile.findGroupByName(datasetName);
    boolean fillValueFound = false;
    int resolution = 0;
    // Retrieve metadata
    int fillValuee = 0;
    for (DataDescriptor dd : dataGroup.getContents()) {
        if (dd instanceof DDVDataHeader) {
            DDVDataHeader vheader = (DDVDataHeader) dd;
            if (vheader.getName().equals("_FillValue")) {
                Object fillValue = vheader.getEntryAt(0);
                if (fillValue instanceof Integer)
                    fillValuee = (Integer) fillValue;
                else if (fillValue instanceof Short)
                    fillValuee = (Short) fillValue;
                else if (fillValue instanceof Byte)
                    fillValuee = (Byte) fillValue;
                else
                    throw new RuntimeException("Unsupported type: " + fillValue.getClass());
                fillValueFound = true;
            } else if (vheader.getName().equals("valid_range")) {
                Object minValue = vheader.getEntryAt(0);
                if (minValue instanceof Integer)
                    nasaDataset.minValue = (Integer) minValue;
                else if (minValue instanceof Byte)
                    nasaDataset.minValue = (Byte) minValue;
                Object maxValue = vheader.getEntryAt(1);
                if (maxValue instanceof Integer)
                    nasaDataset.maxValue = (Integer) maxValue;
                else if (maxValue instanceof Byte)
                    nasaDataset.maxValue = (Byte) maxValue;
            }
        }
    }
    // Retrieve data
    for (DataDescriptor dd : dataGroup.getContents()) {
        if (dd instanceof DDNumericDataGroup) {
            DDNumericDataGroup numericDataGroup = (DDNumericDataGroup) dd;
            valueSize = numericDataGroup.getDataSize();
            resolution = numericDataGroup.getDimensions()[0];
            unparsedDataArray = new byte[valueSize * resolution * resolution];
            if (fillValueFound) {
                fillValueBytes = new byte[valueSize];
                HDFConstants.writeAt(fillValueBytes, 0, fillValuee, valueSize);
                for (int i = 0; i < unparsedDataArray.length; i++)
                    unparsedDataArray[i] = fillValueBytes[i % valueSize];
            }
            numericDataGroup.getAsByteArray(unparsedDataArray, 0, unparsedDataArray.length);
        }
    }

    nasaDataset.resolution = resolution;
    if (!fillValueFound) {
        skipFillValue = false;
    } else {
        skipFillValue = conf.getBoolean("skipfill", true);
        // Whether we need to recover fill values or not
        boolean recoverFillValues = conf.getBoolean("recoverholes", true);
        if (recoverFillValues)
            recoverFillValues(conf);
    }
    this.nasaShape = (S) OperationsParams.getShape(conf, "shape", new NASARectangle());
    this.nasaShape.setTimestamp(nasaDataset.time);
    this.value = new NASAIterator();
}

From source file:edu.umn.cs.spatialHadoop.nasa.HDFRecordReader.java

License:Open Source License

/**
 * Recovers fill values in the array {@link Values}.
 * @param conf the job configuration
 * @throws IOException 
 */
private void recoverFillValues(Configuration conf) throws IOException {
    // For now, we can only recover values of type short
    HDFFile waterMaskFile = null;
    try {
        // Read water mask
        Path wmPath = new Path(
                conf.get(WATER_MASK_PATH, "http://e4ftl01.cr.usgs.gov/MOLT/MOD44W.005/2000.02.24/"));
        final String tileIdentifier = String.format("h%02dv%02d", nasaDataset.h, nasaDataset.v);
        FileSystem wmFs = wmPath.getFileSystem(conf);
        FileStatus[] wmFile = wmFs.listStatus(wmPath, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().contains(tileIdentifier);
            }
        });
        if (wmFile.length == 0) {
            LOG.warn("Could not find water mask for tile '" + tileIdentifier + "'");
            return;
        }
        Path wmFileToLoad = wmFile[0].getPath();
        if (wmFs instanceof HTTPFileSystem) {
            wmFileToLoad = new Path(FileUtil.copyFile(conf, wmFileToLoad));
            wmFs = FileSystem.getLocal(conf);
        }
        waterMaskFile = new HDFFile(wmFs.open(wmFileToLoad));
        DDVGroup waterMaskGroup = waterMaskFile.findGroupByName("water_mask");
        if (waterMaskGroup == null) {
            LOG.warn("Water mask dataset 'water_mask' not found in file " + wmFile[0]);
            return;
        }
        byte[] waterMask = null;
        for (DataDescriptor dd : waterMaskGroup.getContents()) {
            if (dd instanceof DDNumericDataGroup) {
                DDNumericDataGroup numericDataGroup = (DDNumericDataGroup) dd;
                waterMask = (byte[]) numericDataGroup.getAsByteArray();
            }
        }
        // Convert the waterMask to a BitArray of the right size
        int size = 4800 / nasaDataset.resolution;
        BitArray waterMaskBits = convertWaterMaskToBits(ByteBuffer.wrap(waterMask), size);

        short fillValueShort = (short) HDFConstants.readAsInteger(fillValueBytes, 0, fillValueBytes.length);
        recoverXYShorts(ByteBuffer.wrap(unparsedDataArray), fillValueShort, waterMaskBits);
    } finally {
        if (waterMaskFile != null)
            waterMaskFile.close();
    }
}

From source file:edu.umn.cs.spatialHadoop.nasa.HDFRecordReader3.java

License:Open Source License

/**
 * Recovers fill values in the array {@link Values}.
 * @param conf the job configuration
 * @throws IOException 
 */
private void recoverFillValues(Configuration conf) throws IOException {
    HDFFile waterMaskFile = null;
    try {
        // Read water mask
        Path wmPath = new Path(
                conf.get(WATER_MASK_PATH, "http://e4ftl01.cr.usgs.gov/MOLT/MOD44W.005/2000.02.24/"));
        final String tileIdentifier = String.format("h%02dv%02d", nasaDataset.h, nasaDataset.v);
        FileSystem wmFs = wmPath.getFileSystem(conf);
        FileStatus[] wmFile = wmFs.listStatus(wmPath, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().contains(tileIdentifier);
            }
        });
        if (wmFile.length == 0) {
            LOG.warn("Could not find water mask for tile '" + tileIdentifier + "'");
            return;
        }
        Path wmFileToLoad = wmFile[0].getPath();
        if (wmFs instanceof HTTPFileSystem) {
            wmFileToLoad = new Path(FileUtil.copyFile(conf, wmFileToLoad));
            wmFs = FileSystem.getLocal(conf);
        }
        waterMaskFile = new HDFFile(wmFs.open(wmFileToLoad));
        DDVGroup waterMaskGroup = waterMaskFile.findGroupByName("water_mask");
        if (waterMaskGroup == null) {
            LOG.warn("Water mask dataset 'water_mask' not found in file " + wmFile[0]);
            return;
        }
        byte[] waterMask = null;
        for (DataDescriptor dd : waterMaskGroup.getContents()) {
            if (dd instanceof DDNumericDataGroup) {
                DDNumericDataGroup numericDataGroup = (DDNumericDataGroup) dd;
                waterMask = (byte[]) numericDataGroup.getAsAnArray();
            }
        }

        // Stores which values have been recovered by copying a single value
        // without interpolation in the x-direction
        byte[] valueStatus = new byte[dataArray.length];

        recoverXDirection(waterMask, valueStatus);
        recoverYDirection(waterMask, valueStatus);
    } finally {
        if (waterMaskFile != null)
            waterMaskFile.close();
    }
}