Example usage of the org.apache.hadoop.fs.FSDataInputStream constructor FSDataInputStream(InputStream)

List of usage examples for the org.apache.hadoop.fs.FSDataInputStream constructor FSDataInputStream(InputStream)

Introduction

On this page you can find example usages of the org.apache.hadoop.fs.FSDataInputStream constructor FSDataInputStream(InputStream).

Prototype

public FSDataInputStream(InputStream in) 

Source Link

Usage

From source file:edu.umn.cs.spatialHadoop.indexing.RTree.java

License:Open Source License

/**
 * Loads the tree from {@code in}. The tree structure (per-node data offsets
 * and rectangles) is always read into memory. The record data is left in the
 * stream when {@code in} is an {@link FSDataInputStream}; otherwise it is
 * read completely into a memory buffer.
 *
 * @param in the input to read the serialized tree from
 * @throws IOException if reading from {@code in} fails
 */
@Override
public void readFields(DataInput in) throws IOException {
    // Total size of the serialized tree (header + structure + data)
    treeSize = in.readInt();

    // Remember where the tree begins when the input can report its position
    if (in instanceof Seekable) {
        this.treeStartOffset = ((Seekable) in).getPos();
    }

    // An empty tree has nothing more to read
    if (treeSize == 0) {
        elementCount = 0;
        height = 0;
        return;
    }

    // Header fields: height first; a zero height ends the read early
    height = in.readInt();
    if (height == 0) {
        return;
    }
    degree = in.readInt();
    elementCount = in.readInt();

    // Only the structure is kept in memory: one rectangle and one data offset
    // per node, plus one extra offset slot that holds the total tree size
    nodeCount = (int) ((powInt(degree, height) - 1) / (degree - 1));
    this.nodes = new Rectangle[nodeCount];
    this.dataOffset = new int[nodeCount + 1];

    int nodeIndex = 0;
    while (nodeIndex < nodeCount) {
        this.dataOffset[nodeIndex] = in.readInt();
        Rectangle nodeRect = new Rectangle();
        this.nodes[nodeIndex] = nodeRect;
        nodeRect.readFields(in);
        nodeIndex++;
    }
    this.dataOffset[nodeCount] = treeSize;

    if (!(in instanceof FSDataInputStream)) {
        // Sequential input: all record data has to be read right now
        int firstDataOffset = this.dataOffset[0];
        int treeDataSize = this.dataOffset[nodeCount] - firstDataOffset;
        // Shift the start offset so that stored data offsets map onto the
        // in-memory buffer, which begins at zero
        this.treeStartOffset = -firstDataOffset;
        byte[] treeData = new byte[treeDataSize];
        in.readFully(treeData, 0, treeDataSize);
        this.data = new FSDataInputStream(new MemoryInputStream(treeData));
    } else {
        // Random-access input: the record data can stay where it is
        this.data = (FSDataInputStream) in;
    }

    leafNodeCount = (int) Math.pow(degree, height - 1);
    nonLeafNodeCount = nodeCount - leafNodeCount;
}

From source file:edu.umn.cs.spatialHadoop.io.RandomCompressedInputStream.java

License:Open Source License

/**
 * Opens a compressed file for reading and caches its block lookup table.
 * <p>
 * The constructor reads the layout from the end of the stream: the last four
 * bytes hold the number of blocks, preceded by one pair of longs per block
 * (offset in the compressed file, offset in the raw file). Index 0 of both
 * lookup arrays is left at its default value of zero. The stream is then
 * rewound to position 0 and wrapped in a {@link GZIPInputStream}.
 *
 * @param in the stream over the compressed file
 * @param totalLength the total length of the compressed file in bytes,
 *        including the lookup table and the trailing block count
 * @throws IOException if reading fails or the stored block count is negative
 */
public RandomCompressedInputStream(FSDataInputStream in, long totalLength) throws IOException {
    this.compressedIn = new FSDataInputStream(in);

    // The block count is stored in the last four bytes of the file
    this.compressedIn.seek(totalLength - 4);
    int numberOfBlocks = this.compressedIn.readInt();
    if (numberOfBlocks < 0) {
        throw new IOException("Invalid number of blocks in lookup table: " + numberOfBlocks);
    }

    this.blockOffsetsInCompressedFile = new long[numberOfBlocks + 1];
    this.blockOffsetsInRawFile = new long[numberOfBlocks + 1];

    // Each entry is two longs; multiply as long so that a large block count
    // cannot overflow 32-bit arithmetic before the subtraction
    long lookupTableSize = numberOfBlocks * (8L + 8L);
    this.compressedIn.seek(totalLength - 4 - lookupTableSize);
    for (int i = 1; i <= numberOfBlocks; i++) {
        blockOffsetsInCompressedFile[i] = this.compressedIn.readLong();
        blockOffsetsInRawFile[i] = this.compressedIn.readLong();
    }

    // Rewind and start decompressing from the beginning of the file
    this.compressedIn.seek(0);
    this.decompressedIn = new GZIPInputStream(this.compressedIn);
}

From source file:edu.umn.cs.spatialHadoop.io.RandomCompressedOutputStream.java

License:Open Source License

/**
 * Small benchmark: writes ten million consecutive integers to the local file
 * {@code test.gzp} through a {@code RandomCompressedOutputStream}, then reads
 * back 1000 integers at random (sorted) positions and prints the elapsed
 * time of each phase.
 *
 * @param args command-line arguments (not used)
 * @throws IOException if writing or reading the test file fails
 */
public static void main(String[] args) throws IOException {
    final int valueCount = 10000000;

    long t1 = System.currentTimeMillis();
    DataOutputStream out = new DataOutputStream(
            new RandomCompressedOutputStream(new BufferedOutputStream(new FileOutputStream("test.gzp"))));
    try {
        for (int i = 0; i < valueCount; i++) {
            out.writeInt(i);
        }
    } finally {
        // Close even when a write fails so the file handle is not leaked
        out.close();
    }
    long t2 = System.currentTimeMillis();
    System.out.println("Total time for writing the file: " + (t2 - t1) / 1000.0 + " secs");

    FileSystem localFs = FileSystem.getLocal(new Configuration());
    t1 = System.currentTimeMillis();
    FSDataInputStream din = new FSDataInputStream(
            new RandomCompressedInputStream(localFs, new Path("test.gzp")));
    try {
        // Random byte positions, each aligned to a 4-byte integer
        long[] pos = new long[1000];
        Random rand = new Random();
        for (int i = 0; i < pos.length; i++) {
            pos[i] = rand.nextInt(valueCount) * 4L;
        }
        // Visit the positions in ascending order so the stream only moves forward
        Arrays.sort(pos);
        for (int i = 0; i < pos.length; i++) {
            // skip() may skip fewer bytes than requested, so keep going until
            // the target is reached. A duplicate sample position gives a
            // non-positive distance and nothing is skipped.
            long remaining = pos[i] - din.getPos();
            while (remaining > 0) {
                long skipped = din.skip(remaining);
                if (skipped > 0) {
                    remaining -= skipped;
                } else if (din.read() >= 0) {
                    // No progress from skip(): consume a single byte instead
                    remaining--;
                } else {
                    throw new IOException("Unexpected end of file while skipping to position " + pos[i]);
                }
            }
            din.readInt();
        }
        t2 = System.currentTimeMillis();
        System.out.println("Total time for reading the file: " + (t2 - t1) / 1000.0 + " secs");
    } finally {
        din.close();
    }
}

From source file:edu.umn.cs.spatialHadoop.nasa.StockQuadTree.java

License:Open Source License

/**
 * Merges a list of aggregate trees (in the same order) and writes an output tree
 * that combines all input trees. Input trees must have been built using one of the
 * {@link #build} methods. The merged tree can be further merged using this method.
 * <p>
 * All streams opened by this method are closed before it returns, including
 * when opening, creating or merging fails.
 *
 * @param conf configuration used to resolve the file systems of the paths
 * @param inFiles paths of the input trees, opened through
 *        {@code RandomCompressedInputStream}
 * @param outFile path of the merged tree; it is created without overwriting
 * @throws IOException if any file cannot be opened, read, created or written
 */
public static void merge(Configuration conf, Path[] inFiles, Path outFile) throws IOException {
    DataInputStream[] inTrees = new DataInputStream[inFiles.length];
    DataOutputStream outTree = null;
    try {
        for (int i = 0; i < inFiles.length; i++) {
            FileSystem inFs = inFiles[i].getFileSystem(conf);
            inTrees[i] = new FSDataInputStream(new RandomCompressedInputStream(inFs, inFiles[i]));
        }

        FileSystem outFs = outFile.getFileSystem(conf);
        outTree = new DataOutputStream(new RandomCompressedOutputStream(outFs.create(outFile, false)));

        merge(inTrees, outTree);
    } finally {
        // Release whatever was opened, even if a step above failed; slots
        // that were never filled are still null
        try {
            for (DataInputStream inTree : inTrees) {
                if (inTree != null) {
                    inTree.close();
                }
            }
        } finally {
            if (outTree != null) {
                outTree.close();
            }
        }
    }
}

From source file:edu.umn.cs.spatialHadoop.nasa.StockQuadTree.java

License:Open Source License

/**
 * Performs a range query on an aggregate
 * @param fs/* w  w w  .  java2 s.  c  om*/
 * @param p
 * @param query_mbr
 * @param output
 * @return
 * @throws IOException
 */
public static int selectionQuery(FileSystem fs, Path p, Rectangle query_mbr, ResultCollector<PointValue> output)
        throws IOException {
    FSDataInputStream inStream = null;
    try {
        inStream = new FSDataInputStream(new RandomCompressedInputStream(fs, p));
        //inStream = fs.open(p);
        return selectionQuery(inStream, query_mbr, output);
    } finally {
        if (inStream != null)
            inStream.close();
    }

}

From source file:edu.umn.cs.spatialHadoop.nasa.StockQuadTree.java

License:Open Source License

/**
 * Runs an aggregate query on a tree stored in a file. The file is opened
 * through a {@code RandomCompressedInputStream}, the query is delegated to
 * the stream-based {@code aggregateQuery} overload, and the stream is closed
 * before returning.
 *
 * @param fs the file system that contains the file
 * @param p the path of the file to query
 * @param query_mbr the query rectangle passed to the stream-based overload
 * @return the node returned by the stream-based overload
 * @throws IOException if the file cannot be opened or read
 */
public static Node aggregateQuery(FileSystem fs, Path p, Rectangle query_mbr) throws IOException {
    // If opening fails there is nothing to close, so the stream is created
    // outside the try block
    FSDataInputStream treeStream = new FSDataInputStream(new RandomCompressedInputStream(fs, p));
    try {
        return aggregateQuery(treeStream, query_mbr);
    } finally {
        treeStream.close();
    }
}

From source file:edu.umn.cs.spatialHadoop.nasa.StockQuadTree.java

License:Open Source License

/**
 * Reads the resolution of a tree file, which is stored as the first integer
 * of the decompressed stream.
 *
 * @param fs the file system that contains the file
 * @param p the path of the file to read
 * @return the first integer read from the file
 * @throws IOException if the file cannot be opened or read
 */
public static int getResolution(FileSystem fs, Path p) throws IOException {
    // If opening fails there is nothing to close, so the stream is created
    // outside the try block
    FSDataInputStream treeStream = new FSDataInputStream(new RandomCompressedInputStream(fs, p));
    try {
        return treeStream.readInt();
    } finally {
        treeStream.close();
    }
}

From source file:edu.umn.cs.spatialHadoop.nasa.HTTPFileSystem.java

License:Open Source License

/**
 * Opens the given path for reading by converting it to a URL and wrapping an
 * {@code HTTPInputStream} on that URL.
 *
 * @param f the path to open; it is converted with {@code toUri().toURL()}
 * @param bufferSize not used by this implementation
 * @return a stream that reads from the URL of {@code f}
 * @throws IOException if the path cannot be converted to a URL or the stream
 *         cannot be created
 */
@Override
public FSDataInputStream open(Path f, int bufferSize) throws IOException {
    HTTPInputStream httpIn = new HTTPInputStream(f.toUri().toURL());
    return new FSDataInputStream(httpIn);
}

From source file:gobblin.source.extractor.extract.google.GoogleDriveFileSystem.java

License:Apache License

/**
 * Opens the file identified by {@code path} for reading. The media stream
 * obtained from the client is buffered with the requested buffer size,
 * wrapped in a {@code SeekableFSInputStream}, and the resulting stream is
 * registered with {@code closer} before being returned.
 *
 * @param path the path to open; it is translated with {@code toFileId}
 * @param bufferSize the size of the buffer placed around the media stream
 * @return the stream registered with {@code closer}
 * @throws IOException if the media stream cannot be obtained
 */
@Override
public FSDataInputStream open(Path path, int bufferSize) throws IOException {
    BufferedInputStream bufferedMedia = new BufferedInputStream(
            client.files().get(toFileId(path)).executeMediaAsInputStream(), bufferSize);
    SeekableFSInputStream seekableMedia = new SeekableFSInputStream(bufferedMedia);
    FSDataInputStream dataStream = new FSDataInputStream(seekableMedia);
    return closer.register(dataStream);
}

From source file:gobblin.source.extractor.extract.google.GoogleDriveFileSystem.java

License:Apache License

/**
 * Opens the file identified by {@code path} for reading. The media stream
 * obtained from the client is buffered with the default
 * {@link BufferedInputStream} buffer size, wrapped in a
 * {@code SeekableFSInputStream}, and the resulting stream is registered with
 * {@code closer} before being returned.
 *
 * @param path the path to open; it is translated with {@code toFileId}
 * @return the stream registered with {@code closer}
 * @throws IOException if the media stream cannot be obtained
 */
@Override
public FSDataInputStream open(Path path) throws IOException {
    BufferedInputStream bufferedMedia = new BufferedInputStream(
            client.files().get(toFileId(path)).executeMediaAsInputStream());
    SeekableFSInputStream seekableMedia = new SeekableFSInputStream(bufferedMedia);
    FSDataInputStream dataStream = new FSDataInputStream(seekableMedia);
    return closer.register(dataStream);
}