List of usage examples for the org.apache.hadoop.fs.FSDataInputStream constructor FSDataInputStream(InputStream)
public FSDataInputStream(InputStream in)
From source file:edu.umn.cs.spatialHadoop.indexing.RTree.java
License:Open Source License
@Override public void readFields(DataInput in) throws IOException { // Read the whole tree structure and keep it in memory. Leave data on disk // Tree size (Header + structure + data) treeSize = in.readInt();/*from ww w.j a v a 2 s . c o m*/ if (in instanceof Seekable) this.treeStartOffset = ((Seekable) in).getPos(); if (treeSize == 0) { height = elementCount = 0; return; } // Read only the tree structure in memory while actual records remain on // disk and loaded when necessary height = in.readInt(); if (height == 0) return; degree = in.readInt(); elementCount = in.readInt(); // Keep only tree structure in memory nodeCount = (int) ((powInt(degree, height) - 1) / (degree - 1)); this.nodes = new Rectangle[nodeCount]; this.dataOffset = new int[nodeCount + 1]; for (int node_id = 0; node_id < nodeCount; node_id++) { this.dataOffset[node_id] = in.readInt(); this.nodes[node_id] = new Rectangle(); this.nodes[node_id].readFields(in); } this.dataOffset[nodeCount] = treeSize; if (in instanceof FSDataInputStream) { // A random input stream, can keep the data on disk this.data = (FSDataInputStream) in; } else { // A sequential input stream, need to read all data now int treeDataSize = this.dataOffset[nodeCount] - this.dataOffset[0]; // Adjust the offset of data to be zero this.treeStartOffset = -this.dataOffset[0]; byte[] treeData = new byte[treeDataSize]; in.readFully(treeData, 0, treeDataSize); this.data = new FSDataInputStream(new MemoryInputStream(treeData)); } leafNodeCount = (int) Math.pow(degree, height - 1); nonLeafNodeCount = nodeCount - leafNodeCount; }
From source file:edu.umn.cs.spatialHadoop.io.RandomCompressedInputStream.java
License:Open Source License
public RandomCompressedInputStream(FSDataInputStream in, long totalLength) throws IOException { this.compressedIn = new FSDataInputStream(in); // Read and cache the lookup table this.compressedIn.seek(totalLength - 4); int numberOfBlocks = this.compressedIn.readInt(); this.blockOffsetsInCompressedFile = new long[numberOfBlocks + 1]; this.blockOffsetsInRawFile = new long[numberOfBlocks + 1]; this.compressedIn.seek(totalLength - 4 - numberOfBlocks * (8 + 8)); for (int i = 1; i <= numberOfBlocks; i++) { blockOffsetsInCompressedFile[i] = this.compressedIn.readLong(); blockOffsetsInRawFile[i] = this.compressedIn.readLong(); }/* ww w . j a va 2 s .c o m*/ this.compressedIn.seek(0); this.decompressedIn = new GZIPInputStream(this.compressedIn); }
From source file:edu.umn.cs.spatialHadoop.io.RandomCompressedOutputStream.java
License:Open Source License
public static void main(String[] args) throws IOException { long t1 = System.currentTimeMillis(); DataOutputStream out = new DataOutputStream( new RandomCompressedOutputStream(new BufferedOutputStream(new FileOutputStream("test.gzp")))); for (int i = 0; i < 10000000; i++) { out.writeInt(i);/*w ww . j av a 2s . c om*/ } out.close(); long t2 = System.currentTimeMillis(); System.out.println("Total time for writing the file: " + (t2 - t1) / 1000.0 + " secs"); FileSystem localFs = FileSystem.getLocal(new Configuration()); t1 = System.currentTimeMillis(); InputStream in = new RandomCompressedInputStream(localFs, new Path("test.gzp")); FSDataInputStream din = new FSDataInputStream(in); long[] pos = new long[1000]; Random rand = new Random(); for (int i = 0; i < pos.length; i++) { pos[i] = rand.nextInt(10000000) * 4L; } Arrays.sort(pos); for (int i = 0; i < pos.length; i++) { //din.seek(pos[i]); din.skip(pos[i] - din.getPos()); din.readInt(); //System.out.println("Number is "+din.readInt()); } t2 = System.currentTimeMillis(); System.out.println("Total time for reading the file: " + (t2 - t1) / 1000.0 + " secs"); din.close(); }
From source file:edu.umn.cs.spatialHadoop.nasa.StockQuadTree.java
License:Open Source License
/**
 * Merges a list of aggregate trees (in the same order) and writes an output
 * tree that combines all input trees. Input trees must have been built using
 * one of the {@link #build} methods. The merged tree can be further merged
 * using this method.
 *
 * <p>All streams opened by this method are closed before it returns, whether
 * the merge succeeds or fails.
 *
 * @param conf configuration used to resolve the file system of each path
 * @param inFiles paths of the trees to merge
 * @param outFile path of the merged tree; it is created with the
 *     {@code overwrite} flag set to {@code false}
 * @throws IOException if any file cannot be opened, read, written or closed
 */
public static void merge(Configuration conf, Path[] inFiles, Path outFile) throws IOException {
  DataInputStream[] inTrees = new DataInputStream[inFiles.length];
  try {
    for (int i = 0; i < inFiles.length; i++) {
      FileSystem inFs = inFiles[i].getFileSystem(conf);
      inTrees[i] = new FSDataInputStream(new RandomCompressedInputStream(inFs, inFiles[i]));
    }

    FileSystem outFs = outFile.getFileSystem(conf);
    DataOutputStream outTree = new DataOutputStream(
        new RandomCompressedOutputStream(outFs.create(outFile, false)));
    try {
      merge(inTrees, outTree);
    } finally {
      outTree.close();
    }
  } finally {
    // Release every input that was opened, even if a later open or the merge
    // itself failed part-way through
    closeInputTrees(inTrees);
  }
}

/**
 * Closes every non-null stream in {@code streams}. All streams are attempted;
 * the first close failure, if any, is rethrown afterwards.
 */
private static void closeInputTrees(DataInputStream[] streams) throws IOException {
  IOException firstFailure = null;
  for (DataInputStream stream : streams) {
    if (stream == null) {
      continue;
    }
    try {
      stream.close();
    } catch (IOException e) {
      if (firstFailure == null) {
        firstFailure = e;
      }
    }
  }
  if (firstFailure != null) {
    throw firstFailure;
  }
}
From source file:edu.umn.cs.spatialHadoop.nasa.StockQuadTree.java
License:Open Source License
/** * Performs a range query on an aggregate * @param fs/* w w w . java2 s. c om*/ * @param p * @param query_mbr * @param output * @return * @throws IOException */ public static int selectionQuery(FileSystem fs, Path p, Rectangle query_mbr, ResultCollector<PointValue> output) throws IOException { FSDataInputStream inStream = null; try { inStream = new FSDataInputStream(new RandomCompressedInputStream(fs, p)); //inStream = fs.open(p); return selectionQuery(inStream, query_mbr, output); } finally { if (inStream != null) inStream.close(); } }
From source file:edu.umn.cs.spatialHadoop.nasa.StockQuadTree.java
License:Open Source License
public static Node aggregateQuery(FileSystem fs, Path p, Rectangle query_mbr) throws IOException { FSDataInputStream inStream = null;//from ww w .j a va 2 s.c om try { inStream = new FSDataInputStream(new RandomCompressedInputStream(fs, p)); //inStream = fs.open(p); return aggregateQuery(inStream, query_mbr); } finally { if (inStream != null) inStream.close(); } }
From source file:edu.umn.cs.spatialHadoop.nasa.StockQuadTree.java
License:Open Source License
public static int getResolution(FileSystem fs, Path p) throws IOException { FSDataInputStream inStream = null;// w w w . ja v a 2 s .com try { inStream = new FSDataInputStream(new RandomCompressedInputStream(fs, p)); //inStream = fs.open(p); int resolution = inStream.readInt(); return resolution; } finally { if (inStream != null) inStream.close(); } }
From source file:edu.umn.cs.spatialHadoop.nasa.HTTPFileSystem.java
License:Open Source License
/**
 * Opens the resource addressed by {@code f} for reading over HTTP.
 *
 * @param f path whose URI is converted to the URL to read from
 * @param bufferSize not used by this implementation
 * @return a stream backed by an {@code HTTPInputStream} on the URL
 * @throws IOException if the path cannot be converted to a URL or the stream
 *     cannot be created
 */
@Override
public FSDataInputStream open(Path f, int bufferSize) throws IOException {
  return new FSDataInputStream(new HTTPInputStream(f.toUri().toURL()));
}
From source file:gobblin.source.extractor.extract.google.GoogleDriveFileSystem.java
License:Apache License
/**
 * Opens the Google Drive file addressed by {@code path} for reading.
 *
 * <p>The media download stream is buffered with the requested buffer size,
 * wrapped in a {@code SeekableFSInputStream}, and the resulting stream is
 * registered with this file system's {@code closer} before being returned.
 *
 * @param path path that is translated to a Drive file id
 * @param bufferSize size of the read buffer in bytes
 * @return the registered input stream
 * @throws IOException if the download stream cannot be obtained
 */
@Override
public FSDataInputStream open(Path path, int bufferSize) throws IOException {
  BufferedInputStream buffered = new BufferedInputStream(
      client.files().get(toFileId(path)).executeMediaAsInputStream(), bufferSize);
  SeekableFSInputStream seekable = new SeekableFSInputStream(buffered);
  FSDataInputStream stream = new FSDataInputStream(seekable);
  return closer.register(stream);
}
From source file:gobblin.source.extractor.extract.google.GoogleDriveFileSystem.java
License:Apache License
/**
 * Opens the Google Drive file addressed by {@code path} for reading, using the
 * default {@link BufferedInputStream} buffer size.
 *
 * <p>The media download stream is buffered, wrapped in a
 * {@code SeekableFSInputStream}, and the resulting stream is registered with
 * this file system's {@code closer} before being returned.
 *
 * @param path path that is translated to a Drive file id
 * @return the registered input stream
 * @throws IOException if the download stream cannot be obtained
 */
@Override
public FSDataInputStream open(Path path) throws IOException {
  BufferedInputStream buffered =
      new BufferedInputStream(client.files().get(toFileId(path)).executeMediaAsInputStream());
  SeekableFSInputStream seekable = new SeekableFSInputStream(buffered);
  FSDataInputStream stream = new FSDataInputStream(seekable);
  return closer.register(stream);
}