List of usage examples for org.apache.hadoop.fs FSDataOutputStream getPos
public long getPos()
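Before the individual examples, a minimal sketch of what getPos() reports: the byte offset in the file at which the next write will land. This sketch is not taken from any of the sources below; the path and values are invented, and it assumes a FileSystem reachable from a default Configuration (a local filesystem works).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetPosDemo {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    // Hypothetical path, used only for this demo
    FSDataOutputStream out = fs.create(new Path("/tmp/getpos-demo.bin"));
    try {
      long start = out.getPos();       // 0: nothing written yet
      out.writeInt(42);                // 4 bytes
      long afterHeader = out.getPos(); // 4: where the body starts
      out.writeUTF("payload");         // 2-byte length prefix + 7 bytes
      System.out.println("start=" + start + " body=" + afterHeader
          + " end=" + out.getPos());
    } finally {
      out.close();
    }
  }
}

Every example below is a variation on this: read the position before (and sometimes after) a write, and use the offsets or their differences as file-layout metadata.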
From source file:edu.umn.cs.spatialHadoop.core.RTree.java
License:Open Source License
/**
 * Builds the RTree given a serialized list of elements. It uses the given
 * stockObject to deserialize these elements using
 * {@link TextSerializable#fromText(Text)} and build the tree. Also writes the
 * created tree to the disk directly.
 *
 * @param element_bytes
 *          - serialization of all elements separated by new lines
 * @param offset
 *          - offset of the first byte to use in element_bytes
 * @param len
 *          - number of bytes to use in element_bytes
 * @param degree
 *          - degree of the R-tree to build in terms of number of children
 *            per node
 * @param dataOut
 *          - output stream to write the result to.
 * @param fast_sort
 *          - setting this to <code>true</code> allows the method to run
 *            faster by materializing the offset of each element in the list,
 *            which speeds up the comparison. However, this requires an
 *            additional 16 bytes per element. So, for each 1M elements, the
 *            method will require an additional 16 M bytes (approximately).
 */
public void bulkLoadWrite(final byte[] element_bytes, final int offset, final int len,
    final int degree, DataOutput dataOut, final boolean fast_sort) {
  try {
    // Count number of elements in the given text
    int i_start = offset;
    final Text line = new Text();
    while (i_start < offset + len) {
      int i_end = skipToEOL(element_bytes, i_start);
      // Extract the line without the end-of-line character
      line.set(element_bytes, i_start, i_end - i_start - 1);
      stockObject.fromText(line);
      elementCount++;
      i_start = i_end;
    }
    LOG.info("Bulk loading an RTree with " + elementCount + " elements");

    // It turns out findBestDegree returns the best degree when the whole
    // tree is loaded to memory when processed. However, as current algorithms
    // process the tree while it's on disk, a higher degree should be selected
    // such that a node fits one file block (assumed to be 4K).
    //final int degree = findBestDegree(bytesAvailable, elementCount);
    LOG.info("Writing an RTree with degree " + degree);

    int height = Math.max(1, (int) Math.ceil(Math.log(elementCount) / Math.log(degree)));
    int leafNodeCount = (int) Math.pow(degree, height - 1);
    if (elementCount < 2 * leafNodeCount && height > 1) {
      height--;
      leafNodeCount = (int) Math.pow(degree, height - 1);
    }
    int nodeCount = (int) ((Math.pow(degree, height) - 1) / (degree - 1));
    int nonLeafNodeCount = nodeCount - leafNodeCount;

    // Keep track of the offset of each element in the text
    final int[] offsets = new int[elementCount];
    final double[] xs = fast_sort ? new double[elementCount] : null;
    final double[] ys = fast_sort ? new double[elementCount] : null;

    i_start = offset;
    line.clear();
    for (int i = 0; i < elementCount; i++) {
      offsets[i] = i_start;
      int i_end = skipToEOL(element_bytes, i_start);
      if (xs != null) {
        // Extract the line without the end-of-line character
        line.set(element_bytes, i_start, i_end - i_start - 1);
        stockObject.fromText(line);
        // Sample the center of the shape
        xs[i] = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2;
        ys[i] = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2;
      }
      i_start = i_end;
    }

    /** A struct to store information about a split */
    class SplitStruct extends Rectangle {
      /** Start and end index for this split */
      int index1, index2;
      /** Direction of this split */
      byte direction;
      /** Offset of the first element on disk */
      int offsetOfFirstElement;

      static final byte DIRECTION_X = 0;
      static final byte DIRECTION_Y = 1;

      SplitStruct(int index1, int index2, byte direction) {
        this.index1 = index1;
        this.index2 = index2;
        this.direction = direction;
      }

      @Override
      public void write(DataOutput out) throws IOException {
        out.writeInt(offsetOfFirstElement);
        super.write(out);
      }

      void partition(Queue<SplitStruct> toBePartitioned) {
        IndexedSortable sortableX;
        IndexedSortable sortableY;

        if (fast_sort) {
          // Use the materialized xs[] and ys[] to do the comparisons
          sortableX = new IndexedSortable() {
            @Override
            public void swap(int i, int j) {
              // Swap xs
              double tempx = xs[i];
              xs[i] = xs[j];
              xs[j] = tempx;
              // Swap ys
              double tempY = ys[i];
              ys[i] = ys[j];
              ys[j] = tempY;
              // Swap id
              int tempid = offsets[i];
              offsets[i] = offsets[j];
              offsets[j] = tempid;
            }

            @Override
            public int compare(int i, int j) {
              if (xs[i] < xs[j]) return -1;
              if (xs[i] > xs[j]) return 1;
              return 0;
            }
          };

          sortableY = new IndexedSortable() {
            @Override
            public void swap(int i, int j) {
              // Swap xs
              double tempx = xs[i];
              xs[i] = xs[j];
              xs[j] = tempx;
              // Swap ys
              double tempY = ys[i];
              ys[i] = ys[j];
              ys[j] = tempY;
              // Swap id
              int tempid = offsets[i];
              offsets[i] = offsets[j];
              offsets[j] = tempid;
            }

            @Override
            public int compare(int i, int j) {
              if (ys[i] < ys[j]) return -1;
              if (ys[i] > ys[j]) return 1;
              return 0;
            }
          };
        } else {
          // No materialized xs and ys. Always deserialize objects to compare.
          sortableX = new IndexedSortable() {
            @Override
            public void swap(int i, int j) {
              // Swap id
              int tempid = offsets[i];
              offsets[i] = offsets[j];
              offsets[j] = tempid;
            }

            @Override
            public int compare(int i, int j) {
              // Get end of line
              int eol = skipToEOL(element_bytes, offsets[i]);
              line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
              stockObject.fromText(line);
              double xi = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2;
              eol = skipToEOL(element_bytes, offsets[j]);
              line.set(element_bytes, offsets[j], eol - offsets[j] - 1);
              stockObject.fromText(line);
              double xj = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2;
              if (xi < xj) return -1;
              if (xi > xj) return 1;
              return 0;
            }
          };

          sortableY = new IndexedSortable() {
            @Override
            public void swap(int i, int j) {
              // Swap id
              int tempid = offsets[i];
              offsets[i] = offsets[j];
              offsets[j] = tempid;
            }

            @Override
            public int compare(int i, int j) {
              int eol = skipToEOL(element_bytes, offsets[i]);
              line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
              stockObject.fromText(line);
              double yi = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2;
              eol = skipToEOL(element_bytes, offsets[j]);
              line.set(element_bytes, offsets[j], eol - offsets[j] - 1);
              stockObject.fromText(line);
              double yj = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2;
              if (yi < yj) return -1;
              if (yi > yj) return 1;
              return 0;
            }
          };
        }

        final IndexedSorter sorter = new QuickSort();
        final IndexedSortable[] sortables = new IndexedSortable[2];
        sortables[SplitStruct.DIRECTION_X] = sortableX;
        sortables[SplitStruct.DIRECTION_Y] = sortableY;

        sorter.sort(sortables[direction], index1, index2);

        // Partition into degree partitions (equally) and
        // create a SplitStruct for each partition
        int i1 = index1;
        for (int iSplit = 0; iSplit < degree; iSplit++) {
          int i2 = index1 + (index2 - index1) * (iSplit + 1) / degree;
          SplitStruct newSplit = new SplitStruct(i1, i2, (byte) (1 - direction));
          toBePartitioned.add(newSplit);
          i1 = i2;
        }
      }
    }

    // All nodes stored in level-order traversal
    Vector<SplitStruct> nodes = new Vector<SplitStruct>();
    final Queue<SplitStruct> toBePartitioned = new LinkedList<SplitStruct>();
    toBePartitioned.add(new SplitStruct(0, elementCount, SplitStruct.DIRECTION_X));
    while (!toBePartitioned.isEmpty()) {
      SplitStruct split = toBePartitioned.poll();
      if (nodes.size() < nonLeafNodeCount) {
        // This is a non-leaf
        split.partition(toBePartitioned);
      }
      nodes.add(split);
    }
    if (nodes.size() != nodeCount) {
      throw new RuntimeException(
          "Expected node count: " + nodeCount + ". Real node count: " + nodes.size());
    }

    // Now we have our data sorted in the required order. Start building
    // the tree.
    // Store the offset of each leaf node in the tree
    FSDataOutputStream fakeOut = null;
    try {
      fakeOut = new FSDataOutputStream(new java.io.OutputStream() {
        // Null output stream: all bytes are discarded, only the position advances
        @Override
        public void write(int b) throws IOException {
          // Do nothing
        }

        @Override
        public void write(byte[] b, int off, int len) throws IOException {
          // Do nothing
        }

        @Override
        public void write(byte[] b) throws IOException {
          // Do nothing
        }
      }, null, TreeHeaderSize + nodes.size() * NodeSize);

      for (int i_leaf = nonLeafNodeCount, i = 0; i_leaf < nodes.size(); i_leaf++) {
        nodes.elementAt(i_leaf).offsetOfFirstElement = (int) fakeOut.getPos();
        if (i != nodes.elementAt(i_leaf).index1)
          throw new RuntimeException();
        double x1, y1, x2, y2;

        // Initialize the MBR to the first object
        int eol = skipToEOL(element_bytes, offsets[i]);
        fakeOut.write(element_bytes, offsets[i], eol - offsets[i]);
        line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
        stockObject.fromText(line);
        Rectangle mbr = stockObject.getMBR();
        x1 = mbr.x1;
        y1 = mbr.y1;
        x2 = mbr.x2;
        y2 = mbr.y2;
        i++;

        while (i < nodes.elementAt(i_leaf).index2) {
          eol = skipToEOL(element_bytes, offsets[i]);
          fakeOut.write(element_bytes, offsets[i], eol - offsets[i]);
          line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
          stockObject.fromText(line);
          mbr = stockObject.getMBR();
          if (mbr.x1 < x1) x1 = mbr.x1;
          if (mbr.y1 < y1) y1 = mbr.y1;
          if (mbr.x2 > x2) x2 = mbr.x2;
          if (mbr.y2 > y2) y2 = mbr.y2;
          i++;
        }
        nodes.elementAt(i_leaf).set(x1, y1, x2, y2);
      }
    } finally {
      if (fakeOut != null)
        fakeOut.close();
    }

    // Calculate the MBR and offsetOfFirstElement for non-leaves
    for (int i_node = nonLeafNodeCount - 1; i_node >= 0; i_node--) {
      int i_first_child = i_node * degree + 1;
      nodes.elementAt(i_node).offsetOfFirstElement =
          nodes.elementAt(i_first_child).offsetOfFirstElement;
      int i_child = 0;
      Rectangle mbr;
      mbr = nodes.elementAt(i_first_child + i_child);
      double x1 = mbr.x1;
      double y1 = mbr.y1;
      double x2 = mbr.x2;
      double y2 = mbr.y2;
      i_child++;

      while (i_child < degree) {
        mbr = nodes.elementAt(i_first_child + i_child);
        if (mbr.x1 < x1) x1 = mbr.x1;
        if (mbr.y1 < y1) y1 = mbr.y1;
        if (mbr.x2 > x2) x2 = mbr.x2;
        if (mbr.y2 > y2) y2 = mbr.y2;
        i_child++;
      }
      nodes.elementAt(i_node).set(x1, y1, x2, y2);
    }

    // Start writing the tree
    // Write the tree header (including the size)
    // Total tree size (== total bytes written - 8 bytes for the size itself)
    dataOut.writeInt(TreeHeaderSize + NodeSize * nodeCount + len);
    // Tree height
    dataOut.writeInt(height);
    // Degree
    dataOut.writeInt(degree);
    dataOut.writeInt(elementCount);

    // Write nodes
    for (SplitStruct node : nodes) {
      node.write(dataOut);
    }
    // Write elements
    for (int element_i = 0; element_i < elementCount; element_i++) {
      int eol = skipToEOL(element_bytes, offsets[element_i]);
      dataOut.write(element_bytes, offsets[element_i], eol - offsets[element_i]);
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
}
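The getPos() trick worth pulling out of this example: bulkLoadWrite never writes the leaf data twice. It wraps a do-nothing OutputStream in an FSDataOutputStream seeded with the header-plus-nodes size (the same three-argument constructor the example itself uses), so getPos() hands back the final file offset of each element without producing any output. A stripped-down sketch of that idiom, with invented sizes standing in for TreeHeaderSize, NodeSize, and the records:

import java.io.IOException;
import java.io.OutputStream;
import org.apache.hadoop.fs.FSDataOutputStream;

public class OffsetCounter {
  public static void main(String[] args) throws IOException {
    // A do-nothing sink: bytes are discarded, only the position advances
    OutputStream nullOut = new OutputStream() {
      @Override
      public void write(int b) {
        // Do nothing
      }

      @Override
      public void write(byte[] b, int off, int len) {
        // Do nothing
      }
    };
    long headerAndNodesSize = 128; // invented stand-in for TreeHeaderSize + nodes * NodeSize
    FSDataOutputStream counter = new FSDataOutputStream(nullOut, null, headerAndNodesSize);
    byte[] record = new byte[40];  // invented record size
    for (int i = 0; i < 3; i++) {
      System.out.println("record " + i + " would start at offset " + counter.getPos());
      counter.write(record);
    }
    counter.close(); // prints offsets 128, 168, 208
  }
}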
From source file:edu.umn.cs.spatialHadoop.indexing.RTree.java
License:Open Source License
/**
 * Builds the RTree given a serialized list of elements. It uses the given
 * stockObject to deserialize these elements using
 * {@link TextSerializable#fromText(Text)} and build the tree. Also writes the
 * created tree to the disk directly.
 *
 * @param element_bytes
 *          - serialization of all elements separated by new lines
 * @param offset
 *          - offset of the first byte to use in element_bytes
 * @param len
 *          - number of bytes to use in element_bytes
 * @param degree
 *          - degree of the R-tree to build in terms of number of children
 *            per node
 * @param dataOut
 *          - output stream to write the result to.
 * @param fast_sort
 *          - setting this to <code>true</code> allows the method to run
 *            faster by materializing the offset of each element in the list,
 *            which speeds up the comparison. However, this requires an
 *            additional 16 bytes per element. So, for each 1M elements, the
 *            method will require an additional 16 M bytes (approximately).
 */
public static void bulkLoadWrite(final byte[] element_bytes, final int offset, final int len,
    final int degree, DataOutput dataOut, final Shape stockObject, final boolean fast_sort) {
  try {
    int elementCount = 0;

    // Count number of elements in the given text
    int i_start = offset;
    final Text line = new Text();
    while (i_start < offset + len) {
      int i_end = skipToEOL(element_bytes, i_start);
      // Extract the line without the end-of-line character
      line.set(element_bytes, i_start, i_end - i_start - 1);
      stockObject.fromText(line);
      elementCount++;
      i_start = i_end;
    }
    LOG.info("Bulk loading an RTree with " + elementCount + " elements");

    // It turns out findBestDegree returns the best degree when the whole
    // tree is loaded to memory when processed. However, as current algorithms
    // process the tree while it's on disk, a higher degree should be selected
    // such that a node fits one file block (assumed to be 4K).
    //final int degree = findBestDegree(bytesAvailable, elementCount);

    int height = Math.max(1, (int) Math.ceil(Math.log(elementCount) / Math.log(degree)));
    int leafNodeCount = (int) Math.pow(degree, height - 1);
    if (elementCount < 2 * leafNodeCount && height > 1) {
      height--;
      leafNodeCount = (int) Math.pow(degree, height - 1);
    }
    int nodeCount = (int) ((Math.pow(degree, height) - 1) / (degree - 1));
    int nonLeafNodeCount = nodeCount - leafNodeCount;

    // Keep track of the offset of each element in the text
    final int[] offsets = new int[elementCount];
    final double[] xs = fast_sort ? new double[elementCount] : null;
    final double[] ys = fast_sort ? new double[elementCount] : null;

    i_start = offset;
    line.clear();
    for (int i = 0; i < elementCount; i++) {
      offsets[i] = i_start;
      int i_end = skipToEOL(element_bytes, i_start);
      if (xs != null) {
        // Extract the line without the end-of-line character
        line.set(element_bytes, i_start, i_end - i_start - 1);
        stockObject.fromText(line);
        // Sample the center of the shape
        xs[i] = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2;
        ys[i] = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2;
      }
      i_start = i_end;
    }

    /** A struct to store information about a split */
    class SplitStruct extends Rectangle {
      /** Start and end index for this split */
      int index1, index2;
      /** Direction of this split */
      byte direction;
      /** Offset of the first element on disk */
      int offsetOfFirstElement;

      static final byte DIRECTION_X = 0;
      static final byte DIRECTION_Y = 1;

      SplitStruct(int index1, int index2, byte direction) {
        this.index1 = index1;
        this.index2 = index2;
        this.direction = direction;
      }

      @Override
      public void write(DataOutput out) throws IOException {
        out.writeInt(offsetOfFirstElement);
        super.write(out);
      }

      void partition(Queue<SplitStruct> toBePartitioned) {
        IndexedSortable sortableX;
        IndexedSortable sortableY;

        if (fast_sort) {
          // Use the materialized xs[] and ys[] to do the comparisons
          sortableX = new IndexedSortable() {
            @Override
            public void swap(int i, int j) {
              // Swap xs
              double tempx = xs[i];
              xs[i] = xs[j];
              xs[j] = tempx;
              // Swap ys
              double tempY = ys[i];
              ys[i] = ys[j];
              ys[j] = tempY;
              // Swap id
              int tempid = offsets[i];
              offsets[i] = offsets[j];
              offsets[j] = tempid;
            }

            @Override
            public int compare(int i, int j) {
              if (xs[i] < xs[j]) return -1;
              if (xs[i] > xs[j]) return 1;
              return 0;
            }
          };

          sortableY = new IndexedSortable() {
            @Override
            public void swap(int i, int j) {
              // Swap xs
              double tempx = xs[i];
              xs[i] = xs[j];
              xs[j] = tempx;
              // Swap ys
              double tempY = ys[i];
              ys[i] = ys[j];
              ys[j] = tempY;
              // Swap id
              int tempid = offsets[i];
              offsets[i] = offsets[j];
              offsets[j] = tempid;
            }

            @Override
            public int compare(int i, int j) {
              if (ys[i] < ys[j]) return -1;
              if (ys[i] > ys[j]) return 1;
              return 0;
            }
          };
        } else {
          // No materialized xs and ys. Always deserialize objects to compare.
          sortableX = new IndexedSortable() {
            @Override
            public void swap(int i, int j) {
              // Swap id
              int tempid = offsets[i];
              offsets[i] = offsets[j];
              offsets[j] = tempid;
            }

            @Override
            public int compare(int i, int j) {
              // Get end of line
              int eol = skipToEOL(element_bytes, offsets[i]);
              line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
              stockObject.fromText(line);
              double xi = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2;
              eol = skipToEOL(element_bytes, offsets[j]);
              line.set(element_bytes, offsets[j], eol - offsets[j] - 1);
              stockObject.fromText(line);
              double xj = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2;
              if (xi < xj) return -1;
              if (xi > xj) return 1;
              return 0;
            }
          };

          sortableY = new IndexedSortable() {
            @Override
            public void swap(int i, int j) {
              // Swap id
              int tempid = offsets[i];
              offsets[i] = offsets[j];
              offsets[j] = tempid;
            }

            @Override
            public int compare(int i, int j) {
              int eol = skipToEOL(element_bytes, offsets[i]);
              line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
              stockObject.fromText(line);
              double yi = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2;
              eol = skipToEOL(element_bytes, offsets[j]);
              line.set(element_bytes, offsets[j], eol - offsets[j] - 1);
              stockObject.fromText(line);
              double yj = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2;
              if (yi < yj) return -1;
              if (yi > yj) return 1;
              return 0;
            }
          };
        }

        final IndexedSorter sorter = new QuickSort();
        final IndexedSortable[] sortables = new IndexedSortable[2];
        sortables[SplitStruct.DIRECTION_X] = sortableX;
        sortables[SplitStruct.DIRECTION_Y] = sortableY;

        sorter.sort(sortables[direction], index1, index2);

        // Partition into degree partitions (equally) and
        // create a SplitStruct for each partition
        int i1 = index1;
        for (int iSplit = 0; iSplit < degree; iSplit++) {
          int i2 = index1 + (index2 - index1) * (iSplit + 1) / degree;
          SplitStruct newSplit = new SplitStruct(i1, i2, (byte) (1 - direction));
          toBePartitioned.add(newSplit);
          i1 = i2;
        }
      }
    }

    // All nodes stored in level-order traversal
    Vector<SplitStruct> nodes = new Vector<SplitStruct>();
    final Queue<SplitStruct> toBePartitioned = new LinkedList<SplitStruct>();
    toBePartitioned.add(new SplitStruct(0, elementCount, SplitStruct.DIRECTION_X));
    while (!toBePartitioned.isEmpty()) {
      SplitStruct split = toBePartitioned.poll();
      if (nodes.size() < nonLeafNodeCount) {
        // This is a non-leaf
        split.partition(toBePartitioned);
      }
      nodes.add(split);
    }
    if (nodes.size() != nodeCount) {
      throw new RuntimeException(
          "Expected node count: " + nodeCount + ". Real node count: " + nodes.size());
    }

    // Now we have our data sorted in the required order. Start building
    // the tree.
    // Store the offset of each leaf node in the tree
    FSDataOutputStream fakeOut = null;
    try {
      fakeOut = new FSDataOutputStream(new java.io.OutputStream() {
        // Null output stream: all bytes are discarded, only the position advances
        @Override
        public void write(int b) throws IOException {
          // Do nothing
        }

        @Override
        public void write(byte[] b, int off, int len) throws IOException {
          // Do nothing
        }

        @Override
        public void write(byte[] b) throws IOException {
          // Do nothing
        }
      }, null, TreeHeaderSize + nodes.size() * NodeSize);

      for (int i_leaf = nonLeafNodeCount, i = 0; i_leaf < nodes.size(); i_leaf++) {
        nodes.elementAt(i_leaf).offsetOfFirstElement = (int) fakeOut.getPos();
        if (i != nodes.elementAt(i_leaf).index1)
          throw new RuntimeException();
        double x1, y1, x2, y2;

        // Initialize the MBR to the first object
        int eol = skipToEOL(element_bytes, offsets[i]);
        fakeOut.write(element_bytes, offsets[i], eol - offsets[i]);
        line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
        stockObject.fromText(line);
        Rectangle mbr = stockObject.getMBR();
        x1 = mbr.x1;
        y1 = mbr.y1;
        x2 = mbr.x2;
        y2 = mbr.y2;
        i++;

        while (i < nodes.elementAt(i_leaf).index2) {
          eol = skipToEOL(element_bytes, offsets[i]);
          fakeOut.write(element_bytes, offsets[i], eol - offsets[i]);
          line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
          stockObject.fromText(line);
          mbr = stockObject.getMBR();
          if (mbr.x1 < x1) x1 = mbr.x1;
          if (mbr.y1 < y1) y1 = mbr.y1;
          if (mbr.x2 > x2) x2 = mbr.x2;
          if (mbr.y2 > y2) y2 = mbr.y2;
          i++;
        }
        nodes.elementAt(i_leaf).set(x1, y1, x2, y2);
      }
    } finally {
      if (fakeOut != null)
        fakeOut.close();
    }

    // Calculate the MBR and offsetOfFirstElement for non-leaves
    for (int i_node = nonLeafNodeCount - 1; i_node >= 0; i_node--) {
      int i_first_child = i_node * degree + 1;
      nodes.elementAt(i_node).offsetOfFirstElement =
          nodes.elementAt(i_first_child).offsetOfFirstElement;
      int i_child = 0;
      Rectangle mbr;
      mbr = nodes.elementAt(i_first_child + i_child);
      double x1 = mbr.x1;
      double y1 = mbr.y1;
      double x2 = mbr.x2;
      double y2 = mbr.y2;
      i_child++;

      while (i_child < degree) {
        mbr = nodes.elementAt(i_first_child + i_child);
        if (mbr.x1 < x1) x1 = mbr.x1;
        if (mbr.y1 < y1) y1 = mbr.y1;
        if (mbr.x2 > x2) x2 = mbr.x2;
        if (mbr.y2 > y2) y2 = mbr.y2;
        i_child++;
      }
      nodes.elementAt(i_node).set(x1, y1, x2, y2);
    }

    // Start writing the tree
    // Write the tree header (including the size)
    // Total tree size (== total bytes written - 8 bytes for the size itself)
    dataOut.writeInt(TreeHeaderSize + NodeSize * nodeCount + len);
    // Tree height
    dataOut.writeInt(height);
    // Degree
    dataOut.writeInt(degree);
    dataOut.writeInt(elementCount);

    // Write nodes
    for (SplitStruct node : nodes) {
      node.write(dataOut);
    }
    // Write elements
    for (int element_i = 0; element_i < elementCount; element_i++) {
      int eol = skipToEOL(element_bytes, offsets[element_i]);
      dataOut.write(element_bytes, offsets[element_i], eol - offsets[element_i]);
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
}
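Apart from being static and taking the stockObject (as a Shape) and elementCount as a parameter and a local variable instead of instance fields, this indexing-package variant is the same algorithm as the core-package version above, so the getPos() bookkeeping on the fake stream is identical.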
From source file:FormatStorage.BlockIndex.java
License:Open Source License
public OffsetInfo persistent(FSDataOutputStream out) throws IOException {
  OffsetInfo offsetInfo = new OffsetInfo();
  if (keyIndexInfos.size() != 0) {
    // Remember where the key index starts before writing it
    offsetInfo.keyIndexOffset = out.getPos();
    persistentKeyIndex(out);
  }
  if (lineIndexInfos.size() != 0) {
    // Same for the line index
    offsetInfo.lineIndexOffset = out.getPos();
    persistentLineIndex(out);
  }
  lineIndexInfos.clear();
  lineIndexInfos = null;
  keyIndexInfos.clear();
  keyIndexInfos = null;
  return offsetInfo;
}
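The idiom here: getPos() is read immediately before each index section is written, so the returned OffsetInfo records exactly where the key index and the line index begin in the file. A reader can later seek straight to either section instead of scanning the whole file.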
From source file:ml.shifu.guagua.yarn.GuaguaSplitWriter.java
License:Apache License
@SuppressWarnings("unchecked")
private static <T extends InputSplit> SplitMetaInfo[] writeNewSplits(Configuration conf,
    T[] array, FSDataOutputStream out) throws IOException, InterruptedException {
  SplitMetaInfo[] info = new SplitMetaInfo[array.length];
  if (array.length != 0) {
    SerializationFactory factory = new SerializationFactory(conf);
    int i = 0;
    long offset = out.getPos();
    for (T split : array) {
      long prevCount = out.getPos();
      Text.writeString(out, split.getClass().getName());
      Serializer<T> serializer = factory.getSerializer((Class<T>) split.getClass());
      serializer.open(out);
      serializer.serialize(split);
      long currCount = out.getPos();
      String[] locations = split.getLocations();
      final int max_loc = conf.getInt(MAX_SPLIT_LOCATIONS, 10);
      if (locations.length > max_loc) {
        LOG.warn("Max block location exceeded for split: " + split + " splitsize: "
            + locations.length + " maxsize: " + max_loc);
        locations = Arrays.copyOf(locations, max_loc);
      }
      info[i++] = new JobSplit.SplitMetaInfo(locations, offset, split.getLength());
      offset += currCount - prevCount;
    }
  }
  return info;
}
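The pair of getPos() calls around each serialized split (prevCount/currCount) is how the writer learns the number of bytes a split occupied on the stream, since the Serializer interface itself reports no sizes; the running offset then gives each SplitMetaInfo its location in the split file. A stripped-down sketch of that measuring idiom, using an in-memory stream and an invented class name:

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.io.Text;

public class SplitSizeDemo {
  public static void main(String[] args) throws IOException {
    // Wrap an in-memory stream; the two-argument constructor starts the position at 0
    FSDataOutputStream out = new FSDataOutputStream(new ByteArrayOutputStream(), null);
    long before = out.getPos();
    // Stand-in for one serialized entry; the class name is invented
    Text.writeString(out, "org.example.FakeSplit");
    long after = out.getPos();
    System.out.println("entry took " + (after - before) + " bytes");
    out.close();
  }
}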
From source file:ml.shifu.guagua.yarn.GuaguaSplitWriter.java
License:Apache License
private static SplitMetaInfo[] writeOldSplits(org.apache.hadoop.mapred.InputSplit[] splits,
    FSDataOutputStream out, Configuration conf) throws IOException {
  SplitMetaInfo[] info = new SplitMetaInfo[splits.length];
  if (splits.length != 0) {
    int i = 0;
    long offset = out.getPos();
    for (org.apache.hadoop.mapred.InputSplit split : splits) {
      long prevLen = out.getPos();
      Text.writeString(out, split.getClass().getName());
      split.write(out);
      long currLen = out.getPos();
      String[] locations = split.getLocations();
      final int max_loc = conf.getInt(MAX_SPLIT_LOCATIONS, 10);
      if (locations.length > max_loc) {
        LOG.warn("Max block location exceeded for split: " + split + " splitsize: "
            + locations.length + " maxsize: " + max_loc);
        locations = Arrays.copyOf(locations, max_loc);
      }
      info[i++] = new JobSplit.SplitMetaInfo(locations, offset, split.getLength());
      offset += currLen - prevLen;
    }
  }
  return info;
}
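Same measuring idiom as writeNewSplits above, but for the old mapred API: here each split serializes itself via its Writable-style write method rather than a SerializationFactory-provided Serializer, while the getPos() deltas play the same role.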
From source file:org.apache.blur.store.hdfs.HdfsDirectory.java
License:Apache License
@Override
public IndexOutput createOutput(final String name, IOContext context) throws IOException {
  LOG.debug("createOutput [{0}] [{1}] [{2}]", name, context, getPath());
  if (fileExists(name)) {
    deleteFile(name);
  }
  if (_useCache) {
    _fileStatusMap.put(name, new FStat(System.currentTimeMillis(), 0L));
  }
  final FSDataOutputStream outputStream = openForOutput(name);
  trackObject(outputStream, "Outputstream", name, _path);
  return new BufferedIndexOutput() {
    @Override
    public long length() throws IOException {
      return outputStream.getPos();
    }

    @Override
    protected void flushBuffer(byte[] b, int offset, int len) throws IOException {
      long start = System.nanoTime();
      outputStream.write(b, offset, len);
      long end = System.nanoTime();
      _metricsGroup.writeAccess.update((end - start) / 1000);
      _metricsGroup.writeThroughput.mark(len);
    }

    @Override
    public void close() throws IOException {
      super.close();
      long length = outputStream.getPos();
      if (_useCache) {
        _fileStatusMap.put(name, new FStat(System.currentTimeMillis(), length));
      }
      // This exists because HDFS is so slow to close files. There are
      // built-in sleeps during the close call.
      if (_asyncClosing && _useCache) {
        outputStream.sync();
        CLOSING_QUEUE.add(outputStream);
      } else {
        outputStream.close();
      }
    }

    @Override
    public void seek(long pos) throws IOException {
      throw new IOException("seeks not allowed on IndexOutputs.");
    }
  };
}
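Here getPos() doubles as the file length for Lucene's IndexOutput: HDFS files are append-only, so the current write position is by definition the number of bytes written so far, which is why both length() and the cached FStat entry in close() read it.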
From source file:org.apache.giraph.graph.BspServiceWorker.java
License:Apache License
@Override
public void storeCheckpoint() throws IOException {
  getContext().setStatus("storeCheckpoint: Starting checkpoint "
      + getGraphMapper().getMapFunctions().toString()
      + " - Attempt=" + getApplicationAttempt()
      + ", Superstep=" + getSuperstep());

  // Algorithm:
  // For each partition, dump vertices and messages
  Path metadataFilePath = new Path(getCheckpointBasePath(getSuperstep()) + "."
      + getHostnamePartitionId() + CHECKPOINT_METADATA_POSTFIX);
  Path verticesFilePath = new Path(getCheckpointBasePath(getSuperstep()) + "."
      + getHostnamePartitionId() + CHECKPOINT_VERTICES_POSTFIX);
  Path validFilePath = new Path(getCheckpointBasePath(getSuperstep()) + "."
      + getHostnamePartitionId() + CHECKPOINT_VALID_POSTFIX);

  // Remove these files if they already exist (they shouldn't, though,
  // unless this worker failed previously)
  if (getFs().delete(validFilePath, false)) {
    LOG.warn("storeCheckpoint: Removed valid file " + validFilePath);
  }
  if (getFs().delete(metadataFilePath, false)) {
    LOG.warn("storeCheckpoint: Removed metadata file " + metadataFilePath);
  }
  if (getFs().delete(verticesFilePath, false)) {
    LOG.warn("storeCheckpoint: Removed file " + verticesFilePath);
  }

  FSDataOutputStream verticesOutputStream = getFs().create(verticesFilePath);
  ByteArrayOutputStream metadataByteStream = new ByteArrayOutputStream();
  DataOutput metadataOutput = new DataOutputStream(metadataByteStream);
  for (Partition<I, V, E, M> partition : workerPartitionMap.values()) {
    long startPos = verticesOutputStream.getPos();
    partition.write(verticesOutputStream);
    // Write the metadata for this partition
    // Format:
    // <index count>
    // <index 0 start pos><partition id>
    // <index 1 start pos><partition id>
    metadataOutput.writeLong(startPos);
    metadataOutput.writeInt(partition.getPartitionId());
    if (LOG.isDebugEnabled()) {
      LOG.debug("storeCheckpoint: Vertex file starting "
          + "offset = " + startPos + ", length = "
          + (verticesOutputStream.getPos() - startPos)
          + ", partition = " + partition.toString());
    }
  }

  // Metadata is buffered and written at the end since it's small and
  // needs to know how many partitions this worker owns
  FSDataOutputStream metadataOutputStream = getFs().create(metadataFilePath);
  metadataOutputStream.writeInt(workerPartitionMap.size());
  metadataOutputStream.write(metadataByteStream.toByteArray());
  metadataOutputStream.close();
  verticesOutputStream.close();

  if (LOG.isInfoEnabled()) {
    LOG.info("storeCheckpoint: Finished metadata (" + metadataFilePath
        + ") and vertices (" + verticesFilePath + ").");
  }
  getFs().createNewFile(validFilePath);
}
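The checkpoint format leans on getPos() twice: the position captured before each partition dump becomes that partition's start offset in the buffered metadata, and the position delta after the dump gives the debug log the partition's length. The metadata file can then point a recovering worker directly at its partition without reading the whole vertices file.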
From source file:org.apache.giraph.worker.BspServiceWorkerForCohesiveSubgraph.java
License:Apache License
@Override
public void storeCheckpoint() throws IOException {
  LoggerUtils.setStatusAndLog(getContext(), LOG, Level.INFO,
      "storeCheckpoint: Starting checkpoint "
          + getGraphTaskManager().getGraphFunctions().toString()
          + " - Attempt=" + getApplicationAttempt()
          + ", Superstep=" + getSuperstep());

  // Algorithm:
  // For each partition, dump vertices and messages
  Path metadataFilePath = new Path(getCheckpointBasePath(getSuperstep()) + "."
      + getHostnamePartitionId() + CHECKPOINT_METADATA_POSTFIX);
  Path verticesFilePath = new Path(getCheckpointBasePath(getSuperstep()) + "."
      + getHostnamePartitionId() + CHECKPOINT_VERTICES_POSTFIX);
  Path validFilePath = new Path(getCheckpointBasePath(getSuperstep()) + "."
      + getHostnamePartitionId() + CHECKPOINT_VALID_POSTFIX);

  // Remove these files if they already exist (they shouldn't, though,
  // unless this worker failed previously)
  if (getFs().delete(validFilePath, false)) {
    LOG.warn("storeCheckpoint: Removed valid file " + validFilePath);
  }
  if (getFs().delete(metadataFilePath, false)) {
    LOG.warn("storeCheckpoint: Removed metadata file " + metadataFilePath);
  }
  if (getFs().delete(verticesFilePath, false)) {
    LOG.warn("storeCheckpoint: Removed file " + verticesFilePath);
  }

  FSDataOutputStream verticesOutputStream = getFs().create(verticesFilePath);
  ByteArrayOutputStream metadataByteStream = new ByteArrayOutputStream();
  DataOutput metadataOutput = new DataOutputStream(metadataByteStream);
  for (Integer partitionId : getPartitionStore().getPartitionIds()) {
    Partition<I, V, E, M> partition = getPartitionStore().getPartition(partitionId);
    long startPos = verticesOutputStream.getPos();
    partition.write(verticesOutputStream);
    // Write messages
    getServerData().getCurrentMessageStore().writePartition(verticesOutputStream,
        partition.getId());
    // Write the metadata for this partition
    // Format:
    // <index count>
    // <index 0 start pos><partition id>
    // <index 1 start pos><partition id>
    metadataOutput.writeLong(startPos);
    metadataOutput.writeInt(partition.getId());
    if (LOG.isDebugEnabled()) {
      LOG.debug("storeCheckpoint: Vertex file starting "
          + "offset = " + startPos + ", length = "
          + (verticesOutputStream.getPos() - startPos)
          + ", partition = " + partition.toString());
    }
    getPartitionStore().putPartition(partition);
    getContext().progress();
  }

  // Metadata is buffered and written at the end since it's small and
  // needs to know how many partitions this worker owns
  FSDataOutputStream metadataOutputStream = getFs().create(metadataFilePath);
  metadataOutputStream.writeInt(getPartitionStore().getNumPartitions());
  metadataOutputStream.write(metadataByteStream.toByteArray());
  metadataOutputStream.close();
  verticesOutputStream.close();

  if (LOG.isInfoEnabled()) {
    LOG.info("storeCheckpoint: Finished metadata (" + metadataFilePath
        + ") and vertices (" + verticesFilePath + ").");
  }
  getFs().createNewFile(validFilePath);

  // Notify master that checkpoint is stored
  String workerWroteCheckpoint = getWorkerWroteCheckpointPath(getApplicationAttempt(),
      getSuperstep()) + "/" + getHostnamePartitionId();
  try {
    getZkExt().createExt(workerWroteCheckpoint, new byte[0], Ids.OPEN_ACL_UNSAFE,
        CreateMode.PERSISTENT, true);
  } catch (KeeperException.NodeExistsException e) {
    LOG.warn("storeCheckpoint: wrote checkpoint worker path " + workerWroteCheckpoint
        + " already exists!");
  } catch (KeeperException e) {
    throw new IllegalStateException(
        "Creating " + workerWroteCheckpoint + " failed with KeeperException", e);
  } catch (InterruptedException e) {
    throw new IllegalStateException(
        "Creating " + workerWroteCheckpoint + " failed with InterruptedException", e);
  }
}
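This subclass variant adds two things to the checkpoint loop above: it also streams each partition's pending messages into the vertices file (so the recorded getPos() offset marks the start of a vertices-plus-messages block), and it registers a ZooKeeper node afterwards to tell the master the checkpoint is complete.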
From source file:org.apache.hama.graph.DiskVerticesInfo.java
License:Apache License
/**
 * Serializes the vertex's soft parts to its file. If the vertex does not have
 * an index yet (e.g. at startup) you can provide -1 and it will be added to
 * the temporary storage.
 */
private void serializeSoft(Vertex<V, E, M> vertex, int index, long[] softValueOffsets,
    FSDataOutputStream softGraphParts) throws IOException {
  // Save the offset, then write the soft parts
  if (index >= 0) {
    softValueOffsets[index] = softGraphParts.getPos();
    // Only set the bitset if we've finished the setup
    activeVertices.set(index, vertex.isHalted());
  } else {
    tmpSoftOffsets.add(softGraphParts.getPos());
  }
  if (vertex.getValue() == null) {
    softGraphParts.write(NULL);
  } else {
    softGraphParts.write(NOT_NULL);
    vertex.getValue().write(softGraphParts);
  }
  vertex.writeState(softGraphParts);
  softGraphParts.writeInt(vertex.getEdges().size());
  for (Edge<?, ?> e : vertex.getEdges()) {
    if (e.getValue() == null) {
      softGraphParts.write(NULL);
    } else {
      softGraphParts.write(NOT_NULL);
      e.getValue().write(softGraphParts);
    }
  }
}
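Same bookkeeping at per-vertex granularity: the stream position is recorded in softValueOffsets (or in the temporary tmpSoftOffsets list during setup) before each vertex's soft state is written, so an individual vertex can later be located in the parts file by offset.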
From source file:org.apache.parquet.hadoop.ParquetFileWriter.java
License:Apache License
private static void serializeFooter(ParquetMetadata footer, FSDataOutputStream out)
    throws IOException {
  long footerIndex = out.getPos();
  org.apache.parquet.format.FileMetaData parquetMetadata =
      metadataConverter.toParquetMetadata(CURRENT_VERSION, footer);
  writeFileMetaData(parquetMetadata, out);
  if (DEBUG)
    LOG.debug(out.getPos() + ": footer length = " + (out.getPos() - footerIndex));
  BytesUtils.writeIntLittleEndian(out, (int) (out.getPos() - footerIndex));
  out.write(MAGIC);
}
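The footer length here is derived purely from two positions: getPos() before and after writing the metadata. Writing that length little-endian, followed by the magic bytes, is what lets a Parquet reader start from the end of the file, read the length, and seek back to footerIndex to find the footer.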