List of usage examples for org.apache.spark.sql.catalyst.expressions UnsafeRow UnsafeRow
public UnsafeRow()
From source file: edu.ucla.cs.wis.bigdatalog.spark.storage.map.UnsafeFixedWidthMonotonicAggregationMap.java
License: Apache License
/** * Returns an iterator over the keys and values in this map. This uses destructive iterator of * BytesToBytesMap. So it is illegal to call any other method on this map after `iterator()` has * been called.//from ww w. jav a 2 s . co m * <p> * For efficiency, each call returns the same object. */ public KVIterator<UnsafeRow, UnsafeRow> iterator() { return new KVIterator<UnsafeRow, UnsafeRow>() { private final edu.ucla.cs.wis.bigdatalog.spark.storage.map.BytesToBytesMap.MapIterator mapLocationIterator = map .iterator(); private final UnsafeRow key = new UnsafeRow(); private final UnsafeRow value = new UnsafeRow(); @Override public boolean next() { if (mapLocationIterator.hasNext()) { final edu.ucla.cs.wis.bigdatalog.spark.storage.map.BytesToBytesMap.Location loc = mapLocationIterator .next(); final MemoryLocation keyAddress = loc.getKeyAddress(); final MemoryLocation valueAddress = loc.getValueAddress(); key.pointTo(keyAddress.getBaseObject(), keyAddress.getBaseOffset(), groupingKeySchema.length(), loc.getKeyLength()); value.pointTo(valueAddress.getBaseObject(), valueAddress.getBaseOffset(), aggregationBufferSchema.length(), loc.getValueLength()); return true; } else { return false; } } @Override public UnsafeRow getKey() { return key; } @Override public UnsafeRow getValue() { return value; } @Override public void close() { // Do nothing. } }; }
From source file: edu.ucla.cs.wis.bigdatalog.spark.storage.map.UnsafeFixedWidthMonotonicAggregationMap.java
License: Apache License
@SuppressWarnings("UseOfSystemOutOrSystemErr") /*public void printPerfMetrics() { if (!enablePerfMetrics) {//from w w w.j a va2 s . c o m throw new IllegalStateException("Perf metrics not enabled"); } System.out.println("Average probes per lookup: " + map.getAverageProbesPerLookup()); System.out.println("Number of hash collisions: " + map.getNumHashCollisions()); System.out.println("Time spent resizing (ns): " + map.getTimeSpentResizingNs()); System.out.println("Total memory consumption (bytes): " + map.getTotalMemoryConsumption()); }*/ public void readExternal(ObjectInput in) throws java.io.IOException { long start = System.currentTimeMillis(); int nKeys = in.readInt(); int initialCapacity = nKeys; if (initialCapacity == 0) initialCapacity = 1024 * 16; long pageSizeBytes; if (SparkEnv.get() != null) pageSizeBytes = SparkEnv.get().memoryManager().pageSizeBytes(); else pageSizeBytes = new SparkConf().getSizeAsBytes("spark.buffer.pageSize", "16m"); enablePerfMetrics = (in.readInt() == 1); int serializedSize = in.readInt(); byte[] bytes = new byte[serializedSize]; in.readFully(bytes); aggregationBufferSchema = (StructType) SparkSqlSerializer.deserialize(bytes, ClassTag$.MODULE$.apply(StructType.class)); serializedSize = in.readInt(); bytes = new byte[serializedSize]; in.readFully(bytes); groupingKeySchema = (StructType) SparkSqlSerializer.deserialize(bytes, ClassTag$.MODULE$.apply(StructType.class)); groupingKeyProjection = UnsafeProjection.create(groupingKeySchema); currentAggregationBuffer = new UnsafeRow(); map = new edu.ucla.cs.wis.bigdatalog.spark.storage.map.BytesToBytesMap(initialCapacity, pageSizeBytes, enablePerfMetrics); int i = 0; byte[] keyBuffer = new byte[1024]; byte[] valuesBuffer = new byte[1024]; while (i < nKeys) { int keySize = in.readInt(); int valuesSize = in.readInt(); if (keySize > keyBuffer.length) keyBuffer = new byte[keySize]; in.readFully(keyBuffer, 0, keySize); if (valuesSize > valuesBuffer.length) valuesBuffer = new byte[valuesSize]; 
in.readFully(valuesBuffer, 0, valuesSize); // put it into binary map BytesToBytesMap.Location loc = map.lookup(keyBuffer, Platform.BYTE_ARRAY_OFFSET, keySize); assert (!loc.isDefined()) : "Duplicated key found!"; boolean putSucceeded = loc.putNewKey(keyBuffer, Platform.BYTE_ARRAY_OFFSET, keySize, valuesBuffer, Platform.BYTE_ARRAY_OFFSET, valuesSize); if (!putSucceeded) throw new IOException("Could not allocate memory to deserialize BytesToBytesMap"); i += 1; } //System.out.println("readExternal took " + (System.currentTimeMillis() - start) + " ms"); }
From source file: edu.ucla.cs.wis.bigdatalog.spark.storage.set.hashset.UnsafeFixedWidthSet.java
License: Apache License
/** * Return the aggregation buffer for the current group. For efficiency, all calls to this method * return the same object. If additional memory could not be allocated, then this method will * signal an error by returning null./*from ww w . j ava 2 s . co m*/ */ /*public UnsafeRow getAggregationBuffer(InternalRow groupingKey) { final UnsafeRow unsafeGroupingKeyRow = this.groupingKeyProjection.apply(groupingKey); return getAggregationBufferFromUnsafeRow(unsafeGroupingKeyRow); }*/ /*public UnsafeRow getAggregationBufferFromUnsafeRow(UnsafeRow unsafeGroupingKeyRow) { // Probe our set using the serialized key final edu.ucla.cs.wis.bigdatalog.spark.storage.set.hashset.BytesSet.Location loc = set.lookup( unsafeGroupingKeyRow.getBaseObject(), unsafeGroupingKeyRow.getBaseOffset(), unsafeGroupingKeyRow.getSizeInBytes()); if (!loc.isDefined()) { // This is the first time that we've seen this grouping key, so we'll insert a copy of the // empty aggregation buffer into the set: boolean putSucceeded = loc.putNewKey( unsafeGroupingKeyRow.getBaseObject(), unsafeGroupingKeyRow.getBaseOffset(), unsafeGroupingKeyRow.getSizeInBytes()//, //emptyAggregationBuffer, //Platform.BYTE_ARRAY_OFFSET, //emptyAggregationBuffer.length ); if (!putSucceeded) { return null; } } // Reset the pointer to point to the value that we just stored or looked up: final MemoryLocation address = loc.getValueAddress(); currentAggregationBuffer.pointTo( address.getBaseObject(), address.getBaseOffset(), aggregationBufferSchema.length(), loc.getValueLength() ); return currentAggregationBuffer; }/* /** * Returns an iterator over the keys and values in this set. This uses destructive iterator of * BytesToBytesMap. So it is illegal to call any other method on this set after `iterator()` has * been called. * * For efficiency, each call returns the same object. 
*/ public Iterator<InternalRow> iterator() { return new Iterator<InternalRow>() { private final edu.ucla.cs.wis.bigdatalog.spark.storage.set.hashset.BytesSet.SetIterator setLocationIterator = set .iterator(); private final UnsafeRow key = new UnsafeRow(); //private final UnsafeRow value = new UnsafeRow(); @Override public boolean hasNext() { return setLocationIterator.hasNext(); } /* if (setLocationIterator.hasNext()) { final edu.ucla.cs.wis.bigdatalog.spark.storage.set.hashset.BytesSet.Location loc = setLocationIterator.next(); final MemoryLocation keyAddress = loc.getKeyAddress(); //final MemoryLocation valueAddress = loc.getValueAddress(); key.pointTo( keyAddress.getBaseObject(), keyAddress.getBaseOffset(), groupingKeySchema.length(), loc.getKeyLength() ); System.out.println("next: " + key.toString()); /*value.pointTo( valueAddress.getBaseObject(), valueAddress.getBaseOffset(), aggregationBufferSchema.length(), loc.getValueLength() );* return true; } else { return false; } }*/ @Override public UnsafeRow next() { final edu.ucla.cs.wis.bigdatalog.spark.storage.set.hashset.BytesSet.Location loc = setLocationIterator .next(); final MemoryLocation keyAddress = loc.getKeyAddress(); key.pointTo(keyAddress.getBaseObject(), keyAddress.getBaseOffset(), groupingKeySchema.length(), loc.getKeyLength()); return key; } /*@Override public UnsafeRow getValue() { return value; }*/ @Override public void remove() { setLocationIterator.remove(); } /*@Override public void close() { // Do nothing. }*/ }; }