Example usage for org.apache.spark.sql.catalyst.expressions UnsafeRow UnsafeRow

List of usage examples for org.apache.spark.sql.catalyst.expressions UnsafeRow UnsafeRow

Introduction

In this page you can find the example usage for org.apache.spark.sql.catalyst.expressions UnsafeRow UnsafeRow.

Prototype

public UnsafeRow() 

Source Link

Usage

From source file:edu.ucla.cs.wis.bigdatalog.spark.storage.map.UnsafeFixedWidthMonotonicAggregationMap.java

License:Apache License

/**
 * Returns an iterator over the keys and values in this map. This uses destructive iterator of
 * BytesToBytesMap. So it is illegal to call any other method on this map after `iterator()` has
 * been called.//from  ww w.  jav  a  2 s  .  co m
 * <p>
 * For efficiency, each call returns the same object.
 */
/**
 * Returns a destructive iterator over the (key, value) pairs in this map. Iteration consumes
 * the underlying BytesToBytesMap, so it is illegal to invoke any other method on this map
 * after {@code iterator()} has been called.
 *
 * <p>For efficiency the same {@link UnsafeRow} instances are returned from every call to
 * {@code getKey()} / {@code getValue()}; callers must copy the rows to retain their contents.
 */
public KVIterator<UnsafeRow, UnsafeRow> iterator() {
    return new KVIterator<UnsafeRow, UnsafeRow>() {

        private final edu.ucla.cs.wis.bigdatalog.spark.storage.map.BytesToBytesMap.MapIterator entries = map
                .iterator();
        // Reused row wrappers, re-pointed at map memory on each next().
        private final UnsafeRow currentKey = new UnsafeRow();
        private final UnsafeRow currentValue = new UnsafeRow();

        @Override
        public boolean next() {
            if (!entries.hasNext()) {
                return false;
            }
            final edu.ucla.cs.wis.bigdatalog.spark.storage.map.BytesToBytesMap.Location entry = entries
                    .next();
            final MemoryLocation keyAddress = entry.getKeyAddress();
            final MemoryLocation valueAddress = entry.getValueAddress();
            currentKey.pointTo(keyAddress.getBaseObject(), keyAddress.getBaseOffset(),
                    groupingKeySchema.length(), entry.getKeyLength());
            currentValue.pointTo(valueAddress.getBaseObject(), valueAddress.getBaseOffset(),
                    aggregationBufferSchema.length(), entry.getValueLength());
            return true;
        }

        @Override
        public UnsafeRow getKey() {
            return currentKey;
        }

        @Override
        public UnsafeRow getValue() {
            return currentValue;
        }

        @Override
        public void close() {
            // No resources held beyond the map itself; nothing to release.
        }
    };
}

From source file:edu.ucla.cs.wis.bigdatalog.spark.storage.map.UnsafeFixedWidthMonotonicAggregationMap.java

License:Apache License

@SuppressWarnings("UseOfSystemOutOrSystemErr")
/*public void printPerfMetrics() {
if (!enablePerfMetrics) {
    throw new IllegalStateException("Perf metrics not enabled");
}
System.out.println("Average probes per lookup: " + map.getAverageProbesPerLookup());
System.out.println("Number of hash collisions: " + map.getNumHashCollisions());
System.out.println("Time spent resizing (ns): " + map.getTimeSpentResizingNs());
System.out.println("Total memory consumption (bytes): " + map.getTotalMemoryConsumption());
}*/

/**
 * Deserializes this map from {@code in}. The stream layout must match the corresponding
 * {@code writeExternal}: entry count, perf-metrics flag, length-prefixed serialized
 * aggregation-buffer schema, length-prefixed serialized grouping-key schema, then for each
 * entry (keySize, valueSize, keyBytes, valueBytes).
 *
 * @param in stream positioned at the start of a serialized map
 * @throws java.io.IOException if the stream is truncated or the backing map cannot allocate
 *         memory for an entry
 */
public void readExternal(ObjectInput in) throws java.io.IOException {
    int nKeys = in.readInt();
    // Size the map for the incoming entries; fall back to a default capacity when empty.
    int initialCapacity = (nKeys == 0) ? 1024 * 16 : nKeys;

    // Outside a running executor (e.g. tests) there is no SparkEnv, so take the page size
    // from a fresh configuration instead.
    long pageSizeBytes;
    if (SparkEnv.get() != null)
        pageSizeBytes = SparkEnv.get().memoryManager().pageSizeBytes();
    else
        pageSizeBytes = new SparkConf().getSizeAsBytes("spark.buffer.pageSize", "16m");

    enablePerfMetrics = (in.readInt() == 1);

    // Both schemas are written as length-prefixed serialized blobs.
    int serializedSize = in.readInt();
    byte[] bytes = new byte[serializedSize];
    in.readFully(bytes);
    aggregationBufferSchema = (StructType) SparkSqlSerializer.deserialize(bytes,
            ClassTag$.MODULE$.apply(StructType.class));

    serializedSize = in.readInt();
    bytes = new byte[serializedSize];
    in.readFully(bytes);
    groupingKeySchema = (StructType) SparkSqlSerializer.deserialize(bytes,
            ClassTag$.MODULE$.apply(StructType.class));

    groupingKeyProjection = UnsafeProjection.create(groupingKeySchema);

    currentAggregationBuffer = new UnsafeRow();

    map = new edu.ucla.cs.wis.bigdatalog.spark.storage.map.BytesToBytesMap(initialCapacity, pageSizeBytes,
            enablePerfMetrics);

    // Buffers are reused across entries and grown on demand; putNewKey copies the bytes into
    // the map's own pages, so reuse is safe.
    byte[] keyBuffer = new byte[1024];
    byte[] valuesBuffer = new byte[1024];
    for (int i = 0; i < nKeys; i++) {
        int keySize = in.readInt();
        int valuesSize = in.readInt();
        if (keySize > keyBuffer.length)
            keyBuffer = new byte[keySize];

        in.readFully(keyBuffer, 0, keySize);
        if (valuesSize > valuesBuffer.length)
            valuesBuffer = new byte[valuesSize];

        in.readFully(valuesBuffer, 0, valuesSize);

        // Put it into the binary map; serialized form is expected to contain unique keys.
        BytesToBytesMap.Location loc = map.lookup(keyBuffer, Platform.BYTE_ARRAY_OFFSET, keySize);

        assert (!loc.isDefined()) : "Duplicated key found!";

        boolean putSucceeded = loc.putNewKey(keyBuffer, Platform.BYTE_ARRAY_OFFSET, keySize, valuesBuffer,
                Platform.BYTE_ARRAY_OFFSET, valuesSize);

        if (!putSucceeded)
            throw new IOException("Could not allocate memory to deserialize BytesToBytesMap");
    }
}

From source file:edu.ucla.cs.wis.bigdatalog.spark.storage.set.hashset.UnsafeFixedWidthSet.java

License:Apache License

/**
 * Return the aggregation buffer for the current group. For efficiency, all calls to this method
 * return the same object. If additional memory could not be allocated, then this method will
 * signal an error by returning null.
 */
/*public UnsafeRow getAggregationBuffer(InternalRow groupingKey) {
final UnsafeRow unsafeGroupingKeyRow = this.groupingKeyProjection.apply(groupingKey);
        
return getAggregationBufferFromUnsafeRow(unsafeGroupingKeyRow);
}*/

/*public UnsafeRow getAggregationBufferFromUnsafeRow(UnsafeRow unsafeGroupingKeyRow) {
// Probe our set using the serialized key
final edu.ucla.cs.wis.bigdatalog.spark.storage.set.hashset.BytesSet.Location loc = set.lookup(
        unsafeGroupingKeyRow.getBaseObject(),
        unsafeGroupingKeyRow.getBaseOffset(),
        unsafeGroupingKeyRow.getSizeInBytes());
if (!loc.isDefined()) {
    // This is the first time that we've seen this grouping key, so we'll insert a copy of the
    // empty aggregation buffer into the set:
    boolean putSucceeded = loc.putNewKey(
            unsafeGroupingKeyRow.getBaseObject(),
            unsafeGroupingKeyRow.getBaseOffset(),
            unsafeGroupingKeyRow.getSizeInBytes()//,
            //emptyAggregationBuffer,
            //Platform.BYTE_ARRAY_OFFSET,
            //emptyAggregationBuffer.length
    );
    if (!putSucceeded) {
        return null;
    }
}
        
// Reset the pointer to point to the value that we just stored or looked up:
final MemoryLocation address = loc.getValueAddress();
currentAggregationBuffer.pointTo(
        address.getBaseObject(),
        address.getBaseOffset(),
        aggregationBufferSchema.length(),
        loc.getValueLength()
);
return currentAggregationBuffer;
}*/
        
/**
 * Returns an iterator over the keys and values in this set. This uses destructive iterator of
 * BytesToBytesMap. So it is illegal to call any other method on this set after `iterator()` has
 * been called.
 *
 * For efficiency, each call returns the same object.
 */
public Iterator<InternalRow> iterator() {
    return new Iterator<InternalRow>() {

        // Destructive iterator over the backing BytesSet; see the javadoc above for caveats.
        private final edu.ucla.cs.wis.bigdatalog.spark.storage.set.hashset.BytesSet.SetIterator entries = set
                .iterator();
        // Reused row wrapper, re-pointed at set memory on each next(); copy to retain.
        private final UnsafeRow row = new UnsafeRow();

        @Override
        public boolean hasNext() {
            return entries.hasNext();
        }

        @Override
        public UnsafeRow next() {
            final edu.ucla.cs.wis.bigdatalog.spark.storage.set.hashset.BytesSet.Location entry = entries
                    .next();
            final MemoryLocation keyAddress = entry.getKeyAddress();
            row.pointTo(keyAddress.getBaseObject(), keyAddress.getBaseOffset(), groupingKeySchema.length(),
                    entry.getKeyLength());

            return row;
        }

        @Override
        public void remove() {
            entries.remove();
        }
    };
}