Example usage for org.apache.commons.collections.buffer PriorityBuffer iterator

List of usage examples for org.apache.commons.collections.buffer PriorityBuffer iterator

Introduction

On this page you can find example usage of the iterator() method of org.apache.commons.collections.buffer.PriorityBuffer.

Prototype

public Iterator iterator() 

Source Link

Document

Returns an iterator over this heap's elements.

Usage

From source file: co.cask.hydrator.plugin.batch.aggreagtor.aggregator.Sampling.java

/**
 * Emits a sample of the records supplied by {@code iterator}, using the strategy named by
 * {@code config.samplingType} (matched case-insensitively against {@code TYPE}).
 *
 * <p>The target sample size is {@code config.sampleSize} when set; if
 * {@code config.samplePercentage} is also set, it overrides that value with the given
 * percentage of {@code config.totalRecords}.
 *
 * <ul>
 *   <li>{@code SYSTEMATIC}: emits records at a fixed interval, starting from an offset derived
 *       from {@code config.random} or, when that is unset, a freshly generated random float.</li>
 *   <li>{@code RESERVOIR}: seeds a priority buffer with the leading records, each tagged with a
 *       random float under the field {@code "random"}, then conditionally replaces the buffer's
 *       head with later records. The {@code "random"} field is stripped before emitting.</li>
 * </ul>
 *
 * @param groupKey key of the group being aggregated; not used by this method
 * @param iterator records of the group; fully drained into a list
 * @param emitter  receives each sampled record
 * @throws Exception declared by the overridden method; in the reservoir branch an empty input
 *                   fails on {@code recordArray.get(0)} with an index-out-of-bounds error
 */
@Override
public void aggregate(String groupKey, Iterator<StructuredRecord> iterator, Emitter<StructuredRecord> emitter)
        throws Exception {
    int finalSampleSize = 0;
    if (config.sampleSize != null) {
        finalSampleSize = config.sampleSize;
    }
    // A configured percentage takes precedence over the absolute sample size.
    if (config.samplePercentage != null) {
        finalSampleSize = Math.round((config.samplePercentage / 100) * config.totalRecords);
    }

    switch (TYPE.valueOf(config.samplingType.toUpperCase())) {
    case SYSTEMATIC: {
        // Optionally inflate the sample size by the over-sampling percentage.
        if (config.overSamplingPercentage != null) {
            finalSampleSize = Math
                    .round(finalSampleSize + (finalSampleSize * (config.overSamplingPercentage / 100)));
        }

        // Distance between two consecutive sampled records.
        // NOTE(review): if totalRecords is an integer type this is integer division and it fails
        // with ArithmeticException when finalSampleSize is 0 - confirm against the config class.
        int sampleIndex = Math.round(config.totalRecords / finalSampleSize);
        float random = (config.random != null) ? config.random : new Random().nextFloat();
        int firstSampleIndex = Math.round(sampleIndex * random);

        List<StructuredRecord> records = IteratorUtils.toList(iterator);
        int counter = 0;
        emitter.emit(records.get(firstSampleIndex));
        counter++;

        // NOTE(review): the first record is read at firstSampleIndex, the following ones at
        // (index - 1); the two offsets look inconsistent - confirm the intended indexing.
        while (counter < finalSampleSize) {
            int index = firstSampleIndex + (counter * sampleIndex);
            emitter.emit(records.get(index - 1));
            counter++;
        }
        break;
    }

    case RESERVOIR: {
        // The comparator ranks records with a larger "random" value first.
        PriorityBuffer sampleData = new PriorityBuffer(true, new Comparator<StructuredRecord>() {
            @Override
            public int compare(StructuredRecord o1, StructuredRecord o2) {
                if ((float) o1.get("random") < (float) o2.get("random")) {
                    return 1;
                } else if ((float) o1.get("random") > (float) o2.get("random")) {
                    return -1;
                } else {
                    return 0;
                }
            }
        });

        int count = 0;
        Random randomValue = new Random();
        List<StructuredRecord> recordArray = IteratorUtils.toList(iterator);
        Schema inputSchema = recordArray.get(0).getSchema();
        Schema schemaWithRandomField = createSchemaWithRandomField(inputSchema);

        // Seed the reservoir with the first records of the input. Each slot takes its own
        // record (not record 0 repeatedly), and the seed never reads past the end of the input
        // when the requested sample size exceeds the number of records.
        int reservoirSize = Math.min(finalSampleSize, recordArray.size());
        while (count < reservoirSize) {
            StructuredRecord record = recordArray.get(count);
            sampleData.add(getSampledRecord(record, randomValue.nextFloat(), schemaWithRandomField));
            count++;
        }

        // For every remaining record draw a random float; when it exceeds the "random" value of
        // the buffer's head, the head is evicted and the current record takes its place.
        while (count < recordArray.size()) {
            StructuredRecord structuredRecord = (StructuredRecord) sampleData.get();
            Float randomFloat = randomValue.nextFloat();
            if ((float) structuredRecord.get("random") < randomFloat) {
                sampleData.remove();
                StructuredRecord record = recordArray.get(count);
                sampleData.add(getSampledRecord(record, randomFloat, structuredRecord.getSchema()));
            }
            count++;
        }

        // Rebuild each sampled record against the input schema, dropping the helper field.
        Iterator<StructuredRecord> sampleDataIterator = sampleData.iterator();
        while (sampleDataIterator.hasNext()) {
            StructuredRecord sampledRecord = sampleDataIterator.next();
            StructuredRecord.Builder builder = StructuredRecord.builder(inputSchema);
            for (Schema.Field field : sampledRecord.getSchema().getFields()) {
                if (!field.getName().equalsIgnoreCase("random")) {
                    builder.set(field.getName(), sampledRecord.get(field.getName()));
                }
            }
            emitter.emit(builder.build());
        }
        break;
    }
    }
}