Example usage for org.apache.commons.collections.buffer PriorityBuffer PriorityBuffer

Introduction

This page shows example usages of the org.apache.commons.collections.buffer.PriorityBuffer constructor PriorityBuffer.

Prototype

public PriorityBuffer(int capacity, boolean ascendingOrder)

Source Link

Document

Constructs a new empty buffer with the specified initial capacity and sort order, using the natural order of the objects added.

Usage

From source file:net.sourceforge.eclipseccase.StateCacheJobQueue.java

/**
 * Creates a new instance.
 * <p>
 * The job is named {@code MESSAGE_QUEUE_NAME}, is backed by a
 * {@link PriorityBuffer}, and takes its "system job" flag and its priority
 * from {@code ClearCasePreferences}. No scheduling rule is set.
 */
StateCacheJobQueue() {
    super(MESSAGE_QUEUE_NAME);

    // create underlying priority queue: initial capacity 400,
    // ascendingOrder = false
    this.queue = new PriorityBuffer(400, false);

    // execute as system job if hidden
    setSystem(ClearCasePreferences.isHideRefreshActivity());

    // set priority for long running jobs
    setPriority(ClearCasePreferences.jobQueuePriority());

    // NOT: set the rule to the clearcase engine
    // NOT: setRule(ClearCasePlugin.RULE_CLEARCASE_REFRESH);
    // (if we have such a scheduling rule here, we can't do any
    // refreshLocal() calls from inside this thread. So use a null rule)
    setRule(null);
}

From source file:co.cask.hydrator.plugin.batch.aggreagtor.aggregator.Sampling.java

/**
 * Emits a sample of the records of one group, using the strategy named by
 * {@code config.samplingType} ({@code SYSTEMATIC} or {@code RESERVOIR}).
 * <p>
 * The target sample size is {@code config.sampleSize}, overridden by
 * {@code config.samplePercentage} of {@code config.totalRecords} when a
 * percentage is configured. If the resulting size is not positive, or the
 * group has no records, nothing is emitted.
 *
 * @param groupKey key of the group being aggregated (not used by the sampling logic)
 * @param iterator records of the group; fully consumed
 * @param emitter  receives the sampled records
 * @throws Exception if the sampling type is unknown or sampling fails
 */
@Override
public void aggregate(String groupKey, Iterator<StructuredRecord> iterator, Emitter<StructuredRecord> emitter)
        throws Exception {
    int finalSampleSize = 0;
    if (config.sampleSize != null) {
        finalSampleSize = config.sampleSize;
    }
    if (config.samplePercentage != null) {
        // NOTE(review): if samplePercentage is an integral type this division truncates - confirm its type
        finalSampleSize = Math.round((config.samplePercentage / 100) * config.totalRecords);
    }

    // Locale.ROOT keeps the enum lookup independent of the JVM default locale
    // (e.g. a Turkish locale upper-cases 'i' to a dotted capital I).
    TYPE samplingType = TYPE.valueOf(config.samplingType.toUpperCase(java.util.Locale.ROOT));

    // Nothing to sample: avoids division by zero (systematic) and reading from
    // an empty list / empty buffer (both strategies).
    if (finalSampleSize <= 0 || !iterator.hasNext()) {
        return;
    }

    switch (samplingType) {
    case SYSTEMATIC: {
        if (config.overSamplingPercentage != null) {
            finalSampleSize = Math
                    .round(finalSampleSize + (finalSampleSize * (config.overSamplingPercentage / 100)));
        }

        // Step between two consecutive samples.
        // NOTE(review): the indices computed below are not range-checked against the
        // number of records actually received, and the first sample uses
        // firstSampleIndex while later ones use (index - 1) - confirm this is intended.
        int sampleIndex = Math.round(config.totalRecords / finalSampleSize);
        float random;
        if (config.random != null) {
            random = config.random;
        } else {
            random = new Random().nextFloat();
        }
        int firstSampleIndex = Math.round(sampleIndex * random);
        List<StructuredRecord> records = IteratorUtils.toList(iterator);
        int counter = 0;
        emitter.emit(records.get(firstSampleIndex));
        counter++;

        while (counter < finalSampleSize) {
            int index = firstSampleIndex + (counter * sampleIndex);
            emitter.emit(records.get(index - 1));
            counter++;
        }
        break;
    }

    case RESERVOIR: {
        // Comparator is inverted on the "random" field: a smaller random value compares as greater.
        PriorityBuffer sampleData = new PriorityBuffer(true, new Comparator<StructuredRecord>() {
            @Override
            public int compare(StructuredRecord o1, StructuredRecord o2) {
                if ((float) o1.get("random") < (float) o2.get("random")) {
                    return 1;
                } else if ((float) o1.get("random") > (float) o2.get("random")) {
                    return -1;
                } else {
                    return 0;
                }
            }
        });

        Random randomValue = new Random();
        List<StructuredRecord> recordArray = IteratorUtils.toList(iterator);
        Schema inputSchema = recordArray.get(0).getSchema();
        Schema schemaWithRandomField = createSchemaWithRandomField(inputSchema);

        // Seed the reservoir with the FIRST finalSampleSize records (or all of them when
        // the group is smaller), each tagged with a random value. The previous code read
        // recordArray.get(0) on every iteration and so filled the reservoir with copies
        // of the first record.
        int reservoirSize = Math.min(finalSampleSize, recordArray.size());
        int count = 0;
        while (count < reservoirSize) {
            StructuredRecord record = recordArray.get(count);
            sampleData.add(getSampledRecord(record, randomValue.nextFloat(), schemaWithRandomField));
            count++;
        }

        // Offer every remaining record: it replaces the buffer's head element when its
        // random value is larger than the head's.
        // NOTE(review): which element is the head depends on PriorityBuffer's handling of
        // the inverted comparator above - confirm this replacement rule yields the intended sample.
        while (count < recordArray.size()) {
            StructuredRecord structuredRecord = (StructuredRecord) sampleData.get();
            Float randomFloat = randomValue.nextFloat();
            if ((float) structuredRecord.get("random") < randomFloat) {
                sampleData.remove();
                StructuredRecord record = recordArray.get(count);
                sampleData.add(getSampledRecord(record, randomFloat, structuredRecord.getSchema()));
            }
            count++;
        }

        // Emit the sampled records using the input schema, i.e. without the helper "random" field.
        Iterator<StructuredRecord> sampleDataIterator = sampleData.iterator();
        while (sampleDataIterator.hasNext()) {
            StructuredRecord sampledRecord = sampleDataIterator.next();
            StructuredRecord.Builder builder = StructuredRecord.builder(inputSchema);
            for (Schema.Field field : sampledRecord.getSchema().getFields()) {
                if (!field.getName().equalsIgnoreCase("random")) {
                    builder.set(field.getName(), sampledRecord.get(field.getName()));
                }
            }
            emitter.emit(builder.build());
        }
        break;
    }
    }
}

From source file:uk.ac.ebi.orchem.search.SimilaritySearch.java

/**
 * Performs a similarity search between a query molecule and the orchem fingerprint table.
 *
 * @param queryFp fingerprint of the query molecule
 * @param _cutOff tanimoto score below which to stop searching
 * @param _topN top N results after which to stop searching; {@code null} means no limit
 * @param debugYN Y or N to debug output back ({@code null} is treated as N)
 * @param idsOnlyYN Y or N to indicate to just return IDs of results (faster); only the exact
 *        value {@code "N"} returns the structures, anything else (including {@code null})
 *        returns IDs and scores only
 * @param extraWhereClause option to include an extra SQL where clause refering to the base compound table
 * @return array of {@link uk.ac.ebi.orchem.bean.OrChemCompound compounds}
 * @throws Exception on any failure; the stack trace is printed and the exception rethrown
 */
private static oracle.sql.ARRAY search(BitSet queryFp, Float _cutOff, Integer _topN, String debugYN,
        String idsOnlyYN, String extraWhereClause) throws Exception {

    /*
     *
    The comment block below describes the search algorithm.
    From:
     "Bounds and Algorithms for Fast Exact Searches of Chemical Fingerprints in Linear and Sub-Linear Time"
      S.Joshua Swamidass and Pierre Baldi
      http://dx.doi.org/10.1021/ci600358f

     Top K Hits
     ----------
     We can search for the top K hits by starting from the maximum (where A=B), and exploring discrete possible
     values of B right and left of the maximum.

     More precisely, for binary fingerprints, we first
     index the molecules in the database by their fingerprint "bit count"
     to enable efficient referencing
     of a particular bit count bin.

     Next, with respect to a particular query, we calculate the bound
     on the similarity for every bit count in the database.

     Then we sort these bit counts by their associated bound and iterate over the
     molecules in the database, in order of decreasing bound.

     As we iterate, we calculate the similarity between the query and the database molecule and use
     a heap to efficiently track the top hits. The algorithm terminates when
     "the lowest similarity value in the heap is greater than the bound associated with the current database bin"

     Algorithm 1 Top K Search
     Require: database of fingerprints binned by bit count Bs
     Ensure: hits contains top K hits which satisfy SIMILARITY(A,B) > T

     1:  hits <- MINHEAP()
     2:  bounds <- LIST()
     3:  for all B in database do //iterate over bins
     4:    tuple <- TUPLE(BOUND(A,B),B)
     5:    LISTAPPEND(bounds, tuple)
     6:  end for
     7:  QUICKSORT(bounds) //NOTE: the length of bounds is constant
     8:  for all bound, B in bounds do //iterate in order of decreasing bound
     9:    if bound < T then
     10:      break //threshold stopping condition
     11:   end if
     12:   if K <= HEAPSIZE(hits) and bound < MINSIMILARITY(hits) then
     13:     break //top-K stopping condition
     14:   end if
     15:   for all B in database[B] do
     16:     S=SIMILARITY(A,B)
     17:     tuple <- TUPLE(S,B)
     18:     if S <= T then
     19:        continue //ignore this and continue to next
     20:     else if LENGTH(hits)< K then
     21:        HEAPPUSH(hits, tuple)
     22:     else if S > MINSIMILARITY(hits) then
     23:       HEAPPOPMIN(hits)
     24:       HEAPPUSH(hits,tuple)
     25:     end if
     26:   end for
     27: end for
     28: return hits
     */

    // Null-safe: a missing flag simply means "no debugging" instead of a NullPointerException.
    boolean debugging = "y".equalsIgnoreCase(debugYN);
    // Null-safe, still case-sensitive as before: only "N" asks for the full structures.
    boolean returnStructures = "N".equals(idsOnlyYN);

    debug("started", debugging);

    /**********************************************************************
     * Similarity search algorithm section                                *
     *                                                                    *
     **********************************************************************/
    Comparator heapComparator = new SimHeapElementTanimComparator();
    PriorityBuffer heap = null;
    OracleConnection conn = null;
    PreparedStatement pstmtFp = null;
    PreparedStatement pstmLookup = null;

    String query = " select bit_count, id, fp from orchem_fingprint_simsearch s where  bit_count = ? ";

    float cutOff = _cutOff.floatValue();
    int topN = -1; // -1 = no top-N limit
    if (_topN == null) {
        debug("No topN breakout specified.. searching until lower bound reached", debugging);
    } else {
        topN = _topN.intValue();
        debug("topN is " + topN + ", result set size limited.", debugging);
    }

    try {
        conn = (OracleConnection) new OracleDriver().defaultConnection();

        String compoundTableName = OrChemParameters.getParameterValue(OrChemParameters.COMPOUND_TABLE, conn);
        String compoundTablePkColumn = OrChemParameters.getParameterValue(OrChemParameters.COMPOUND_PK, conn);
        String compoundTableMolfileColumn = OrChemParameters.getParameterValue(OrChemParameters.COMPOUND_MOL,
                conn);

        if (extraWhereClause != null) {
            // NOTE(review): extraWhereClause is concatenated verbatim into the SQL text and
            // cannot be bound as a parameter; callers must never pass untrusted input here.
            query = " select s.bit_count, s.id, s.fp from " + " orchem_fingprint_simsearch s , "
                    + compoundTableName + " c " + " where  s.bit_count = ? " + " and s.id = c."
                    + compoundTablePkColumn + " " + " and " + extraWhereClause;
            debug("QUERY is " + query, debugging);
        }

        float queryBitCount = queryFp.cardinality();
        byte[] queryBytes = Utils.toByteArray(queryFp, extFpSize);
        int queryByteArrLen = queryBytes.length;

        // Buckets are visited outwards from the query's own bit count.
        float lowBucketNum = queryBitCount - 1;
        float highBucketNum = queryBitCount + 1;
        float currBucketNum = queryBitCount;

        pstmtFp = conn.prepareStatement(query);
        pstmtFp.setFetchSize(250);

        boolean done = false;
        heap = new PriorityBuffer(true, heapComparator);
        int loopCount = 0;

        while (!done) {
            debug("bucket is " + currBucketNum, debugging);
            loopCount++;

            // Upper bound of the score reachable in this bucket: ratio of the smaller to the larger bit count.
            float bound;
            if (currBucketNum < queryBitCount)
                bound = currBucketNum / queryBitCount;
            else
                bound = queryBitCount / currBucketNum;

            /* Algorithm step 9..11
               Here we can break out because the tanimoto score is becoming to low.
               Checked BEFORE querying the bucket, so no result set is opened (and left
               unclosed) for a bucket that is not going to be read. */
            if (bound < cutOff) {
                debug("bound < cutOff, done", debugging);
                done = true;
            } else {
                pstmtFp.setFloat(1, currBucketNum);
                ResultSet resFp = pstmtFp.executeQuery();
                try {
                    //Algorithm 15-26
                    while (resFp.next()) {
                        byte[] dbByteArray = resFp.getBytes("fp");
                        float tanimotoCoeff = calcTanimoto(queryBytes, queryByteArrLen, dbByteArray,
                                queryBitCount, currBucketNum);

                        if (tanimotoCoeff >= cutOff) {
                            SimHeapElement elm = new SimHeapElement();
                            elm.setID(resFp.getString("id"));
                            elm.setTanimotoCoeff(Float.valueOf(tanimotoCoeff));

                            if (heap.size() < topN || topN == -1) {
                                heap.add(elm);
                                debug("add elem " + elm.getID(), debugging);

                            } else if (tanimotoCoeff > ((SimHeapElement) (heap.get())).getTanimotoCoeff()
                                    .floatValue()) {
                                // heap is full: swap out its head element for the better hit
                                heap.remove();
                                heap.add(elm);
                                debug("remove + add elem " + elm.getID(), debugging);
                            }
                        }
                    }
                } finally {
                    // close even when reading a row or computing the score fails
                    resFp.close();
                }

                /* Algorithm 12-14:
                 * When top N hits is reached, and the lowest score of the
                 * hits is greater than the current bucket bound, stop.
                 * If not, the next bucket may contain a better score, so go on.
                 */
                if (topN != -1 && heap.size() >= topN
                        && ((SimHeapElement) (heap.get())).getTanimotoCoeff().floatValue() > bound) {
                    done = true;
                    debug("topN reached, done", debugging);

                } else {
                    // calculate new currBucket: take whichever neighbour has the higher bound
                    float up = queryBitCount / highBucketNum;
                    float down = lowBucketNum / queryBitCount;

                    if (up > down) {
                        currBucketNum = highBucketNum;
                        highBucketNum++;
                    } else {
                        currBucketNum = lowBucketNum;
                        lowBucketNum--;
                    }

                    // both directions exhausted
                    if (lowBucketNum < 1 && highBucketNum > extFpSize)
                        done = true;
                }
            }
        }
        debug("searched bit_count buckets: " + loopCount, debugging);

        /********************************************************************
         * Search completed.                                                *
         *                                                                  *
         * Next section is just looking up the compounds by ID and          *
         * returning the results, sorted by Tanimoto coefficient            *
         *                                                                  *
         *******************************************************************/
        String lookupCompoundQuery = " select " + compoundTableMolfileColumn + " from " + " "
                + compoundTableName + " where " + " " + compoundTablePkColumn + " =?";

        pstmLookup = conn.prepareStatement(lookupCompoundQuery);
        List compounds = new ArrayList();

        while (heap.size() != 0) {
            SimHeapElement bElm = (SimHeapElement) heap.remove();

            if (returnStructures) {
                // return structure to user
                pstmLookup.setString(1, bElm.getID());
                ResultSet resLookup = pstmLookup.executeQuery();
                try {
                    if (resLookup.next()) {
                        OrChemCompound c = new OrChemCompound();
                        c.setId(bElm.getID());
                        c.setScore(bElm.getTanimotoCoeff().floatValue());
                        c.setMolFileClob(resLookup.getClob(compoundTableMolfileColumn));
                        compounds.add(c);
                    }
                } finally {
                    resLookup.close();
                }
            } else {
                // only return ID and score to user
                OrChemCompound c = new OrChemCompound();
                c.setId(bElm.getID());
                c.setScore(bElm.getTanimotoCoeff().floatValue());
                compounds.add(c);
            }
        }

        long befSort = System.currentTimeMillis();
        Collections.sort(compounds, new OrChemCompoundTanimComparator());
        debug("sorting time (ms) " + (System.currentTimeMillis() - befSort), debugging);

        OrChemCompound[] output = new OrChemCompound[compounds.size()];
        for (int i = 0; i < compounds.size(); i++) {
            output[i] = (OrChemCompound) (compounds.get(i));
        }
        ArrayDescriptor arrayDescriptor = ArrayDescriptor.createDescriptor("ORCHEM_COMPOUND_LIST", conn);
        debug("#compounds in result list : " + compounds.size(), debugging);
        debug("ended", debugging);
        return new ARRAY(arrayDescriptor, conn, output);
    } catch (Exception ex) {
        ex.printStackTrace();
        throw (ex);
    } finally {
        // Nested so that a failing close() does not prevent the remaining resources from being closed.
        try {
            if (pstmLookup != null)
                pstmLookup.close();
        } finally {
            try {
                if (pstmtFp != null)
                    pstmtFp.close();
            } finally {
                if (conn != null)
                    conn.close();
            }
        }
    }
}