Example usage for org.apache.lucene.index PostingsEnum FREQS

List of usage examples for org.apache.lucene.index PostingsEnum FREQS

Introduction

In this page you can find the example usage for org.apache.lucene.index PostingsEnum FREQS.

Prototype

short FREQS

To view the source code for org.apache.lucene.index PostingsEnum FREQS, click Source Link.

Document

Flag to pass to TermsEnum#postings(PostingsEnum,int) if you require term frequencies in the returned enum.

Usage

From source file:com.rocana.lucene.codec.v1.RocanaBasePostingsFormatTestCase.java

License:Apache License

/**
 * Exercises the ability of a postings format to iterate the inverted postings
 * more than once while a segment is being written.
 *
 * <p>The codec is wrapped so that, for the {@code "body"} field, each
 * {@code write(Fields)} call first delegates to the real consumer and then
 * re-reads the very same {@code Fields} three ways: a full iteration over all
 * terms, a {@code seekExact} on every term recorded so far, and a handful of
 * random {@code seekCeil} probes. Statistics gathered during flushes (merges
 * are excluded from accumulation) are finally compared with what the reader
 * opened from the writer reports.
 */
@Override
public void testInvertedWrite() throws Exception {
    Directory dir = newDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);

    // Must be concurrent because thread(s) can be merging
    // while up to one thread flushes, and each of those
    // threads iterates over the map while the flushing
    // thread might be adding to it:
    final Map<String, TermFreqs> termFreqs = new ConcurrentHashMap<>();

    // Index-wide totals accumulated from flushes only; compared with the
    // reader's getSumDocFreq()/getSumTotalTermFreq() at the end.
    final AtomicLong sumDocFreq = new AtomicLong();
    final AtomicLong sumTotalTermFreq = new AtomicLong();

    // TODO: would be better to use / delegate to the current
    // Codec returned by getCodec()

    iwc.setCodec(new AssertingCodec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {

            // Unwrap per-field formats (stock and Rocana variants) so we
            // end up with the concrete format used for this field.
            PostingsFormat p = getCodec().postingsFormat();
            if (p instanceof PerFieldPostingsFormat) {
                p = ((PerFieldPostingsFormat) p).getPostingsFormatForField(field);
            }
            if (p instanceof RocanaPerFieldPostingsFormat) {
                p = ((RocanaPerFieldPostingsFormat) p).getPostingsFormatForField(field);
            }
            final PostingsFormat defaultPostingsFormat = p;

            // Captured so write() can assert that flushes happen on the
            // thread that requested this format.
            final Thread mainThread = Thread.currentThread();

            if (field.equals("body")) {

                // A PF that counts up some stats and then in
                // the end we verify the stats match what the
                // final IndexReader says, just to exercise the
                // new freedom of iterating the postings more
                // than once at flush/merge:

                return new PostingsFormat(defaultPostingsFormat.getName()) {

                    @Override
                    public FieldsConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {

                        final FieldsConsumer fieldsConsumer = defaultPostingsFormat.fieldsConsumer(state);

                        return new FieldsConsumer() {
                            @Override
                            public void write(Fields fields) throws IOException {
                                // Let the real format consume the postings first;
                                // everything below is an additional pass over them.
                                fieldsConsumer.write(fields);

                                boolean isMerge = state.context.context == IOContext.Context.MERGE;

                                // We only use one thread for flushing
                                // in this test:
                                assert isMerge || Thread.currentThread() == mainThread;

                                // We iterate the provided TermsEnum
                                // twice, so we exercise this new freedom
                                // with the inverted API; if
                                // addOnSecondPass is true, we add up
                                // term stats on the 2nd iteration:
                                boolean addOnSecondPass = random().nextBoolean();

                                //System.out.println("write isMerge=" + isMerge + " 2ndPass=" + addOnSecondPass);

                                // Gather our own stats:
                                Terms terms = fields.terms("body");
                                assert terms != null;

                                // First pass: walk every term in order.
                                TermsEnum termsEnum = terms.iterator();
                                PostingsEnum docs = null;
                                while (termsEnum.next() != null) {
                                    BytesRef term = termsEnum.term();
                                    // TODO: also sometimes ask for payloads/offsets?
                                    // Randomly request either frequencies only (reusing
                                    // the previous enum) or positions (fresh enum).
                                    boolean noPositions = random().nextBoolean();
                                    if (noPositions) {
                                        docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                                    } else {
                                        docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                                    }
                                    int docFreq = 0;
                                    long totalTermFreq = 0;
                                    while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                                        docFreq++;
                                        totalTermFreq += docs.freq();
                                        // Consume a random, non-empty prefix of the positions
                                        // when positions were requested.
                                        int limit = TestUtil.nextInt(random(), 1, docs.freq());
                                        if (!noPositions) {
                                            for (int i = 0; i < limit; i++) {
                                                docs.nextPosition();
                                            }
                                        }
                                    }

                                    String termString = term.utf8ToString();

                                    // During merge we should only see terms
                                    // we had already seen during a
                                    // previous flush:
                                    assertTrue(isMerge == false || termFreqs.containsKey(termString));

                                    if (isMerge == false) {
                                        if (addOnSecondPass == false) {
                                            // Accumulate now, on the first pass.
                                            TermFreqs tf = termFreqs.get(termString);
                                            if (tf == null) {
                                                tf = new TermFreqs();
                                                termFreqs.put(termString, tf);
                                            }
                                            tf.docFreq += docFreq;
                                            tf.totalTermFreq += totalTermFreq;
                                            sumDocFreq.addAndGet(docFreq);
                                            sumTotalTermFreq.addAndGet(totalTermFreq);
                                        } else if (termFreqs.containsKey(termString) == false) {
                                            // Add placeholder (2nd pass will
                                            // set its counts):
                                            termFreqs.put(termString, new TermFreqs());
                                        }
                                    }
                                }

                                // Also test seeking the TermsEnum:
                                // Second pass: the map holds terms from all flushes so far,
                                // so seekExact may legitimately miss in this segment.
                                for (String term : termFreqs.keySet()) {
                                    if (termsEnum.seekExact(new BytesRef(term))) {
                                        // TODO: also sometimes ask for payloads/offsets?
                                        boolean noPositions = random().nextBoolean();
                                        if (noPositions) {
                                            docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                                        } else {
                                            docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                                        }

                                        int docFreq = 0;
                                        long totalTermFreq = 0;
                                        while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                                            docFreq++;
                                            totalTermFreq += docs.freq();
                                            int limit = TestUtil.nextInt(random(), 1, docs.freq());
                                            if (!noPositions) {
                                                for (int i = 0; i < limit; i++) {
                                                    docs.nextPosition();
                                                }
                                            }
                                        }

                                        if (isMerge == false && addOnSecondPass) {
                                            // Fill in the placeholder created during the first pass.
                                            TermFreqs tf = termFreqs.get(term);
                                            assert tf != null;
                                            tf.docFreq += docFreq;
                                            tf.totalTermFreq += totalTermFreq;
                                            sumDocFreq.addAndGet(docFreq);
                                            sumTotalTermFreq.addAndGet(totalTermFreq);
                                        }

                                        //System.out.println("  term=" + term + " docFreq=" + docFreq + " ttDF=" + termToDocFreq.get(term));
                                        // What this segment holds for the term can never exceed
                                        // the totals accumulated across flushes.
                                        assertTrue(docFreq <= termFreqs.get(term).docFreq);
                                        assertTrue(totalTermFreq <= termFreqs.get(term).totalTermFreq);
                                    }
                                }

                                // Also test seekCeil
                                // NOT_FOUND means the enum is positioned on a term that
                                // sorts strictly after the probe.
                                for (int iter = 0; iter < 10; iter++) {
                                    BytesRef term = new BytesRef(
                                            TestUtil.randomRealisticUnicodeString(random()));
                                    SeekStatus status = termsEnum.seekCeil(term);
                                    if (status == SeekStatus.NOT_FOUND) {
                                        assertTrue(term.compareTo(termsEnum.term()) < 0);
                                    }
                                }
                            }

                            @Override
                            public void close() throws IOException {
                                fieldsConsumer.close();
                            }
                        };
                    }

                    @Override
                    public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
                        // Reading is untouched: delegate straight to the real format.
                        return defaultPostingsFormat.fieldsProducer(state);
                    }
                };
            } else {
                return defaultPostingsFormat;
            }
        }
    });

    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);

    // Index line-file documents until their estimated in-memory size reaches
    // the (randomized) byte budget.
    LineFileDocs docs = new LineFileDocs(random());
    int bytesToIndex = atLeast(100) * 1024;
    int bytesIndexed = 0;
    while (bytesIndexed < bytesToIndex) {
        Document doc = docs.nextDoc();
        w.addDocument(doc);
        bytesIndexed += RamUsageTester.sizeOf(doc);
    }

    IndexReader r = w.getReader();
    w.close();

    // The totals collected inside the wrapped consumer must match the reader.
    Terms terms = MultiFields.getTerms(r, "body");
    assertEquals(sumDocFreq.get(), terms.getSumDocFreq());
    assertEquals(sumTotalTermFreq.get(), terms.getSumTotalTermFreq());

    // Per-term check: every term in the reader must carry exactly the
    // statistics recorded for it, and the term count must match the map size.
    TermsEnum termsEnum = terms.iterator();
    long termCount = 0;
    boolean supportsOrds = true;
    while (termsEnum.next() != null) {
        BytesRef term = termsEnum.term();
        assertEquals(termFreqs.get(term.utf8ToString()).docFreq, termsEnum.docFreq());
        assertEquals(termFreqs.get(term.utf8ToString()).totalTermFreq, termsEnum.totalTermFreq());
        if (supportsOrds) {
            // ord() is optional; after the first UnsupportedOperationException
            // we stop asking for it.
            long ord;
            try {
                ord = termsEnum.ord();
            } catch (UnsupportedOperationException uoe) {
                supportsOrds = false;
                ord = -1;
            }
            if (ord != -1) {
                assertEquals(termCount, ord);
            }
        }
        termCount++;
    }
    assertEquals(termFreqs.size(), termCount);

    r.close();
    dir.close();
}

From source file:org.codelibs.elasticsearch.common.lucene.index.FilterableTermsEnum.java

License:Apache License

/**
 * Creates a terms enum over {@code field} that can optionally restrict its
 * statistics to the documents matched by {@code filter}.
 *
 * <p>One {@code Holder} is recorded per segment that has terms for the field.
 * When a filter is supplied, each holder also carries the set of live
 * documents in that segment accepted by the filter; segments where the filter
 * yields no scorer are left out entirely.
 *
 * @param reader       index reader whose leaves are inspected
 * @param field        name of the field whose terms are enumerated
 * @param docsEnumFlag either {@code PostingsEnum.FREQS} or {@code PostingsEnum.NONE}
 * @param filter       optional query restricting the documents taken into account
 * @throws IllegalArgumentException if {@code docsEnumFlag} is any other value
 * @throws IOException              if reading from the index fails
 */
public FilterableTermsEnum(IndexReader reader, String field, int docsEnumFlag, @Nullable Query filter)
        throws IOException {
    final boolean supportedFlag = docsEnumFlag == PostingsEnum.FREQS || docsEnumFlag == PostingsEnum.NONE;
    if (!supportedFlag) {
        throw new IllegalArgumentException("invalid docsEnumFlag of " + docsEnumFlag);
    }
    this.docsEnumFlag = docsEnumFlag;

    final List<LeafReaderContext> segments = reader.leaves();
    final List<Holder> holders = new ArrayList<>(segments.size());

    // A null weight stands for "no filter": every document is accepted.
    Weight filterWeight = null;
    if (filter != null) {
        final IndexSearcher searcher = new IndexSearcher(reader);
        searcher.setQueryCache(null);
        filterWeight = searcher.createNormalizedWeight(filter, false);
    }

    for (LeafReaderContext segment : segments) {
        final Terms fieldTerms = segment.reader().terms(field);
        final TermsEnum segmentTermsEnum = (fieldTerms == null) ? null : fieldTerms.iterator();
        if (segmentTermsEnum == null) {
            // Nothing indexed for this field in this segment.
            continue;
        }

        BitSet acceptedDocs = null;
        if (filterWeight != null) {
            final Scorer scorer = filterWeight.scorer(segment);
            if (scorer == null) {
                // The filter matches nothing here, so the segment cannot contribute.
                continue;
            }
            final DocIdSetIterator matches = scorer.iterator();

            // Deleted documents must never be counted, so intersect with live docs.
            final Bits liveDocs = segment.reader().getLiveDocs();
            final DocIdSetIterator liveMatches;
            if (liveDocs == null) {
                liveMatches = matches;
            } else {
                liveMatches = new FilteredDocIdSetIterator(matches) {
                    @Override
                    protected boolean match(int doc) {
                        return liveDocs.get(doc);
                    }
                };
            }

            acceptedDocs = BitSet.of(liveMatches, segment.reader().maxDoc());
        }
        holders.add(new Holder(segmentTermsEnum, acceptedDocs));
    }
    this.enums = holders.toArray(new Holder[holders.size()]);
}

From source file:org.codelibs.elasticsearch.common.lucene.index.FilterableTermsEnum.java

License:Apache License

/**
 * Positions every per-segment enum on {@code text} and aggregates the term's
 * document frequency (and, when {@code docsEnumFlag} is
 * {@code PostingsEnum.FREQS}, its total term frequency) across segments.
 *
 * <p>Segments without a filter bit set use the statistics stored in the
 * index; filtered segments are counted by walking the postings and keeping
 * only accepted documents.
 *
 * @param text the term to look up
 * @return {@code true} if at least one accepted document contains the term
 * @throws IOException if reading from the index fails
 */
@Override
public boolean seekExact(BytesRef text) throws IOException {
    final boolean wantFreqs = docsEnumFlag == PostingsEnum.FREQS;
    int matchedDocs = 0;
    long summedFreq = 0;

    for (Holder holder : enums) {
        if (!holder.termsEnum.seekExact(text)) {
            continue;
        }

        if (holder.bits == null) {
            // Unfiltered segment: trust the stored statistics.
            matchedDocs += holder.termsEnum.docFreq();
            if (wantFreqs) {
                final long leafFreq = holder.termsEnum.totalTermFreq();
                if (summedFreq == -1 || leafFreq == -1) {
                    // -1 is sticky: once any segment reports it, the sum stays -1.
                    summedFreq = -1;
                } else {
                    summedFreq += leafFreq;
                }
            }
            continue;
        }

        // Filtered segment: reuse the cached postings enum and count by hand.
        final PostingsEnum postings = holder.termsEnum.postings(holder.docsEnum, docsEnumFlag);
        holder.docsEnum = postings;

        // Two variants of the same heavy loop, so the frequency lookup is only
        // paid for when it was requested.
        if (wantFreqs) {
            int doc = postings.nextDoc();
            while (doc != DocIdSetIterator.NO_MORE_DOCS) {
                if (holder.bits == null || holder.bits.get(doc)) {
                    matchedDocs++;
                    // freq() reports 1 for DOCS_ONLY fields, so a real 1 cannot be told
                    // apart from "not recorded" when filtering like this.
                    summedFreq += postings.freq();
                }
                doc = postings.nextDoc();
            }
        } else {
            int doc = postings.nextDoc();
            while (doc != DocIdSetIterator.NO_MORE_DOCS) {
                if (holder.bits == null || holder.bits.get(doc)) {
                    // freq() is undefined when postings were requested with NONE, so it is not called.
                    matchedDocs++;
                }
                doc = postings.nextDoc();
            }
        }
    }

    if (matchedDocs <= 0) {
        currentDocFreq = NOT_FOUND;
        currentTotalTermFreq = NOT_FOUND;
        current = null;
        return false;
    }
    currentDocFreq = matchedDocs;
    currentTotalTermFreq = summedFreq;
    current = text;
    return true;
}

From source file:org.codelibs.elasticsearch.common.lucene.index.FreqTermsEnum.java

License:Apache License

/**
 * Creates a filterable terms enum with per-term caches for the requested
 * statistics.
 *
 * <p>Postings are requested with frequencies only when total term
 * frequencies are needed. A cache array is allocated for each requested
 * statistic; the cache for a statistic that is not needed is {@code null}.
 *
 * @param reader            index reader to enumerate
 * @param field             field whose terms are enumerated
 * @param needDocFreq       whether document frequencies will be cached
 * @param needTotalTermFreq whether total term frequencies will be cached
 * @param filter            optional query restricting the counted documents
 * @param bigArrays         allocator used for the caches
 * @throws IOException if reading from the index fails
 */
public FreqTermsEnum(IndexReader reader, String field, boolean needDocFreq, boolean needTotalTermFreq,
        @Nullable Query filter, BigArrays bigArrays) throws IOException {
    super(reader, field, needTotalTermFreq ? PostingsEnum.FREQS : PostingsEnum.NONE, filter);
    this.bigArrays = bigArrays;
    this.needDocFreqs = needDocFreq;
    this.needTotalTermFreqs = needTotalTermFreq;
    this.termDocFreqs = needDocFreq
            ? bigArrays.newIntArray(INITIAL_NUM_TERM_FREQS_CACHED, false)
            : null;
    this.termsTotalFreqs = needTotalTermFreq
            ? bigArrays.newLongArray(INITIAL_NUM_TERM_FREQS_CACHED, false)
            : null;
    this.cachedTermOrds = new BytesRefHash(INITIAL_NUM_TERM_FREQS_CACHED, bigArrays);
}

From source file:org.elasticsearch.common.lucene.index.FilterableTermsEnum.java

License:Apache License

/**
 * Creates a terms enum over {@code field} that can optionally restrict its
 * statistics to the documents matched by {@code filter}, and records how many
 * documents form the population being counted.
 *
 * <p>Without a filter, {@code numDocs} is the reader's {@code maxDoc()}
 * (deleted documents included). With a filter, it is the number of live
 * documents accepted by the filter, summed over the segments that have terms
 * for the field.
 *
 * @param reader       index reader whose leaves are inspected
 * @param field        name of the field whose terms are enumerated
 * @param docsEnumFlag either {@code PostingsEnum.FREQS} or {@code PostingsEnum.NONE}
 * @param filter       optional query restricting the documents taken into account
 * @throws IllegalArgumentException if {@code docsEnumFlag} is any other value
 * @throws IOException              if reading from the index fails
 */
public FilterableTermsEnum(IndexReader reader, String field, int docsEnumFlag, @Nullable Query filter)
        throws IOException {
    if ((docsEnumFlag != PostingsEnum.FREQS) && (docsEnumFlag != PostingsEnum.NONE)) {
        throw new IllegalArgumentException("invalid docsEnumFlag of " + docsEnumFlag);
    }
    this.docsEnumFlag = docsEnumFlag;
    if (filter == null) {
        // Important - need to use the doc count that includes deleted docs
        // or we have this issue: https://github.com/elasticsearch/elasticsearch/issues/7951
        numDocs = reader.maxDoc();
    }
    List<LeafReaderContext> leaves = reader.leaves();
    List<Holder> enums = new ArrayList<>(leaves.size());
    final Weight weight;
    if (filter == null) {
        weight = null;
    } else {
        final IndexSearcher searcher = new IndexSearcher(reader);
        // Bypass the query cache for this one-off weight.
        searcher.setQueryCache(null);
        weight = searcher.createNormalizedWeight(filter, false);
    }
    for (LeafReaderContext context : leaves) {
        Terms terms = context.reader().terms(field);
        if (terms == null) {
            continue;
        }
        TermsEnum termsEnum = terms.iterator();
        if (termsEnum == null) {
            continue;
        }
        BitSet bits = null;
        if (weight != null) {
            Scorer scorer = weight.scorer(context);
            if (scorer == null) {
                // fully filtered, none matching, no need to iterate on this
                continue;
            }
            DocIdSetIterator docs = scorer.iterator();

            // we want to force apply deleted docs
            final Bits liveDocs = context.reader().getLiveDocs();
            if (liveDocs != null) {
                docs = new FilteredDocIdSetIterator(docs) {
                    @Override
                    protected boolean match(int doc) {
                        return liveDocs.get(doc);
                    }
                };
            }

            BitDocIdSet.Builder builder = new BitDocIdSet.Builder(context.reader().maxDoc());
            builder.or(docs);
            final BitDocIdSet accepted = builder.build();
            if (accepted == null) {
                // NOTE(review): build() appears to return null when no document was
                // added (e.g. every match in this segment is deleted) - confirm against
                // the Lucene version in use. Such a segment contributes nothing, so it
                // is skipped like the scorer == null case instead of dereferencing null.
                continue;
            }
            bits = accepted.bits();

            // Count how many docs are in our filtered set
            // TODO make this lazy-loaded only for those that need it?
            numDocs += bits.cardinality();
        }
        enums.add(new Holder(termsEnum, bits));
    }
    this.enums = enums.toArray(new Holder[enums.size()]);
}

From source file:org.opengrok.suggest.SuggesterSearcher.java

License:Open Source License

/**
 * Collects the best-scoring suggestion terms of one index segment.
 *
 * <p>Every term produced by the suggester query's terms enum is scored: by its
 * normalized document frequency when no document restriction applies, by a
 * phrase score when positions are needed, and otherwise by the number of
 * matching documents containing it. Terms with a positive score get a bonus
 * based on how often they were searched before and are offered to a bounded
 * priority queue. The method stops early and sets {@code interrupted} when the
 * current thread has been interrupted.
 *
 * @param query             the surrounding query, possibly {@code null}
 * @param leafReaderContext segment to inspect
 * @param project           project name attached to each result
 * @param suggesterQuery    query that defines which terms are candidates
 * @param searchCounts      popularity data for already searched terms
 * @return the suggestions gathered so far, or an empty list when interrupted before scoring starts
 * @throws IOException if reading from the index fails
 */
private List<LookupResultItem> suggest(final Query query, final LeafReaderContext leafReaderContext,
        final String project, final SuggesterQuery suggesterQuery, final PopularityCounter searchCounts)
        throws IOException {
    if (Thread.currentThread().isInterrupted()) {
        interrupted = true;
        return Collections.emptyList();
    }

    // Terms already present in the query for the suggester's field may have to be skipped.
    final boolean skipQueryTerms = shouldLeaveOutSameTerms(query, suggesterQuery);
    Set<BytesRef> queryTokens = null;
    if (skipQueryTerms) {
        queryTokens = SuggesterUtils.intoTermsExceptPhraseQuery(query).stream()
                .filter(t -> t.field().equals(suggesterQuery.getField()))
                .map(Term::bytes)
                .collect(Collectors.toSet());
    }

    final boolean needsDocumentIds = query != null && !(query instanceof MatchAllDocsQuery);

    ComplexQueryData complexQueryData = null;
    if (needsDocumentIds) {
        complexQueryData = getComplexQueryData(query, leafReaderContext);
        if (interrupted) {
            return Collections.emptyList();
        }
    }

    final Terms fieldTerms = leafReaderContext.reader().terms(suggesterQuery.getField());
    final TermsEnum candidates = suggesterQuery.getTermsEnumForSuggestions(fieldTerms);
    final LookupPriorityQueue best = new LookupPriorityQueue(resultSize);

    final boolean needPositionsAndFrequencies = needPositionsAndFrequencies(query);
    final int postingsFlags = needPositionsAndFrequencies
            ? PostingsEnum.POSITIONS | PostingsEnum.FREQS
            : PostingsEnum.NONE;

    PostingsEnum postings = null;
    for (BytesRef term = candidates.next(); term != null; term = candidates.next()) {
        if (Thread.currentThread().isInterrupted()) {
            interrupted = true;
            break;
        }

        postings = candidates.postings(postings, postingsFlags);

        int score;
        if (!needsDocumentIds) {
            score = normalizeDocumentFrequency(candidates.docFreq(), numDocs);
        } else if (needPositionsAndFrequencies) {
            score = getPhraseScore(complexQueryData, leafReaderContext.docBase, postings);
        } else {
            score = getDocumentFrequency(complexQueryData.documentIds, leafReaderContext.docBase, postings);
        }

        if (score <= 0) {
            continue;
        }
        if (skipQueryTerms && queryTokens.contains(term)) {
            continue;
        }

        // Terms that were searched for before get a popularity bonus.
        score += searchCounts.get(term) * TERM_ALREADY_SEARCHED_MULTIPLIER;

        if (best.canInsert(score)) {
            best.insertWithOverflow(new LookupResultItem(term.utf8ToString(), project, score));
        }
    }

    return best.getResult();
}

From source file:org.voyanttools.trombone.tool.corpus.DocumentCollocates.java

License:Open Source License

/**
 * Builds the collocates of one document: for every query string, the terms
 * that occur in a window of {@code context} tokens around each of its matches.
 *
 * <p>The work happens in three steps: (1) count, per query string, how often
 * each accepted term appears in the left and right context windows; (2) read
 * the document's term vector to obtain the document-wide frequency of every
 * term of interest and the total token count; (3) turn each (query string,
 * term) pair into a {@code DocumentCollocate} and offer it to a bounded queue.
 *
 * @param LeafReader        reader giving access to the document's term vector
 * @param luceneDoc         Lucene document id
 * @param corpusDocIndex    index of the document within the corpus
 * @param lastToken         upper bound (exclusive) for the right context window
 * @param documentSpansData matches per query string; each entry of
 *                          {@code spansData} holds the start and end position of a match
 * @param stopwords         terms to ignore
 * @return queue holding the document's collocates
 * @throws IOException if reading from the index fails
 */
private FlexibleQueue<DocumentCollocate> getCollocates(LeafReader LeafReader, int luceneDoc, int corpusDocIndex,
        int lastToken, List<DocumentSpansData> documentSpansData, Keywords stopwords) throws IOException {

    Map<Integer, TermInfo> termsOfInterest = getTermsOfInterest(LeafReader, luceneDoc, lastToken,
            documentSpansData, true);

    // query string -> (context term -> occurrences in the context windows)
    Map<String, Map<String, AtomicInteger>> mapOfTermsMap = new HashMap<String, Map<String, AtomicInteger>>();

    // query string -> number of matches in this document
    Map<String, Integer> queryStringFrequencyMap = new HashMap<String, Integer>();

    // this keeps track of the terms we want to lookup total document frequencies
    Map<String, Integer> stringsOfInterestMap = new HashMap<String, Integer>();

    for (DocumentSpansData dsd : documentSpansData) {

        Map<String, AtomicInteger> termsMap = new HashMap<String, AtomicInteger>();

        queryStringFrequencyMap.put(dsd.queryString, dsd.spansData.length);

        for (int[] data : dsd.spansData) {

            int keywordstart = data[0];
            int keywordend = data[1];

            // left context window, clamped at the start of the document
            int leftstart = keywordstart - context;
            if (leftstart < 0) {
                leftstart = 0;
            }
            // NOTE(review): the upper bound excludes the token immediately before the
            // keyword (position keywordstart - 1); kept as in the original - confirm
            // whether this is intended.
            for (int i = leftstart; i < keywordstart - 1; i++) {
                String term = termsOfInterest.get(i).getText();
                if (isCollocateCandidate(term, stopwords)) {
                    stringsOfInterestMap.put(term, 0);
                    incrementCount(termsMap, term);
                }
            }

            // the keyword tokens themselves are only registered for the document
            // frequency lookup; they are not counted as context terms
            for (int i = keywordstart; i < keywordend; i++) {
                String term = termsOfInterest.get(i).getText();
                if (isCollocateCandidate(term, stopwords)) {
                    stringsOfInterestMap.put(term, 0);
                }
            }

            // right context window, clamped at lastToken
            int rightend = keywordend + context;
            if (rightend > lastToken) {
                rightend = lastToken;
            }
            for (int i = keywordend; i < rightend; i++) {
                String term = termsOfInterest.get(i).getText();
                if (isCollocateCandidate(term, stopwords)) {
                    stringsOfInterestMap.put(term, 0);
                    incrementCount(termsMap, term);
                }
            }
        }

        mapOfTermsMap.put(dsd.queryString, termsMap);
    }

    // gather document frequency for strings of interest
    int documentTotalTokens = 0;

    Terms terms = LeafReader.getTermVector(luceneDoc, tokenType.name());
    TermsEnum termsEnum = terms.iterator();
    for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
        String termString = term.utf8ToString();
        PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.FREQS);
        // advance onto the first posting before reading the term's frequency
        postingsEnum.nextDoc();
        int freq = postingsEnum.freq();
        documentTotalTokens += freq;
        if (stringsOfInterestMap.containsKey(termString)) {
            stringsOfInterestMap.put(termString, freq);
        }
    }

    FlexibleQueue<DocumentCollocate> documentCollocatesQueue = new FlexibleQueue(comparator, limit);

    for (Map.Entry<String, Map<String, AtomicInteger>> keywordMapEntry : mapOfTermsMap.entrySet()) {
        String keyword = keywordMapEntry.getKey();
        int keywordContextRawFrequency = queryStringFrequencyMap.get(keyword);

        Map<String, AtomicInteger> termsMap = keywordMapEntry.getValue();

        // once through to determine contextTotalTokens
        int contextTotalTokens = 0;
        for (AtomicInteger count : termsMap.values()) {
            contextTotalTokens += count.intValue();
        }

        // and now to create document collocate objects
        for (Map.Entry<String, AtomicInteger> termsMapEntry : termsMap.entrySet()) {
            String term = termsMapEntry.getKey();
            int termDocumentRawFrequency = stringsOfInterestMap.get(term);
            int termContextRawFrequency = termsMapEntry.getValue().intValue();
            DocumentCollocate documentCollocate = new DocumentCollocate(corpusDocIndex, keyword, term,
                    keywordContextRawFrequency, termContextRawFrequency, termDocumentRawFrequency,
                    contextTotalTokens, documentTotalTokens);
            documentCollocatesQueue.offer(documentCollocate);
        }

    }

    return documentCollocatesQueue;
}

/** Tells whether a term passes the stopword filter and, when a whitelist is configured, is on it. */
private boolean isCollocateCandidate(String term, Keywords stopwords) {
    if (stopwords.isKeyword(term)) {
        return false;
    }
    if (collocatesWhitelist.isEmpty() == false && collocatesWhitelist.isKeyword(term) == false) {
        return false;
    }
    return true;
}

/** Adds one occurrence of {@code term} to {@code counts}, creating the counter on first use. */
private static void incrementCount(Map<String, AtomicInteger> counts, String term) {
    AtomicInteger counter = counts.get(term);
    if (counter == null) {
        counts.put(term, new AtomicInteger(1));
    } else {
        counter.getAndIncrement();
    }
}