Usage examples for org.apache.lucene.index.IndexReader.getTermVectors
public abstract Fields getTermVectors(int docID) throws IOException;
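The method returns all term vectors stored for the given document (one Fields instance covering every field that had term vectors enabled at index time), or null if none were stored. Before the full examples, here is a minimal, self-contained sketch of the call; the index path "/path/to/index" and the field name "content" are placeholders, and the code assumes a Lucene 5.x+ index built with term vectors enabled.

import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

public class TermVectorDump {
    public static void main(String[] args) throws Exception {
        try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/path/to/index")))) {
            int docId = 0; // any valid document id
            Fields vectors = reader.getTermVectors(docId);
            if (vectors == null) {
                return; // no term vectors stored for this document
            }
            Terms terms = vectors.terms("content");
            if (terms == null) {
                return; // this field has no term vector for this document
            }
            TermsEnum termsEnum = terms.iterator();
            BytesRef term;
            while ((term = termsEnum.next()) != null) {
                // for term vectors, totalTermFreq() is the frequency within this document
                System.out.println(term.utf8ToString() + " freq=" + termsEnum.totalTermFreq());
            }
        }
    }
}

Note that both the Fields result and the per-field Terms can be null when vectors were not stored; every full example below guards against that before iterating.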
From source file:com.o19s.solr.swan.highlight.SpanAwareFieldTermStack.java
License:Apache License
/**
 * a constructor.
 *
 * @param reader IndexReader of the index
 * @param docId document id to be highlighted
 * @param fieldName field of the document to be highlighted
 * @param fieldQuery FieldQuery object
 * @throws IOException If there is a low-level I/O error
 */
public SpanAwareFieldTermStack(IndexReader reader, int docId, String fieldName,
        final SpanAwareFieldQuery fieldQuery) throws IOException {
    this.fieldName = fieldName;

    Set<String> termSet = fieldQuery.getTermSet(fieldName);
    Set<String> alwaysHighlightTermSet = fieldQuery.getHighlightTermSet(fieldName);

    // just return to make null snippet if un-matched fieldName specified when fieldMatch == true
    if (termSet == null)
        return;

    final Fields vectors = reader.getTermVectors(docId);
    if (vectors == null) {
        // null snippet
        return;
    }

    final Terms vector = vectors.terms(fieldName);
    if (vector == null) {
        // null snippet
        return;
    }

    final CharsRef spare = new CharsRef();
    final TermsEnum termsEnum = vector.iterator(null);
    DocsAndPositionsEnum dpEnum = null;
    BytesRef text;

    int numDocs = reader.maxDoc();

    while ((text = termsEnum.next()) != null) {
        UnicodeUtil.UTF8toUTF16(text, spare);
        final String term = spare.toString();
        if (!termSet.contains(term)) {
            continue;
        }
        dpEnum = termsEnum.docsAndPositions(null, dpEnum);
        if (dpEnum == null) {
            // null snippet
            return;
        }

        dpEnum.nextDoc();

        // For weight look here: http://lucene.apache.org/core/3_6_0/api/core/org/apache/lucene/search/DefaultSimilarity.html
        final float weight = (float) (Math
                .log(numDocs / (double) (reader.docFreq(new Term(fieldName, text)) + 1)) + 1.0);

        final int freq = dpEnum.freq();

        for (int i = 0; i < freq; i++) {
            int pos = dpEnum.nextPosition();

            if (dpEnum.startOffset() < 0) {
                return; // no offsets, null snippet
            }

            if (alwaysHighlightTermSet.contains(term)
                    || fieldQuery.doesDocFieldContainPosition(fieldName, docId, dpEnum.startOffset())) {
                termList.add(new TermInfo(term, dpEnum.startOffset(), dpEnum.endOffset(), pos, weight));
            }
        }
    }

    // sort by position
    Collections.sort(termList);
}
From source file:it.cnr.ilc.lc.clavius.search.Tester.java
private static void searchWithContext(String term) {
    try {
        logger.info("searchWithContext(" + term + ")");
        SpanQuery spanQuery = new SpanTermQuery(new Term("content", term));
        Directory indexDirectory = FSDirectory.open(
                Paths.get("/var/lucene/claviusTest/indexes/it.cnr.ilc.lc.clavius.search.entity.PlainText"));
        DirectoryReader indexReader = DirectoryReader.open(indexDirectory);
        IndexSearcher searcher = new IndexSearcher(indexReader);
        IndexReader reader = searcher.getIndexReader();
        //spanQuery = (SpanQuery) spanQuery.rewrite(reader);
        //SpanWeight weight = (SpanWeight) searcher.createWeight(spanQuery, false);
        Spans spans = spanQuery.createWeight(searcher, false)
                .getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS);
        // Spans spans2 = weight.getSpans(reader.leaves().get(0), SpanWeight.Postings.OFFSETS);
        // Spans spans = weight.getSpans(reader.leaves().get(0), SpanWeight.Postings.POSITIONS);
        ScoreDoc[] sc = searcher.search(spanQuery, 10).scoreDocs;
        logger.info("hits :" + sc.length);
        int i;
        if (null != spans) {
            // while ((nextDoc = spans.nextDoc()) != Spans.NO_MORE_DOCS) {
            for (int k = 0; k < sc.length; k++) {
                int docId = sc[k].doc;
                logger.info("docID: " + docId);
                int newDocID = spans.advance(docId);
                logger.info("newDocID: " + newDocID);
                int nextSpan = -1;
                while ((nextSpan = spans.nextStartPosition()) != Spans.NO_MORE_POSITIONS) {
                    logger.info("nextSpan : " + nextSpan);
                    logger.info("spans.startPosition(): " + spans.startPosition());
                    logger.info("spans.endPosition() : " + spans.endPosition());
                    logger.info("spans.width() : " + spans.width());
                    Fields fields = reader.getTermVectors(docId);
                    Terms terms = fields.terms("content");
                    TermsEnum termsEnum = terms.iterator();
                    BytesRef text;
                    PostingsEnum postingEnum = null;
                    int start = spans.startPosition() - 3;
                    int end = spans.endPosition() + 3;
                    while ((text = termsEnum.next()) != null) {
                        // could store the BytesRef here, but String is easier for this example
                        String s = new String(text.bytes, text.offset, text.length);
                        // DocsAndPositionsEnum positionsEnum = termsEnum.docsAndPositions(null, null);
                        postingEnum = termsEnum.postings(postingEnum);
                        if (postingEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                            i = 0;
                            int position = -1;
                            while (i < postingEnum.freq() && (position = postingEnum.nextPosition()) != -1) {
                                if (position >= start && position <= end) {
                                    logger.info("pos: " + position + ", term: " + s + " offset: " + text.offset
                                            + " length: " + text.length);
                                }
                                i++;
                            }
                        }
                    }
                }
            }
        } else {
            logger.info("no " + term + " found!");
        }
    } catch (IOException e) {
        logger.error(e.getMessage());
    }
    logger.info("End.");
}
From source file:org.apache.solr.handler.component.AlfrescoSolrHighlighter.java
License:Open Source License
/** Highlights and returns the highlight object for this field -- a String[] by default. Null if none. */
@SuppressWarnings("unchecked")
protected Object doHighlightingByHighlighter(Document doc, int docId, SchemaField schemaField, Query query,
        IndexReader reader, SolrQueryRequest req) throws IOException {
    final SolrParams params = req.getParams();
    final String fieldName = schemaField.getName();

    final int mvToExamine = params.getFieldInt(fieldName, HighlightParams.MAX_MULTIVALUED_TO_EXAMINE,
            (schemaField.multiValued()) ? Integer.MAX_VALUE : 1);

    // Technically this is the max *fragments* (snippets), not max values:
    int mvToMatch = params.getFieldInt(fieldName, HighlightParams.MAX_MULTIVALUED_TO_MATCH, Integer.MAX_VALUE);
    if (mvToExamine <= 0 || mvToMatch <= 0) {
        return null;
    }

    int maxCharsToAnalyze = params.getFieldInt(fieldName, HighlightParams.MAX_CHARS, DEFAULT_MAX_CHARS);
    if (maxCharsToAnalyze < 0) { // e.g. -1
        maxCharsToAnalyze = Integer.MAX_VALUE;
    }

    List<String> fieldValues = getFieldValues(doc, fieldName, mvToExamine, maxCharsToAnalyze, req);
    if (fieldValues.isEmpty()) {
        return null;
    }

    // preserve order of values in a multiValued list
    boolean preserveMulti = params.getFieldBool(fieldName, HighlightParams.PRESERVE_MULTI, false);

    int numFragments = getMaxSnippets(fieldName, params);
    boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params);

    List<TextFragment> frags = new ArrayList<>();

    // Try term vectors, which is faster
    // note: offsets are minimally sufficient for this HL.
    final Fields tvFields = schemaField.storeTermOffsets() ? reader.getTermVectors(docId) : null;
    final TokenStream tvStream = TokenSources.getTermVectorTokenStreamOrNull(fieldName, tvFields,
            maxCharsToAnalyze - 1);
    // We need to wrap in OffsetWindowTokenFilter if multi-valued
    final OffsetWindowTokenFilter tvWindowStream;
    if (tvStream != null && fieldValues.size() > 1) {
        tvWindowStream = new OffsetWindowTokenFilter(tvStream);
    } else {
        tvWindowStream = null;
    }

    for (String thisText : fieldValues) {
        if (mvToMatch <= 0 || maxCharsToAnalyze <= 0) {
            break;
        }

        TokenStream tstream;
        if (tvWindowStream != null) {
            // if we have a multi-valued field with term vectors, then get the next offset window
            tstream = tvWindowStream.advanceToNextWindowOfLength(thisText.length());
        } else if (tvStream != null) {
            tstream = tvStream; // single-valued with term vectors
        } else {
            // fall back to analyzer
            tstream = createAnalyzerTStream(schemaField, thisText);
        }

        Highlighter highlighter;
        if (params.getFieldBool(fieldName, HighlightParams.USE_PHRASE_HIGHLIGHTER, true)) {
            // We're going to call getPhraseHighlighter and it might consume the tokenStream. If it does,
            // the tokenStream needs to implement reset() efficiently.

            // If the tokenStream is right from the term vectors, then CachingTokenFilter is unnecessary.
            // It should be okay if OffsetLimit won't get applied in this case.
            final TokenStream tempTokenStream;
            if (tstream != tvStream) {
                if (maxCharsToAnalyze >= thisText.length()) {
                    tempTokenStream = new CachingTokenFilter(tstream);
                } else {
                    tempTokenStream = new CachingTokenFilter(
                            new OffsetLimitTokenFilter(tstream, maxCharsToAnalyze));
                }
            } else {
                tempTokenStream = tstream;
            }

            // get highlighter
            highlighter = getPhraseHighlighter(query, fieldName, req, tempTokenStream);

            // if the CachingTokenFilter was consumed then use it going forward.
            if (tempTokenStream instanceof CachingTokenFilter
                    && ((CachingTokenFilter) tempTokenStream).isCached()) {
                tstream = tempTokenStream;
            }
            // tstream.reset(); not needed; getBestTextFragments will reset it.
        } else {
            // use "the old way"
            highlighter = getHighlighter(query, fieldName, req);
        }

        highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
        maxCharsToAnalyze -= thisText.length();

        // Highlight!
        try {
            TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tstream,
                    fixLocalisedText(thisText), mergeContiguousFragments, numFragments);
            for (TextFragment bestTextFragment : bestTextFragments) {
                if (bestTextFragment == null) // can happen via mergeContiguousFragments
                    continue;
                // normally we want a score (must be highlighted), but if preserveMulti then we return a snippet regardless.
                if (bestTextFragment.getScore() > 0 || preserveMulti) {
                    frags.add(bestTextFragment);
                    if (bestTextFragment.getScore() > 0)
                        --mvToMatch; // note: limits fragments (for multi-valued fields), not quite the number of values
                }
            }
        } catch (InvalidTokenOffsetsException e) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
        }
    } // end field value loop

    // Put the fragments onto the Solr response (docSummaries)
    if (frags.size() > 0) {
        // sort such that the fragments with the highest score come first
        if (!preserveMulti) {
            Collections.sort(frags, (arg0, arg1) -> Float.compare(arg1.getScore(), arg0.getScore()));
        }
        // Truncate list to hl.snippets, but not when hl.preserveMulti
        if (frags.size() > numFragments && !preserveMulti) {
            frags = frags.subList(0, numFragments);
        }
        return getResponseForFragments(frags, req);
    }
    return null; // no highlights for this field
}
From source file:org.apache.solr.handler.component.TermVectorComponent.java
License:Apache License
@Override
public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(COMPONENT_NAME, false)) {
        return;
    }

    NamedList<Object> termVectors = new NamedList<Object>();
    rb.rsp.add(TERM_VECTORS, termVectors);

    IndexSchema schema = rb.req.getSchema();
    SchemaField keyField = schema.getUniqueKeyField();
    String uniqFieldName = null;
    if (keyField != null) {
        uniqFieldName = keyField.getName();
        termVectors.add("uniqueKeyFieldName", uniqFieldName);
    }

    FieldOptions allFields = new FieldOptions();
    // figure out what options we have, and try to get the appropriate vector
    allFields.termFreq = params.getBool(TermVectorParams.TF, false);
    allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
    allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
    allFields.docFreq = params.getBool(TermVectorParams.DF, false);
    allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
    //boolean cacheIdf = params.getBool(TermVectorParams.IDF, false);
    // shortcut to all values.
    if (params.getBool(TermVectorParams.ALL, false)) {
        allFields.termFreq = true;
        allFields.positions = true;
        allFields.offsets = true;
        allFields.docFreq = true;
        allFields.tfIdf = true;
    }

    // Build up our per-field mapping
    Map<String, FieldOptions> fieldOptions = new HashMap<String, FieldOptions>();
    NamedList<List<String>> warnings = new NamedList<List<String>>();
    List<String> noTV = new ArrayList<String>();
    List<String> noPos = new ArrayList<String>();
    List<String> noOff = new ArrayList<String>();

    Set<String> fields = getFields(rb);
    if (null != fields) {
        // we have specific fields to retrieve, or no fields
        for (String field : fields) {

            // workaround SOLR-3523
            if (null == field || "score".equals(field))
                continue;

            // we don't want to issue warnings about the uniqueKey field
            // since it can cause lots of confusion in distributed requests
            // where the uniqueKey field is injected into the fl for merging
            final boolean fieldIsUniqueKey = field.equals(uniqFieldName);

            SchemaField sf = schema.getFieldOrNull(field);
            if (sf != null) {
                if (sf.storeTermVector()) {
                    FieldOptions option = fieldOptions.get(field);
                    if (option == null) {
                        option = new FieldOptions();
                        option.fieldName = field;
                        fieldOptions.put(field, option);
                    }
                    // get the per-field mappings
                    option.termFreq = params.getFieldBool(field, TermVectorParams.TF, allFields.termFreq);
                    option.docFreq = params.getFieldBool(field, TermVectorParams.DF, allFields.docFreq);
                    option.tfIdf = params.getFieldBool(field, TermVectorParams.TF_IDF, allFields.tfIdf);
                    // Validate these are even an option
                    option.positions = params.getFieldBool(field, TermVectorParams.POSITIONS,
                            allFields.positions);
                    if (option.positions && !sf.storeTermPositions() && !fieldIsUniqueKey) {
                        noPos.add(field);
                    }
                    option.offsets = params.getFieldBool(field, TermVectorParams.OFFSETS, allFields.offsets);
                    if (option.offsets && !sf.storeTermOffsets() && !fieldIsUniqueKey) {
                        noOff.add(field);
                    }
                } else { // field doesn't have term vectors
                    if (!fieldIsUniqueKey)
                        noTV.add(field);
                }
            } else {
                // field doesn't exist
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "undefined field: " + field);
            }
        }
    } // else, deal with all fields

    // NOTE: currently all types of warnings are schema driven, and guaranteed
    // to be consistent across all shards - if additional types of warnings
    // are added that might be different between shards, finishStage() needs
    // to be changed to account for that.
    boolean hasWarnings = false;
    if (!noTV.isEmpty()) {
        warnings.add("noTermVectors", noTV);
        hasWarnings = true;
    }
    if (!noPos.isEmpty()) {
        warnings.add("noPositions", noPos);
        hasWarnings = true;
    }
    if (!noOff.isEmpty()) {
        warnings.add("noOffsets", noOff);
        hasWarnings = true;
    }
    if (hasWarnings) {
        termVectors.add("warnings", warnings);
    }

    DocListAndSet listAndSet = rb.getResults();
    List<Integer> docIds = getInts(params.getParams(TermVectorParams.DOC_IDS));
    Iterator<Integer> iter;
    if (docIds != null && !docIds.isEmpty()) {
        iter = docIds.iterator();
    } else {
        DocList list = listAndSet.docList;
        iter = list.iterator();
    }

    SolrIndexSearcher searcher = rb.req.getSearcher();
    IndexReader reader = searcher.getIndexReader();
    // the TVMapper is a TermVectorMapper which can be used to optimize loading of Term Vectors
    // Only load the id field to get the uniqueKey of that field
    final String finalUniqFieldName = uniqFieldName;

    final List<String> uniqValues = new ArrayList<String>();

    // TODO: is this required to be single-valued? if so, we should STOP
    // once we find it...
    final StoredFieldVisitor getUniqValue = new StoredFieldVisitor() {
        @Override
        public void stringField(FieldInfo fieldInfo, String value) {
            uniqValues.add(value);
        }

        @Override
        public void intField(FieldInfo fieldInfo, int value) {
            uniqValues.add(Integer.toString(value));
        }

        @Override
        public void longField(FieldInfo fieldInfo, long value) {
            uniqValues.add(Long.toString(value));
        }

        @Override
        public Status needsField(FieldInfo fieldInfo) {
            return (fieldInfo.name.equals(finalUniqFieldName)) ? Status.YES : Status.NO;
        }
    };

    TermsEnum termsEnum = null;

    while (iter.hasNext()) {
        Integer docId = iter.next();
        NamedList<Object> docNL = new NamedList<Object>();

        if (keyField != null) {
            reader.document(docId, getUniqValue);
            String uniqVal = null;
            if (uniqValues.size() != 0) {
                uniqVal = uniqValues.get(0);
                uniqValues.clear();
                docNL.add("uniqueKey", uniqVal);
                termVectors.add(uniqVal, docNL);
            }
        } else {
            // support for schemas w/o a unique key,
            termVectors.add("doc-" + docId, docNL);
        }

        if (null != fields) {
            for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
                final String field = entry.getKey();
                final Terms vector = reader.getTermVector(docId, field);
                if (vector != null) {
                    termsEnum = vector.iterator(termsEnum);
                    mapOneVector(docNL, entry.getValue(), reader, docId, vector.iterator(termsEnum), field);
                }
            }
        } else {
            // extract all fields
            final Fields vectors = reader.getTermVectors(docId);
            for (String field : vectors) {
                Terms terms = vectors.terms(field);
                if (terms != null) {
                    termsEnum = terms.iterator(termsEnum);
                    mapOneVector(docNL, allFields, reader, docId, termsEnum, field);
                }
            }
        }
    }
}
From source file:org.eclipse.che.api.search.server.impl.LuceneSearcher.java
License:Open Source License
@Override
public SearchResult search(QueryExpression query) throws InvalidQueryException, QueryExecutionException {
    IndexSearcher luceneSearcher = null;
    try {
        final long startTime = System.currentTimeMillis();
        searcherManager.maybeRefresh();
        luceneSearcher = searcherManager.acquire();

        Query luceneQuery = createLuceneQuery(query);

        ScoreDoc after = null;
        final int numSkipDocs = Math.max(0, query.getSkipCount());
        if (numSkipDocs > 0) {
            after = skipScoreDocs(luceneSearcher, luceneQuery, numSkipDocs);
        }

        final int numDocs = query.getMaxItems() > 0 ? Math.min(query.getMaxItems(), RESULT_LIMIT) : RESULT_LIMIT;

        TopDocs topDocs = luceneSearcher.searchAfter(after, luceneQuery, numDocs, sort, true, true);
        final long totalHitsNum = topDocs.totalHits;

        List<SearchResultEntry> results = newArrayList();
        List<OffsetData> offsetData = Collections.emptyList();

        for (int i = 0; i < topDocs.scoreDocs.length; i++) {
            ScoreDoc scoreDoc = topDocs.scoreDocs[i];
            int docId = scoreDoc.doc;
            Document doc = luceneSearcher.doc(docId);
            if (query.isIncludePositions()) {
                offsetData = new ArrayList<>();
                String txt = doc.get(TEXT_FIELD);
                if (txt != null) {
                    IndexReader reader = luceneSearcher.getIndexReader();
                    TokenStream tokenStream = TokenSources.getTokenStream(TEXT_FIELD,
                            reader.getTermVectors(docId), txt, luceneIndexWriter.getAnalyzer(), -1);
                    CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
                    OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);

                    QueryScorer queryScorer = new QueryScorer(luceneQuery);
                    // TODO think about this constant
                    queryScorer.setMaxDocCharsToAnalyze(1_000_000);

                    TokenStream newStream = queryScorer.init(tokenStream);
                    if (newStream != null) {
                        tokenStream = newStream;
                    }
                    queryScorer.startFragment(null);
                    tokenStream.reset();

                    int startOffset, endOffset;
                    // TODO think about this constant
                    for (boolean next = tokenStream.incrementToken();
                            next && (offsetAtt.startOffset() < 1_000_000);
                            next = tokenStream.incrementToken()) {
                        startOffset = offsetAtt.startOffset();
                        endOffset = offsetAtt.endOffset();

                        if ((endOffset > txt.length()) || (startOffset > txt.length())) {
                            throw new QueryExecutionException("Token " + termAtt.toString()
                                    + " exceeds length of provided text size " + txt.length());
                        }

                        float res = queryScorer.getTokenScore();
                        if (res > 0.0F && startOffset <= endOffset) {
                            String tokenText = txt.substring(startOffset, endOffset);
                            Scanner sc = new Scanner(txt);
                            int lineNum = 1;
                            long len = 0;
                            String foundLine = "";
                            while (sc.hasNextLine()) {
                                foundLine = sc.nextLine();
                                len += foundLine.length();
                                if (len > startOffset) {
                                    break;
                                }
                                lineNum++;
                            }
                            offsetData.add(
                                    new OffsetData(tokenText, startOffset, endOffset, res, lineNum, foundLine));
                        }
                    }
                }
            }
            String filePath = doc.getField(PATH_FIELD).stringValue();
            LOG.debug("Doc {} path {} score {} ", docId, filePath, scoreDoc.score);
            results.add(new SearchResultEntry(filePath, offsetData));
        }

        final long elapsedTimeMillis = System.currentTimeMillis() - startTime;
        boolean hasMoreToRetrieve = numSkipDocs + topDocs.scoreDocs.length + 1 < totalHitsNum;
        QueryExpression nextPageQueryExpression = null;
        if (hasMoreToRetrieve) {
            nextPageQueryExpression = createNextPageQuery(query, numSkipDocs + topDocs.scoreDocs.length);
        }

        return SearchResult.aSearchResult()
                .withResults(results)
                .withTotalHits(totalHitsNum)
                .withNextPageQueryExpression(nextPageQueryExpression)
                .withElapsedTimeMillis(elapsedTimeMillis)
                .build();
    } catch (ParseException e) {
        throw new InvalidQueryException(e.getMessage(), e);
    } catch (IOException e) {
        throw new QueryExecutionException(e.getMessage(), e);
    } finally {
        try {
            searcherManager.release(luceneSearcher);
        } catch (IOException e) {
            LOG.error(e.getMessage());
        }
    }
}
From source file:org.meresco.lucene.Lucene.java
License:Open Source License
public LuceneResponse similarDocuments(String identifier) throws Throwable {
    SearcherAndTaxonomy reference = data.getManager().acquire();
    try {
        Query idQuery = new TermQuery(new Term(ID_FIELD, identifier));
        TopDocs topDocs = reference.searcher.search(idQuery, 1);
        if (topDocs.totalHits == 0)
            return new LuceneResponse(0);
        int docId = topDocs.scoreDocs[0].doc;

        IndexReader reader = reference.searcher.getIndexReader();
        CommonTermsQuery commonQuery = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, 0.1f);
        Fields termVectors = reader.getTermVectors(docId);
        if (termVectors == null)
            return new LuceneResponse(0);
        for (String field : termVectors) {
            TermsEnum iterator = termVectors.terms(field).iterator(null);
            BytesRef b;
            while ((b = iterator.next()) != null) {
                Term term = new Term(field, b.utf8ToString());
                commonQuery.add(term);
            }
        }
        BooleanQuery query = new BooleanQuery();
        query.add(idQuery, Occur.MUST_NOT);
        query.add(commonQuery, Occur.MUST);
        return executeQuery(query);
    } finally {
        data.getManager().release(reference);
    }
}
From source file:pretraga.IsolationSimilarity.java
public void test(String vec) {
    List<String> vector = processInput(vec);
    HashMap<String, Long> map = new HashMap<>();
    try {
        Directory dir = FSDirectory.open(new File(indexDirectoryPath).toPath());
        IndexReader reader = DirectoryReader.open(dir);
        IndexSearcher searcher = new IndexSearcher(reader);

        List<Integer> docId = getDocumentsFromVector(vector, reader, searcher);

        for (int i = 0; i < docId.size(); i++) {
            Fields ff = reader.getTermVectors(docId.get(i));
            Terms terms = ff.terms(CONTENT);
            TermsEnum te = terms.iterator();
            Object tmp = te.next();
            while (tmp != null) {
                BytesRef by = (BytesRef) tmp;
                String term = by.utf8ToString();
                ClassicSimilarity sim = null;
                if (searcher.getSimilarity(true) instanceof ClassicSimilarity) {
                    sim = (ClassicSimilarity) searcher.getSimilarity(true);
                }
                float idf = sim.idf(te.docFreq(), reader.maxDoc());
                float tf = sim.tf(te.totalTermFreq());
                //System.out.println("idf = " + idf + ", tf = " + tf + ", docF: " + te.totalTermFreq());

                TermStatistics ts = new TermStatistics(by, te.docFreq(), te.totalTermFreq());
                CollectionStatistics s = new CollectionStatistics(CONTENT, reader.maxDoc(),
                        terms.getDocCount(), terms.getSumTotalTermFreq(), terms.getSumDocFreq());

                Document d = reader.document(docId.get(i));
                if (vector.contains(term)) {
                    float ttt = sim.simScorer(sim.computeWeight(s, ts), reader.getContext().leaves().get(0))
                            .score(docId.get(i), te.totalTermFreq());
                    System.out.println(ttt + ", " + d.get(TITLE) + ", term: " + term);
                }
                tmp = te.next();
            }
            /*
            Iterator<String> ss = ff.iterator();
            while (ss.hasNext()) {
                String fieldString = ss.next();
                System.out.println(fieldString);
            }
            */
        }
    } catch (Exception e) {
    }
}