Example usage for org.apache.lucene.search IndexSearcher getSimilarity

List of usage examples for org.apache.lucene.search IndexSearcher getSimilarity

Introduction

In this page you can find the example usage for org.apache.lucene.search IndexSearcher getSimilarity.

Prototype

public Similarity getSimilarity() 

Source Link

Document

Expert: Get the Similarity to use to compute scores.

Usage

From source file:com.gentics.cr.lucene.indexaccessor.DefaultIndexAccessor.java

License:Apache License

/**
 * Fetches a double-checked Searcher that has been checked for the presence of a reopen file.
 * Note that it may occur that a prioritized Searcher is reopened twice.
 *
 * @return a searcher backed by the freshest available IndexReader
 * @throws IOException if reopening the underlying IndexReader fails
 */
public IndexSearcher getPrioritizedSearcher() throws IOException {
    // Snapshot the flag first: another thread may change numReopening afterwards.
    boolean reopened = this.numReopening > 0;
    IndexSearcher searcher = (IndexSearcher) getSearcher();

    if (reopened) {
        // Reopen the searcher, as it was prioritized.
        synchronized (DefaultIndexAccessor.this) {
            IndexReader reader = searcher.getIndexReader();
            IndexSearcher oldSearcher = searcher;
            IndexReader newReader = reader.reopen();
            if (newReader != reader) {
                // Index changed on disk: wrap the fresh reader, carry the
                // Similarity over, and close the stale reader.
                searcher = new IndexSearcher(newReader);
                searcher.setSimilarity(oldSearcher.getSimilarity());
                oldSearcher.getIndexReader().close();
                // Swap the stale searcher for the fresh one wherever it is cached.
                // entry.setValue() replaces the value in place; unlike put() during
                // iteration it states the non-structural intent directly.
                for (Map.Entry<Similarity, IndexSearcher> e : cachedSearchers.entrySet()) {
                    if (e.getValue() == oldSearcher) {
                        e.setValue(searcher);
                    }
                }
            }
        }
    }

    return searcher;
}

From source file:com.gentics.cr.lucene.indexaccessor.DefaultIndexAccessor.java

License:Apache License

/**
 * Reopens all of the Searchers in the Searcher cache. This method is invoked
 * in a synchronized context.
 */
private void reopenCachedSearchers() {
    LOGGER.debug(
            "reopening cached searchers (" + cachedSearchers.size() + "):" + Thread.currentThread().getId());
    // BUGFIX: the original iterated keySet() and called cachedSearchers.remove(key)
    // followed by put(key, ...) inside the loop. remove() is a structural
    // modification and makes the fail-fast iterator throw
    // ConcurrentModificationException on the next step. Iterating entrySet() and
    // replacing the value via setValue() is safe and equivalent.
    for (Map.Entry<Similarity, IndexSearcher> entry : cachedSearchers.entrySet()) {
        IndexSearcher oldSearcher = entry.getValue();
        try {
            IndexReader oldReader = oldSearcher.getIndexReader();
            IndexReader newReader = oldReader.reopen();

            if (newReader != oldReader) {
                // Index changed: build a fresh searcher with the same Similarity,
                // close the stale reader, and swap it into the cache in place.
                IndexSearcher newSearcher = new IndexSearcher(newReader);
                newSearcher.setSimilarity(oldSearcher.getSimilarity());
                oldSearcher.getIndexReader().close();
                entry.setValue(newSearcher);
            }

        } catch (IOException e) {
            LOGGER.error("error reopening cached Searcher", e);
        }
    }

}

From source file:com.xiaomi.linden.lucene.query.flexiblequery.FlexibleWeight.java

License:Apache License

/**
 * Builds the weight for a {@code FlexibleQuery}: resolves a {@code TermContext}
 * for every query term (deduplicated through a local cache so each distinct
 * term is built at most once) and precomputes a {@code Similarity.SimWeight}
 * per (field, term) cell of the query's term matrix.
 *
 * @param query    the flexible query whose terms are weighted
 * @param searcher searcher supplying the reader context, collection and term statistics
 * @throws IOException if reading term statistics from the index fails
 */
public FlexibleWeight(FlexibleQuery query, IndexSearcher searcher) throws IOException {
    this.query = query;
    this.similarity = searcher.getSimilarity();
    final IndexReaderContext context = searcher.getTopReaderContext();

    int[] maxDocFreqs = null;
    long[] maxTotalTermFreqs = null;
    // Shared cache: identical Term objects across fields reuse one TermContext.
    Map<Term, TermContext> builtTermMap = new HashMap<>();
    if (query.enableGlobalIDF()) {
        // "Global IDF": for each term position, take the max docFreq /
        // totalTermFreq across all fields instead of per-field statistics.
        FlexibleQuery.FlexibleTerm[][] globalTerms = query.getGlobalTerms();
        TermContext[][] globalStates = new TermContext[globalTerms.length][];
        for (int i = 0; i < globalTerms.length; ++i) {
            globalStates[i] = new TermContext[globalTerms[i].length];
            for (int j = 0; j < globalTerms[i].length; ++j) {
                Term term = globalTerms[i][j].term;
                TermContext termContext = builtTermMap.get(term);
                if (termContext != null) {
                    globalStates[i][j] = termContext;
                } else {
                    globalStates[i][j] = TermContext.build(context, globalTerms[i][j].term);
                    builtTermMap.put(term, globalStates[i][j]);
                }
            }
        }
        // NOTE(review): assumes globalTerms is non-empty and rectangular (every
        // row has globalTerms[0].length entries) — confirm against FlexibleQuery.
        maxDocFreqs = new int[globalTerms[0].length];
        maxTotalTermFreqs = new long[globalTerms[0].length];
        int fieldLength = globalTerms.length;
        int termLength = globalTerms[0].length;
        // For term position i, fold the per-field stats down to their maxima.
        for (int i = 0; i < termLength; ++i) {
            int maxDocFreq = 0;
            long maxTotalTermFreq = 0;
            for (int j = 0; j < fieldLength; ++j) {
                maxDocFreq = Math.max(globalStates[j][i].docFreq(), maxDocFreq);
                maxTotalTermFreq = Math.max(globalStates[j][i].totalTermFreq(), maxTotalTermFreq);
            }
            maxDocFreqs[i] = maxDocFreq;
            maxTotalTermFreqs[i] = maxTotalTermFreq;
        }
    }

    // Resolve a TermContext for every (field, term) cell of the actual query,
    // reusing any contexts already built during the global-IDF pass above.
    FlexibleQuery.FlexibleTerm[][] terms = query.getTerms();
    TermContext[][] states = new TermContext[terms.length][];
    for (int i = 0; i < terms.length; ++i) {
        states[i] = new TermContext[terms[i].length];
        for (int j = 0; j < terms[i].length; ++j) {
            Term term = terms[i][j].term;
            TermContext termContext = builtTermMap.get(term);
            if (termContext != null) {
                states[i][j] = termContext;
            } else {
                states[i][j] = TermContext.build(context, terms[i][j].term);
                builtTermMap.put(term, states[i][j]);
            }
        }
    }
    // Precompute SimWeight stats per cell; with global IDF enabled the term
    // statistics come from the cross-field maxima computed above.
    termStatsMatrix = new TermStats[terms.length][];
    for (int i = 0; i < terms.length; ++i) {
        termStatsMatrix[i] = new TermStats[terms[i].length];
        for (int j = 0; j < terms[i].length; ++j) {
            FlexibleQuery.FlexibleTerm term = terms[i][j];
            TermContext state = states[i][j];
            TermStatistics termStats;
            if (query.enableGlobalIDF()) {
                termStats = new TermStatistics(term.term.bytes(), maxDocFreqs[j], maxTotalTermFreqs[j]);
            } else {
                termStats = searcher.termStatistics(term.term, state);
            }
            Similarity.SimWeight stats = similarity.computeWeight(term.boost,
                    searcher.collectionStatistics(term.term.field()), termStats);
            TermStats termStatsInfo = new TermStats();
            termStatsInfo.stats = stats;
            termStatsInfo.term = term.term;
            termStatsInfo.termContext = state;
            termStatsMatrix[i][j] = termStatsInfo;
        }
    }
}

From source file:in.student.project.queryexpansion.SearchFilesLDA.java

License:Apache License

/**
 *
 * args/* w  w w  .  j a va 2s .  c om*/
 * 0 - properties_file
 * 
 * # index-dir
 * index-dir = index
 * 
 * # query-file
 * query-file = queries.txt
 * 
 * # out-count
 * docs-per-query = 1000
 * 
 * # query-term-count (0 - any count) (ex. queries with only 3 terms - 3 )
 * query-terms-count = 0
 * 
 * # out-file - name of the file where results will be written
 * out-file = search.result
 *
 * #query-expansion
 * For details see <code> QueryExpansion Constants</code> 
 * @throws Exception 
 *
 */
public static void main(String[] args) throws Exception {
    // Load Properties
    Properties properties = new Properties();
    properties.load(new FileInputStream(args[0]));
    String runTag = args[0];
    String remoteQuery = "";
    if (args.length > 1) {
        remoteQuery = args[1];
        expandedQuery = "";
    }

    properties.setProperty(Defs.RUN_TAG_FLD, runTag);
    String indexDir = properties.getProperty("index-dir");
    String queryFile = properties.getProperty("query-file");
    int termCount = Integer.valueOf(properties.getProperty("query-terms-count")).intValue();
    int outCount = Integer.valueOf(properties.getProperty("docs-per-query")).intValue();
    String queryFileName = properties.getProperty("query-file");
    String outFileName = runTag + properties.getProperty("out-file");
    String queryExpansionFlag = properties.getProperty(QueryExpansion.METHOD_FLD, "");

    IndexReader idxReader = DirectoryReader.open(FSDirectory.open(new File(indexDir)));
    IndexSearcher searcher = new IndexSearcher(idxReader);
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_48);
    BufferedWriter writer = new BufferedWriter(new FileWriter(new File(outFileName)));
    String query_num = null;
    BufferedReader in = new BufferedReader(new FileReader(queryFileName));
    TFIDFSimilarity similarity = null;
    int hitsCount = 50;

    while (true) {
        String line;
        line = in.readLine();
        try {
            if (line.length() == -1)
                break;
        } catch (Exception e) {
            return;
        }
        StringTokenizer tknzr = new StringTokenizer(line);
        query_num = tknzr.nextToken();
        line = line.substring(query_num.length()).trim();
        if (remoteQuery.length() > 0)
            line = remoteQuery;
        String queryStr = line;
        queryStr = QueryParser.escape(queryStr);
        QueryParser parser = new QueryParser(Version.LUCENE_48, Defs.FLD_TEXT, analyzer);
        Query query = parser.parse(queryStr);
        QueryTermVector queryTermVector = new QueryTermVector(line, analyzer);
        String[] terms = queryTermVector.getTerms();
        similarity = (TFIDFSimilarity) searcher.getSimilarity();

        TopDocs hits = searcher.search(query, hitsCount);
        System.out.println("query" + " : " + query.toString());
        System.out.println(hits.totalHits + " total matching documents");

        // Query Expansion with LDA
        QueryExpansion queryExpansion;
        queryExpansion = new QueryExpansion(analyzer, searcher, similarity, properties);
        query = queryExpansion.expandQueryLDA(queryStr, hits, properties);
        expandedQuery = query.toString("contents");
        logger.info("Expanded Query: " + query);
        System.out.println("Expanded Query: " + query);
        hits = searcher.search(query, hitsCount);
        logger.info(hits.totalHits + " total matching documents");
        Vector<TermQuery> expandedQueryTerms = queryExpansion.getExpandedTerms();
        generateOutput(hits, expandedQueryTerms, query_num, writer, termCount, outCount, searcher, similarity,
                idxReader);

        writer.flush();
        if (remoteQuery.length() > 0)
            break; // onetime call.
    }
    writer.close();

}

From source file:in.student.project.queryexpansion.SearchFilesRocchio.java

License:Apache License

/**
 *
 * args/*from  w ww  .jav a2s.co  m*/
 * 0 - properties_file
 * 
 * # index-dir
 * index-dir = index
 * 
 * # query-file
 * query-file = queries.txt
 * 
 * # out-count
 * docs-per-query = 1000
 * 
 * # query-term-count (0 - any count) (ex. queries with only 3 terms - 3 )
 * query-terms-count = 0
 * 
 * # out-file - name of the file where results will be written
 * out-file = search.result
 *
 * #query-expansion
 * For details see <code> QueryExpansion Constants</code> 
 * @throws Exception 
 *
 */
public static void main(String[] args) throws Exception {
    // Load Properties
    Properties properties = new Properties();
    properties.load(new FileInputStream(args[0]));
    String runTag = args[0];
    String remoteQuery = "";
    if (args.length > 1) {
        remoteQuery = args[1];
        expandedQuery = "";
    }

    properties.setProperty(Defs.RUN_TAG_FLD, runTag);
    String indexDir = properties.getProperty("index-dir");
    String queryFile = properties.getProperty("query-file");
    int termCount = Integer.valueOf(properties.getProperty("query-terms-count")).intValue();
    int outCount = Integer.valueOf(properties.getProperty("docs-per-query")).intValue();
    String queryFileName = properties.getProperty("query-file");
    String outFileName = runTag + properties.getProperty("out-file");
    String queryExpansionFlag = properties.getProperty(QueryExpansion.METHOD_FLD, "");

    IndexReader idxReader = DirectoryReader.open(FSDirectory.open(new File(indexDir)));
    IndexSearcher searcher = new IndexSearcher(idxReader);
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_48);
    BufferedWriter writer = new BufferedWriter(new FileWriter(new File(outFileName)));
    String query_num = null;
    BufferedReader in = new BufferedReader(new FileReader(queryFileName));
    TFIDFSimilarity similarity = null;
    int hitsCount = 50;

    while (true) {
        String line;
        line = in.readLine();
        try {
            if (line.length() == -1)
                break;
        } catch (Exception e) {
            return;
        }
        StringTokenizer tknzr = new StringTokenizer(line);
        query_num = tknzr.nextToken();
        line = line.substring(query_num.length()).trim();
        if (remoteQuery.length() > 0)
            line = remoteQuery;
        String queryStr = line;
        queryStr = QueryParser.escape(queryStr);
        QueryParser parser = new QueryParser(Version.LUCENE_48, Defs.FLD_TEXT, analyzer);
        Query query = parser.parse(queryStr);
        QueryTermVector queryTermVector = new QueryTermVector(line, analyzer);
        String[] terms = queryTermVector.getTerms();
        similarity = (TFIDFSimilarity) searcher.getSimilarity();

        TopDocs hits = searcher.search(query, hitsCount);
        System.out.println("query" + " : " + query.toString());
        System.out.println(hits.totalHits + " total matching documents");

        // Query Expansion with Rocchio algorithm
        if (queryExpansionFlag.equals(QueryExpansion.ROCCHIO_METHOD)) {
            QueryExpansion queryExpansion;
            queryExpansion = new QueryExpansion(analyzer, searcher, similarity, properties);
            query = queryExpansion.expandQuery(queryStr, hits, properties);
            expandedQuery = query.toString("contents");
            System.out.println("Expanded Query: " + query);
            hits = searcher.search(query, hitsCount);
            Vector<TermQuery> expandedQueryTerms = queryExpansion.getExpandedTerms();
            generateOutput(hits, expandedQueryTerms, query_num, writer, termCount, outCount, searcher,
                    similarity, idxReader);
        }

        writer.flush();
        if (remoteQuery.length() > 0)
            break; // onetime call.
    }
    writer.close();
}

From source file:org.apache.blur.lucene.search.AbstractWrapperQuery.java

License:Apache License

/**
 * Delegates to the supplied searcher's configured {@code Similarity}, so the
 * wrapper scores exactly like the searcher it wraps.
 */
public Similarity getSimilarity(IndexSearcher searcher) {
    final Similarity configured = searcher.getSimilarity();
    return configured;
}

From source file:org.apache.solr.search.function.IDFValueSource.java

License:Apache License

/**
 * Produces a constant value source holding idf(docFreq, maxDoc) for the
 * indexed term, computed with the searcher's Similarity.
 */
@Override
public DocValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
    final IndexSearcher searcher = (IndexSearcher) context.get("searcher");
    final Similarity similarity = searcher.getSimilarity();
    // todo: we need docFreq that takes a BytesRef
    final String termText = ByteUtils.UTF8toUTF16(indexedBytes);
    final int df = searcher.docFreq(new Term(indexedField, termText));
    final float idfValue = similarity.idf(df, searcher.maxDoc());
    return new ConstDoubleDocValues(idfValue, this);
}

From source file:org.apache.solr.search.function.NormValueSource.java

License:Apache License

/**
 * Produces per-document norm values for {@code field}, decoded through the
 * searcher's Similarity; a constant 0.0 source when no norms are stored.
 */
@Override
public DocValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
    final IndexSearcher searcher = (IndexSearcher) context.get("searcher");
    final Similarity similarity = searcher.getSimilarity();
    final byte[] fieldNorms = readerContext.reader.norms(field);

    // Field has no norms: every document yields a constant zero.
    if (fieldNorms == null) {
        return new ConstDoubleDocValues(0.0, this);
    }

    return new FloatDocValues(this) {
        @Override
        public float floatVal(int doc) {
            // Decode the stored norm byte back into its float value.
            return similarity.decodeNormValue(fieldNorms[doc]);
        }
    };
}

From source file:org.elasticsearch.common.lucene.search.ExtendedIndexSearcher.java

License:Apache License

/**
 * Wraps an existing searcher: reuses its IndexReader and copies its
 * Similarity so this searcher scores identically to the one it wraps.
 *
 * @param searcher the searcher whose reader and Similarity are adopted
 */
public ExtendedIndexSearcher(IndexSearcher searcher) {
    super(searcher.getIndexReader());
    setSimilarity(searcher.getSimilarity());
}

From source file:org.elasticsearch.index.search.child.ChildrenConstantScoreQuery.java

License:Apache License

/**
 * Builds the parent-side weight for this child query: runs the (cached,
 * rewritten) child query to collect matching parent ids, then constructs a
 * ParentWeight, short-circuiting to a term/ids filter when few parents match.
 *
 * @param searcher searcher over the child documents
 * @return a weight matching parents of the collected child hits, or a
 *         match-no-docs weight when no parent ids were collected
 * @throws IOException if the child search fails
 */
@Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
    SearchContext searchContext = SearchContext.current();
    // Parent ids are accumulated into a recycled BytesRefHash (initial size 512).
    BytesRefHash parentIds = new BytesRefHash(512, searchContext.pageCacheRecycler());
    ParentIdCollector collector = new ParentIdCollector(parentType, parentChildIndexFieldData, parentIds);
    final Query childQuery;
    if (rewrittenChildQuery == null) {
        // First use: rewrite once and cache the rewritten child query.
        childQuery = rewrittenChildQuery = searcher.rewrite(originalChildQuery);
    } else {
        // Cached rewrite is only valid against the reader it was built for.
        assert rewriteIndexReader == searcher.getIndexReader();
        childQuery = rewrittenChildQuery;
    }
    // Run the child query on a plain searcher (same reader and Similarity) so
    // the collection is not affected by the outer searcher's wrapping.
    IndexSearcher indexSearcher = new IndexSearcher(searcher.getIndexReader());
    indexSearcher.setSimilarity(searcher.getSimilarity());
    indexSearcher.search(childQuery, collector);

    long remaining = parentIds.size();
    if (remaining == 0) {
        // No children matched, so no parents can match.
        return Queries.newMatchNoDocsQuery().createWeight(searcher);
    }

    // With few parent ids, pre-filter parent docs instead of scanning them all.
    Filter shortCircuitFilter = null;
    if (remaining == 1) {
        BytesRef id = parentIds.get(0, new BytesRef());
        shortCircuitFilter = new TermFilter(
                new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(parentType, id)));
    } else if (remaining <= shortCircuitParentDocSet) {
        shortCircuitFilter = new ParentIdsFilter(parentType, nonNestedDocsFilter, parentIds);
    }

    ParentWeight parentWeight = new ParentWeight(parentFilter, shortCircuitFilter, parentIds);
    // Register for release so the recycled parentIds hash is returned.
    searchContext.addReleasable(parentWeight);
    return parentWeight;
}