List of usage examples for org.apache.lucene.search IndexSearcher getSimilarity
public Similarity getSimilarity()
From source file:com.gentics.cr.lucene.indexaccessor.DefaultIndexAccessor.java
License:Apache License
/** * Fetches a double checked Searcher that has been checked for the presence of a reopen file * Note that it may occure that a prioritized Searcher may be reopened twice. * @param indexLocation //from w w w . j a v a 2 s .co m * @return * @throws IOException */ public IndexSearcher getPrioritizedSearcher() throws IOException { boolean reopened = this.numReopening > 0; IndexSearcher searcher = (IndexSearcher) getSearcher(); if (reopened) { //REOPEN SEARCHER AS IT WAS PRIORITIZED synchronized (DefaultIndexAccessor.this) { IndexReader reader = searcher.getIndexReader(); IndexSearcher oldSearcher = searcher; IndexReader newReader = reader.reopen(); if (newReader != reader) { searcher = new IndexSearcher(newReader); searcher.setSimilarity(oldSearcher.getSimilarity()); oldSearcher.getIndexReader().close(); for (Map.Entry<Similarity, IndexSearcher> e : cachedSearchers.entrySet()) { if (e.getValue() == oldSearcher) { cachedSearchers.put(e.getKey(), searcher); } } } } } return searcher; }
From source file:com.gentics.cr.lucene.indexaccessor.DefaultIndexAccessor.java
License:Apache License
/**
 * Reopens all of the Searchers in the Searcher cache. This method is invoked
 * in a synchronized context.
 */
private void reopenCachedSearchers() {
    LOGGER.debug("reopening cached searchers (" + cachedSearchers.size() + "):"
            + Thread.currentThread().getId());
    // BUGFIX: the previous version iterated keySet() and called remove()+put() on the
    // map inside the loop. remove() is a structural modification, so the next loop step
    // threw ConcurrentModificationException whenever more than one searcher was cached.
    // Iterating entrySet() and replacing via Entry.setValue() is non-structural and safe.
    for (Map.Entry<Similarity, IndexSearcher> entry : cachedSearchers.entrySet()) {
        IndexSearcher searcher = entry.getValue();
        try {
            IndexReader oldReader = searcher.getIndexReader();
            IndexSearcher oldSearcher = searcher;
            IndexReader newReader = oldReader.reopen();
            if (newReader != oldReader) {
                // Reader changed on disk: swap in a fresh searcher with the same
                // similarity, close the outdated reader, and update the cache slot.
                searcher = new IndexSearcher(newReader);
                searcher.setSimilarity(oldSearcher.getSimilarity());
                oldSearcher.getIndexReader().close();
                entry.setValue(searcher);
            }
        } catch (IOException e) {
            // Best-effort per entry: one failed reopen must not abort the rest.
            LOGGER.error("error reopening cached Searcher", e);
        }
    }
}
From source file:com.xiaomi.linden.lucene.query.flexiblequery.FlexibleWeight.java
License:Apache License
/**
 * Builds the weight for a {@link FlexibleQuery}: resolves a TermContext for every
 * (field, term) cell, optionally computes cross-field maximum document/term
 * frequencies for "global IDF" scoring, and precomputes a Similarity.SimWeight
 * plus TermStats entry for each term in the matrix.
 *
 * @param query    the flexible query being weighted
 * @param searcher supplies the reader context, collection and term statistics
 * @throws IOException if term statistics cannot be read from the index
 */
public FlexibleWeight(FlexibleQuery query, IndexSearcher searcher) throws IOException {
    this.query = query;
    this.similarity = searcher.getSimilarity();
    final IndexReaderContext context = searcher.getTopReaderContext();
    int[] maxDocFreqs = null;
    long[] maxTotalTermFreqs = null;
    // Cache so each distinct Term is resolved against the reader only once,
    // even when it appears under several fields.
    Map<Term, TermContext> builtTermMap = new HashMap<>();
    if (query.enableGlobalIDF()) {
        // globalTerms is indexed [field][termPosition].
        FlexibleQuery.FlexibleTerm[][] globalTerms = query.getGlobalTerms();
        TermContext[][] globalStates = new TermContext[globalTerms.length][];
        for (int i = 0; i < globalTerms.length; ++i) {
            globalStates[i] = new TermContext[globalTerms[i].length];
            for (int j = 0; j < globalTerms[i].length; ++j) {
                Term term = globalTerms[i][j].term;
                TermContext termContext = builtTermMap.get(term);
                if (termContext != null) {
                    globalStates[i][j] = termContext;
                } else {
                    globalStates[i][j] = TermContext.build(context, globalTerms[i][j].term);
                    builtTermMap.put(term, globalStates[i][j]);
                }
            }
        }
        // For each term position, take the maximum docFreq / totalTermFreq over all
        // fields. NOTE(review): assumes every row of globalTerms has the same length
        // as row 0 — confirm against FlexibleQuery.getGlobalTerms().
        maxDocFreqs = new int[globalTerms[0].length];
        maxTotalTermFreqs = new long[globalTerms[0].length];
        int fieldLength = globalTerms.length;
        int termLength = globalTerms[0].length;
        for (int i = 0; i < termLength; ++i) {
            int maxDocFreq = 0;
            long maxTotalTermFreq = 0;
            for (int j = 0; j < fieldLength; ++j) {
                maxDocFreq = Math.max(globalStates[j][i].docFreq(), maxDocFreq);
                maxTotalTermFreq = Math.max(globalStates[j][i].totalTermFreq(), maxTotalTermFreq);
            }
            maxDocFreqs[i] = maxDocFreq;
            maxTotalTermFreqs[i] = maxTotalTermFreq;
        }
    }
    // Resolve contexts for the per-field query terms (reusing the shared cache).
    FlexibleQuery.FlexibleTerm[][] terms = query.getTerms();
    TermContext[][] states = new TermContext[terms.length][];
    for (int i = 0; i < terms.length; ++i) {
        states[i] = new TermContext[terms[i].length];
        for (int j = 0; j < terms[i].length; ++j) {
            Term term = terms[i][j].term;
            TermContext termContext = builtTermMap.get(term);
            if (termContext != null) {
                states[i][j] = termContext;
            } else {
                states[i][j] = TermContext.build(context, terms[i][j].term);
                builtTermMap.put(term, states[i][j]);
            }
        }
    }
    // Precompute per-term similarity weights and bundle them into termStatsMatrix.
    termStatsMatrix = new TermStats[terms.length][];
    for (int i = 0; i < terms.length; ++i) {
        termStatsMatrix[i] = new TermStats[terms[i].length];
        for (int j = 0; j < terms[i].length; ++j) {
            FlexibleQuery.FlexibleTerm term = terms[i][j];
            TermContext state = states[i][j];
            TermStatistics termStats;
            if (query.enableGlobalIDF()) {
                // Substitute the cross-field maxima for the per-field statistics.
                termStats = new TermStatistics(term.term.bytes(), maxDocFreqs[j], maxTotalTermFreqs[j]);
            } else {
                termStats = searcher.termStatistics(term.term, state);
            }
            Similarity.SimWeight stats = similarity.computeWeight(term.boost,
                    searcher.collectionStatistics(term.term.field()), termStats);
            TermStats termStatsInfo = new TermStats();
            termStatsInfo.stats = stats;
            termStatsInfo.term = term.term;
            termStatsInfo.termContext = state;
            termStatsMatrix[i][j] = termStatsInfo;
        }
    }
}
From source file:in.student.project.queryexpansion.SearchFilesLDA.java
License:Apache License
/** * * args/* w w w . j a va 2s . c om*/ * 0 - properties_file * * # index-dir * index-dir = index * * # query-file * query-file = queries.txt * * # out-count * docs-per-query = 1000 * * # query-term-count (0 - any count) (ex. queries with only 3 terms - 3 ) * query-terms-count = 0 * * # out-file - name of the file where results will be written * out-file = search.result * * #query-expansion * For details see <code> QueryExpansion Constants</code> * @throws Exception * */ public static void main(String[] args) throws Exception { // Load Properties Properties properties = new Properties(); properties.load(new FileInputStream(args[0])); String runTag = args[0]; String remoteQuery = ""; if (args.length > 1) { remoteQuery = args[1]; expandedQuery = ""; } properties.setProperty(Defs.RUN_TAG_FLD, runTag); String indexDir = properties.getProperty("index-dir"); String queryFile = properties.getProperty("query-file"); int termCount = Integer.valueOf(properties.getProperty("query-terms-count")).intValue(); int outCount = Integer.valueOf(properties.getProperty("docs-per-query")).intValue(); String queryFileName = properties.getProperty("query-file"); String outFileName = runTag + properties.getProperty("out-file"); String queryExpansionFlag = properties.getProperty(QueryExpansion.METHOD_FLD, ""); IndexReader idxReader = DirectoryReader.open(FSDirectory.open(new File(indexDir))); IndexSearcher searcher = new IndexSearcher(idxReader); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_48); BufferedWriter writer = new BufferedWriter(new FileWriter(new File(outFileName))); String query_num = null; BufferedReader in = new BufferedReader(new FileReader(queryFileName)); TFIDFSimilarity similarity = null; int hitsCount = 50; while (true) { String line; line = in.readLine(); try { if (line.length() == -1) break; } catch (Exception e) { return; } StringTokenizer tknzr = new StringTokenizer(line); query_num = tknzr.nextToken(); line = line.substring(query_num.length()).trim(); if 
(remoteQuery.length() > 0) line = remoteQuery; String queryStr = line; queryStr = QueryParser.escape(queryStr); QueryParser parser = new QueryParser(Version.LUCENE_48, Defs.FLD_TEXT, analyzer); Query query = parser.parse(queryStr); QueryTermVector queryTermVector = new QueryTermVector(line, analyzer); String[] terms = queryTermVector.getTerms(); similarity = (TFIDFSimilarity) searcher.getSimilarity(); TopDocs hits = searcher.search(query, hitsCount); System.out.println("query" + " : " + query.toString()); System.out.println(hits.totalHits + " total matching documents"); // Query Expansion with LDA QueryExpansion queryExpansion; queryExpansion = new QueryExpansion(analyzer, searcher, similarity, properties); query = queryExpansion.expandQueryLDA(queryStr, hits, properties); expandedQuery = query.toString("contents"); logger.info("Expanded Query: " + query); System.out.println("Expanded Query: " + query); hits = searcher.search(query, hitsCount); logger.info(hits.totalHits + " total matching documents"); Vector<TermQuery> expandedQueryTerms = queryExpansion.getExpandedTerms(); generateOutput(hits, expandedQueryTerms, query_num, writer, termCount, outCount, searcher, similarity, idxReader); writer.flush(); if (remoteQuery.length() > 0) break; // onetime call. } writer.close(); }
From source file:in.student.project.queryexpansion.SearchFilesRocchio.java
License:Apache License
/** * * args/*from w ww .jav a2s.co m*/ * 0 - properties_file * * # index-dir * index-dir = index * * # query-file * query-file = queries.txt * * # out-count * docs-per-query = 1000 * * # query-term-count (0 - any count) (ex. queries with only 3 terms - 3 ) * query-terms-count = 0 * * # out-file - name of the file where results will be written * out-file = search.result * * #query-expansion * For details see <code> QueryExpansion Constants</code> * @throws Exception * */ public static void main(String[] args) throws Exception { // Load Properties Properties properties = new Properties(); properties.load(new FileInputStream(args[0])); String runTag = args[0]; String remoteQuery = ""; if (args.length > 1) { remoteQuery = args[1]; expandedQuery = ""; } properties.setProperty(Defs.RUN_TAG_FLD, runTag); String indexDir = properties.getProperty("index-dir"); String queryFile = properties.getProperty("query-file"); int termCount = Integer.valueOf(properties.getProperty("query-terms-count")).intValue(); int outCount = Integer.valueOf(properties.getProperty("docs-per-query")).intValue(); String queryFileName = properties.getProperty("query-file"); String outFileName = runTag + properties.getProperty("out-file"); String queryExpansionFlag = properties.getProperty(QueryExpansion.METHOD_FLD, ""); IndexReader idxReader = DirectoryReader.open(FSDirectory.open(new File(indexDir))); IndexSearcher searcher = new IndexSearcher(idxReader); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_48); BufferedWriter writer = new BufferedWriter(new FileWriter(new File(outFileName))); String query_num = null; BufferedReader in = new BufferedReader(new FileReader(queryFileName)); TFIDFSimilarity similarity = null; int hitsCount = 50; while (true) { String line; line = in.readLine(); try { if (line.length() == -1) break; } catch (Exception e) { return; } StringTokenizer tknzr = new StringTokenizer(line); query_num = tknzr.nextToken(); line = line.substring(query_num.length()).trim(); if 
(remoteQuery.length() > 0) line = remoteQuery; String queryStr = line; queryStr = QueryParser.escape(queryStr); QueryParser parser = new QueryParser(Version.LUCENE_48, Defs.FLD_TEXT, analyzer); Query query = parser.parse(queryStr); QueryTermVector queryTermVector = new QueryTermVector(line, analyzer); String[] terms = queryTermVector.getTerms(); similarity = (TFIDFSimilarity) searcher.getSimilarity(); TopDocs hits = searcher.search(query, hitsCount); System.out.println("query" + " : " + query.toString()); System.out.println(hits.totalHits + " total matching documents"); // Query Expansion with Rocchio algorithm if (queryExpansionFlag.equals(QueryExpansion.ROCCHIO_METHOD)) { QueryExpansion queryExpansion; queryExpansion = new QueryExpansion(analyzer, searcher, similarity, properties); query = queryExpansion.expandQuery(queryStr, hits, properties); expandedQuery = query.toString("contents"); System.out.println("Expanded Query: " + query); hits = searcher.search(query, hitsCount); Vector<TermQuery> expandedQueryTerms = queryExpansion.getExpandedTerms(); generateOutput(hits, expandedQueryTerms, query_num, writer, termCount, outCount, searcher, similarity, idxReader); } writer.flush(); if (remoteQuery.length() > 0) break; // onetime call. } writer.close(); }
From source file:org.apache.blur.lucene.search.AbstractWrapperQuery.java
License:Apache License
/**
 * Delegates to the supplied searcher's configured Similarity.
 *
 * @param searcher the searcher whose similarity applies to this wrapper
 * @return the searcher's similarity
 */
public Similarity getSimilarity(IndexSearcher searcher) {
    final Similarity configured = searcher.getSimilarity();
    return configured;
}
From source file:org.apache.solr.search.function.IDFValueSource.java
License:Apache License
@Override public DocValues getValues(Map context, AtomicReaderContext readerContext) throws IOException { IndexSearcher searcher = (IndexSearcher) context.get("searcher"); Similarity sim = searcher.getSimilarity(); // todo: we need docFreq that takes a BytesRef String strVal = ByteUtils.UTF8toUTF16(indexedBytes); int docfreq = searcher.docFreq(new Term(indexedField, strVal)); float idf = sim.idf(docfreq, searcher.maxDoc()); return new ConstDoubleDocValues(idf, this); }
From source file:org.apache.solr.search.function.NormValueSource.java
License:Apache License
@Override
public DocValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
    // The searcher is stashed in the shared function context under "searcher".
    final IndexSearcher searcher = (IndexSearcher) context.get("searcher");
    final Similarity sim = searcher.getSimilarity();
    final byte[] fieldNorms = readerContext.reader.norms(field);
    // No norms stored for this field: every document gets a constant 0.
    if (fieldNorms == null) {
        return new ConstDoubleDocValues(0.0, this);
    }
    return new FloatDocValues(this) {
        @Override
        public float floatVal(int doc) {
            // Decode the stored single-byte norm back to a float via the similarity.
            return sim.decodeNormValue(fieldNorms[doc]);
        }
    };
}
From source file:org.elasticsearch.common.lucene.search.ExtendedIndexSearcher.java
License:Apache License
/**
 * Wraps an existing searcher: reuses its index reader and copies over its
 * similarity so scoring behaves identically to the wrapped instance.
 *
 * @param searcher the searcher to mirror
 */
public ExtendedIndexSearcher(IndexSearcher searcher) {
    super(searcher.getIndexReader());
    this.setSimilarity(searcher.getSimilarity());
}
From source file:org.elasticsearch.index.search.child.ChildrenConstantScoreQuery.java
License:Apache License
/**
 * Builds the Weight for this query: runs the (rewritten) child query once to
 * collect the set of matching parent ids, then returns a ParentWeight that
 * scores parent documents against that set, optionally short-circuited by a
 * cheap filter when few parents matched.
 *
 * @param searcher the searcher executing the query
 * @return the parent-side weight, or a match-no-docs weight if no child matched
 * @throws IOException if executing the child query fails
 */
@Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
    SearchContext searchContext = SearchContext.current();
    // Hash of the parent ids of every matching child document.
    BytesRefHash parentIds = new BytesRefHash(512, searchContext.pageCacheRecycler());
    ParentIdCollector collector = new ParentIdCollector(parentType, parentChildIndexFieldData, parentIds);
    final Query childQuery;
    if (rewrittenChildQuery == null) {
        // First use: rewrite once and cache on the instance.
        childQuery = rewrittenChildQuery = searcher.rewrite(originalChildQuery);
    } else {
        // Cached rewrite must have been produced against the same reader.
        assert rewriteIndexReader == searcher.getIndexReader();
        childQuery = rewrittenChildQuery;
    }
    // Run the child query on a plain searcher over the same reader, keeping the
    // caller's similarity so scoring-sensitive child queries behave the same.
    IndexSearcher indexSearcher = new IndexSearcher(searcher.getIndexReader());
    indexSearcher.setSimilarity(searcher.getSimilarity());
    indexSearcher.search(childQuery, collector);
    long remaining = parentIds.size();
    if (remaining == 0) {
        // No children matched: nothing can match on the parent side either.
        return Queries.newMatchNoDocsQuery().createWeight(searcher);
    }
    Filter shortCircuitFilter = null;
    if (remaining == 1) {
        // Exactly one parent: a single uid term filter is the cheapest check.
        BytesRef id = parentIds.get(0, new BytesRef());
        shortCircuitFilter = new TermFilter(
                new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(parentType, id)));
    } else if (remaining <= shortCircuitParentDocSet) {
        // Small result set: filter parents directly against the collected ids.
        shortCircuitFilter = new ParentIdsFilter(parentType, nonNestedDocsFilter, parentIds);
    }
    ParentWeight parentWeight = new ParentWeight(parentFilter, shortCircuitFilter, parentIds);
    // Register so the BytesRefHash is released when the search context closes.
    searchContext.addReleasable(parentWeight);
    return parentWeight;
}