List of usage examples for org.apache.lucene.search IndexSearcher IndexSearcher
public IndexSearcher(IndexReaderContext context)
From source file:com.esri.gpt.server.assertion.index.AsnIndexAdapter.java
License:Apache License
/**
 * Loads the assertion previously cast for the active subject, predicate and user.
 * @param context the assertion operation context
 * @return the previously cast assertion (can be null)
 * @throws Exception if an exception occurs
 */
public Assertion loadPreviousUserAssertion(AsnContext context) throws Exception {
    AsnOperation operation = context.getOperation();
    String userKey = Val.chkStr(operation.getUserPart().getKey());
    String username = Val.chkStr(operation.getUserPart().getName());
    boolean isAnonymous = username.equalsIgnoreCase(AsnConstants.ANONYMOUS_USERNAME);
    if (!isAnonymous && (userKey.length() > 0)) {
        IndexReader reader = null;
        IndexSearcher searcher = null;
        try {
            reader = this.makeIndexReader();
            searcher = new IndexSearcher(reader);
            return this.loadPreviousUserAssertion(context, searcher);
        } finally {
            this.closeReader(reader);
            this.closeSearcher(searcher);
        }
    }
    return null;
}
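For contrast, the same open-search-close lifecycle can be written with try-with-resources on a modern Lucene (5.x or later). This is a minimal sketch, not the Esri adapter's code: the "index" path is a placeholder, and it relies on IndexSearcher holding no closeable resources of its own, so only the directory and reader need closing.

import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.FSDirectory;

public class SearcherLifecycleSketch {
    public static void main(String[] args) throws Exception {
        // Both the directory and the reader are Closeable; the searcher is not.
        try (FSDirectory dir = FSDirectory.open(Paths.get("index"));
             DirectoryReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            System.out.println("live documents: " + searcher.getIndexReader().numDocs());
        }
    }
}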
From source file:com.evoapps.lucene.SearchFiles.java
License:Apache License
/** Simple command-line based search demo. */
public ArrayList<Publication> search(String queryTerm) throws Exception {
    list.clear();
    String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details.";
    /*
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }
    */
    String index = "index";
    String field = "contents";
    // String field = "Abstract";
    String queries = null;
    int repeat = 0;
    boolean raw = false;
    String queryString = queryTerm;
    int hitsPerPage = 20;
    /*
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            index = args[i + 1];
            i++;
        } else if ("-field".equals(args[i])) {
            field = args[i + 1];
            i++;
        } else if ("-queries".equals(args[i])) {
            queries = args[i + 1];
            i++;
        } else if ("-query".equals(args[i])) {
            queryString = args[i + 1];
            i++;
        } else if ("-repeat".equals(args[i])) {
            repeat = Integer.parseInt(args[i + 1]);
            i++;
        } else if ("-raw".equals(args[i])) {
            raw = true;
        } else if ("-paging".equals(args[i])) {
            hitsPerPage = Integer.parseInt(args[i + 1]);
            if (hitsPerPage <= 0) {
                System.err.println("There must be at least 1 hit per page.");
                System.exit(1);
            }
            i++;
        }
    }
    */
    // index = "/home/subhash/LuceneFolder/Indexer";
    index = "/home/subhash/Dropbox/LuceneFolder/IndexNewData";
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
    BufferedReader in = null;
    if (queries != null) {
        in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
    } else {
        in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
    }
    QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer);
    while (true) {
        if (queries == null && queryString == null) {
            // prompt the user
            System.out.println("Enter query: ");
        }
        String line = queryString != null ? queryString : in.readLine();
        if (line == null) {
            break;
        }
        line = line.trim();
        if (line.length() == 0) {
            break;
        }
        Query query = parser.parse(line);
        System.out.println("Searching for: " + query.toString(field));
        if (repeat > 0) { // repeat & time as benchmark
            Date start = new Date();
            for (int i = 0; i < repeat; i++) {
                searcher.search(query, null, 100);
            }
            Date end = new Date();
            System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
        }
        // assign to the field rather than shadowing it with a discarded local
        list = doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null);
        if (queryString != null) {
            break;
        }
    }
    reader.close();
    return list;
}
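Stripped of argument parsing and the interactive loop, the demo boils down to a handful of calls. Below is a minimal sketch against the same Lucene 4.x API; the "index" directory, the "contents" field, and the stored "path" field are assumptions borrowed from the stock Lucene demo indexer, not from this project.

import java.io.File;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class MinimalSearchSketch {
    public static void main(String[] args) throws Exception {
        IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index")));
        IndexSearcher searcher = new IndexSearcher(reader);
        QueryParser parser = new QueryParser(Version.LUCENE_40, "contents",
                new StandardAnalyzer(Version.LUCENE_40));
        Query query = parser.parse("lucene AND search");
        // fetch the top 10 hits and print the stored "path" field of each
        for (ScoreDoc hit : searcher.search(query, 10).scoreDocs) {
            System.out.println(searcher.doc(hit.doc).get("path"));
        }
        reader.close();
    }
}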
From source file:com.example.analyzer.server.database.DbFullTextIndex.java
License:Open Source License
public void search(String searchText, DbFullTextCollector collector) {
    try {
        long beginTime = System.currentTimeMillis();
        IndexReader reader = IndexReader.open(ramDirectory, true);
        Searcher searcher = new IndexSearcher(reader);
        collector.setSearcher(searcher);
        QueryParser queryParser = new QueryParser(Version.LUCENE_30, VALUE,
                new StandardAnalyzer(Version.LUCENE_30));
        Query query = queryParser.parse(searchText);
        searcher.search(query, collector);
        searcher.close();
        reader.close(); // the searcher does not close a reader passed to its constructor
        long endTime = System.currentTimeMillis();
        long elapsedTime = endTime - beginTime;
        System.out.println("executed query in " + elapsedTime + " ms");
    } catch (CorruptIndexException e) {
        throw new RuntimeException(e);
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (ParseException e) {
        throw new RuntimeException(e);
    }
}
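Note that in the Lucene 3.x API used here, Searcher.close() releases only resources the searcher opened itself; a reader handed to the constructor must be closed separately. From Lucene 4.0 on, the Searcher base class and close() were removed entirely, leaving the reader as the only closeable handle.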
From source file:com.example.search.SearchFiles.java
License:Apache License
public static ArrayList<SearchResult> find(String input, int startPage) throws Exception {
    // String index = ".\\WebContent\\index"; // D:\Users\admin\workspace\TestJsp\WebContent\index
    String index = "./index";
    // System.out.println("index:" + index);
    String field = "content";
    String queries = null;
    int repeat = 0;
    boolean raw = false;
    String queryString = null;
    int hitsPerPage = HitsPerPage;
    IndexReader reader = IndexReader.open(FSDirectory.open(new File(index)));
    IndexSearcher searcher = new IndexSearcher(reader);
    // Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
    Analyzer analyzer = new ICTCLASAnalyzer();
    BufferedReader in = null;
    // in = new BufferedReader(input);
    int pos;
    ArrayList<SearchResult> result = new ArrayList<SearchResult>();
    // keep only the first line of the input
    if (((pos = input.indexOf('\r')) != -1) || (pos = input.indexOf('\n')) != -1)
        input = input.substring(0, pos);
    QueryParser parser = new QueryParser(Version.LUCENE_31, field, analyzer);
    // while (true) {
    //     if (queries == null && queryString == null) {
    //         // prompt the user
    //         System.out.println("Enter query: ");
    //     }
    String line = queryString != null ? queryString : input;
    if (line == null) {
        searcher.close();
        reader.close();
        return result;
    }
    line = line.trim();
    if (line.length() == 0) {
        searcher.close();
        reader.close();
        return result;
    }
    Query query = parser.parse(line);
    System.out.println("Searching for: " + query.toString(field));
    if (repeat > 0) { // repeat & time as benchmark
        Date start = new Date();
        for (int i = 0; i < repeat; i++) {
            searcher.search(query, null, 100);
        }
        Date end = new Date();
        System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
    }
    doPagingSearch(in, searcher, query, hitsPerPage, raw,
            queries == null && queryString == null, result, startPage);
    searcher.close();
    reader.close();
    return result;
}
From source file:com.facebook.presto.example.LuceneRecordCursor.java
License:Apache License
public LuceneRecordCursor(List<LuceneColumnHandle> columnHandles) throws ParseException {
    this.columnHandles = columnHandles;
    IndexReader reader = null;
    try {
        reader = DirectoryReader
                .open(FSDirectory.open(Paths.get("/home/liyong/workspace-neno/lucenetest/index")));
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    searcher = new IndexSearcher(reader);
    this.NumDoc = reader.maxDoc();
    fieldToColumnIndex = new int[columnHandles.size()];
    for (int i = 0; i < columnHandles.size(); i++) {
        LuceneColumnHandle columnHandle = columnHandles.get(i);
        fieldToColumnIndex[i] = columnHandle.getOrdinalPosition();
    }
}
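Catching the IOException and continuing means a failed open only surfaces later, as a NullPointerException when the null reader is dereferenced. One defensive variant is sketched below; IndexOpener and openOrFail are illustrative names, not part of the Presto connector.

import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;

final class IndexOpener {
    /** Opens a reader or fails fast, so callers never see a null reader. */
    static IndexReader openOrFail(String path) {
        try {
            return DirectoryReader.open(FSDirectory.open(Paths.get(path)));
        } catch (IOException e) {
            throw new UncheckedIOException("cannot open index at " + path, e);
        }
    }
}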
From source file:com.facebook.presto.operator.HashAggregationOperator.java
License:Apache License
private Map<String, Long> GetGroupByResult() throws IOException {
    IndexReader reader = null;
    Map<String, Long> returnMap = new HashMap<String, Long>();
    try {
        reader = DirectoryReader
                .open(FSDirectory.open(Paths.get("/home/liyong/workspace-neno/lucenetest/index")));
    } catch (IOException e) {
        e.printStackTrace();
    }
    IndexSearcher searcher = new IndexSearcher(reader);
    Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "orderpriority");
    TermsEnum te = terms.iterator();
    while (te.next() != null) {
        String name = te.term().utf8ToString();
        int count = te.docFreq();
        returnMap.put(name, Long.valueOf(count));
    }
    return returnMap;
}
From source file:com.facebook.presto.operator.ScanFilterAndProjectOperator.java
License:Apache License
private Map<String, Long> getCountResult() throws IOException {
    IndexReader reader = null;
    Map<String, Long> returnMap = new HashMap<String, Long>();
    try {
        reader = DirectoryReader
                .open(FSDirectory.open(Paths.get("/home/liyong/workspace-neno/lucenetest/index")));
    } catch (IOException e) {
        e.printStackTrace();
    }
    IndexSearcher searcher = new IndexSearcher(reader);
    Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "orderpriority");
    TermsEnum te = terms.iterator();
    while (te.next() != null) {
        String name = te.term().utf8ToString();
        int count = te.docFreq();
        returnMap.put(name, Long.valueOf(count));
    }
    return returnMap;
}
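This method is byte-for-byte the same as GetGroupByResult in the HashAggregationOperator example above, which suggests factoring the term enumeration into a helper. A sketch under the same Lucene 5.x-era API follows (TermCounts is an illustrative name); note that TermsEnum.docFreq() still counts documents that are deleted but not yet merged away, so the totals are upper bounds on live documents.

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

final class TermCounts {
    /** Maps each distinct term of a field to its document frequency. */
    static Map<String, Long> byField(IndexReader reader, String field) throws IOException {
        Map<String, Long> counts = new HashMap<>();
        Terms terms = MultiFields.getTerms(reader, field);
        if (terms == null) {
            return counts; // the field does not exist in this index
        }
        TermsEnum te = terms.iterator();
        for (BytesRef term = te.next(); term != null; term = te.next()) {
            counts.put(term.utf8ToString(), (long) te.docFreq());
        }
        return counts;
    }
}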
From source file:com.flaptor.hounder.indexer.IndexManager.java
License:Apache License
/**
 * Performs the deletes and removes duplicates from the index.
 */
private synchronized void applyDeletes() {
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try {
        reader = IndexReader.open(indexDirectory);
        searcher = new IndexSearcher(reader); // one searcher over the reader serves all keys
        Set<Integer> documentsToDelete = new HashSet<Integer>();
        Enumeration keysEnum = lastOperation.keys();
        // First we collect the lucene ids of documents to be deleted.
        while (keysEnum.hasMoreElements()) {
            String key = (String) keysEnum.nextElement();
            // If the last operation is a delete, lastAddition will be 0 and we'll find no match
            // in the index. This way, all the documents with that DocumentId will be erased.
            String lastAddition = String.valueOf((Long) (lastOperation.get(key)));
            if (logger.isEnabledFor(Level.DEBUG)) {
                logger.debug("Applying deletes: searching " + docIdName + " = [" + key + "]");
            }
            ScorelessHitCollector collector = new HashSetScorelessHitCollector();
            searcher.search(new TermQuery(new Term(docIdName, key)), collector);
            Set<Integer> docIds = collector.getMatchingDocuments();
            if (logger.isEnabledFor(Level.DEBUG)) {
                logger.debug("Applying deletes: found matches: " + docIds.size());
            }
            for (Integer docId : docIds) {
                Document d = searcher.doc(docId);
                String addId = d.get("AddId");
                if (!lastAddition.equals(addId)) {
                    if (logger.isEnabledFor(Level.DEBUG)) {
                        logger.debug("Applying deletes: deleting AddId:" + addId);
                    }
                    documentsToDelete.add(docId);
                }
            }
        }
        // Now we have all of lucene's ids of documents to be deleted and we can
        // proceed with the actual deletion.
        for (Integer i : documentsToDelete) {
            reader.deleteDocument(i);
        }
    } catch (IOException e) {
        logger.fatal("applyDeletes: IOException caught.", e);
        throw new RuntimeException(e);
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (Exception e) {
                String s = "applyDeletes: Couldn't close searcher, nothing I can do about it" + e;
                logger.error(s);
                throw new IllegalStateException(s);
            }
        }
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception e) {
                logger.warn("Couldn't close reader, nothing I can do about it", e);
            }
        }
    }
    lastOperation.clear();
}
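The reader-side delete API used here (IndexReader.deleteDocument) was removed in Lucene 4.0. The writer-side idiom for this keep-only-the-latest pattern is IndexWriter.updateDocument(Term, Document), which atomically deletes every document matching the term and adds the replacement, so no AddId bookkeeping is needed. A minimal sketch, assuming the same docIdName key field (WriterSideDedup and replace are illustrative names):

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;

final class WriterSideDedup {
    /** Atomically replaces every document carrying this id with the new version. */
    static void replace(IndexWriter writer, String docIdName, String key, Document latest)
            throws IOException {
        writer.updateDocument(new Term(docIdName, key), latest); // delete-then-add in one step
        writer.commit(); // make the change visible to newly opened readers
    }
}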
From source file:com.flaptor.hounder.indexer.LuceneUnicodeTest.java
License:Apache License
@TestInfo(testType = TestInfo.TestType.UNIT)
public void testIndexedContent() {
    try {
        String testString = "otorrinolaring\u00f3logo";
        logger.debug("Using test string: " + testString);
        Document doc = new Document();
        doc.add(new Field("field1", testString, Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
        writer.optimize();
        writer.close();
        IndexReader reader = IndexReader.open(dir);
        IndexSearcher searcher = new IndexSearcher(reader);
        int docId = searcher.search(new TermQuery(new Term("field1", testString)), null, 10).scoreDocs[0].doc;
        Document doc2 = searcher.doc(docId);
        String recoveredString = doc2.get("field1");
        logger.debug("Recovered String: " + recoveredString);
        assertTrue("Strings do not match", testString.equals(recoveredString));
    } catch (Exception e) {
        logger.error("Exception caught:" + e);
        assertTrue("exception", false);
    }
}
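Two details make this TermQuery round-trip work: Field.Index.ANALYZED runs the string through the analyzer at index time, and the test string happens to be a single lowercase word, so the analyzer leaves it as one term identical to the raw string. A string containing uppercase letters or whitespace would be indexed as different tokens, and the raw-string TermQuery would come back empty.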
From source file:com.flaptor.hounder.searcher.spell.SpellChecker.java
License:Apache License
/**
 * Suggest similar words (optionally restricted to a field of a user index).
 * @param word the word you want a spell check done on
 * @param num_sug the number of suggested words
 * @param ir the IndexReader of the user index (can be null; see the field parameter)
 * @param field the field of the user index: if field is not null, the suggested
 * words are restricted to the words present in this field
 * @param morePopular return only suggested words that are more frequent than the searched word
 * (only in restricted mode, i.e. indexReader != null and field != null)
 * @throws IOException
 * @return the list of suggested words, sorted by two criteria:
 * first the edit distance, then (only in restricted mode) the popularity
 * of the suggested word in the field of the user index
 */
public String[] suggestSimilar(String word, int num_sug, IndexReader ir, String field, boolean morePopular)
        throws IOException {
    float minScore = min;
    final TRStringDistance sd = new TRStringDistance(word);
    final int lengthWord = word.length();
    final int goalFreq = (morePopular && ir != null) ? ir.docFreq(new Term(field, word)) : 0;
    if (!morePopular && goalFreq > 0) {
        return new String[] { word }; // the word exists in the index and a more popular word is not wanted
    }
    BooleanQuery query = new BooleanQuery();
    String[] grams;
    String key;
    for (int ng = getMin(lengthWord); ng <= getMax(lengthWord); ng++) {
        key = "gram" + ng; // form key
        grams = formGrams(word, ng); // form word into ngrams (allow dups too)
        if (grams.length == 0) {
            continue; // hmm
        }
        if (bStart > 0) { // should we boost prefixes?
            add(query, "start" + ng, grams[0], bStart); // matches start of word
        }
        if (bEnd > 0) { // should we boost suffixes?
            add(query, "end" + ng, grams[grams.length - 1], bEnd); // matches end of word
        }
        for (int i = 0; i < grams.length; i++) {
            add(query, key, grams[i]);
        }
    }
    IndexSearcher searcher = new IndexSearcher(this.spellindex);
    // go thru more than 'maxr' matches in case the distance filter triggers
    TopDocCollector collector = new TopDocCollector(10 * num_sug);
    searcher.search(query, collector);
    ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs;
    SuggestWordQueue sugqueue = new SuggestWordQueue(num_sug);
    SuggestWord sugword = new SuggestWord();
    for (int i = 0; i < scoreDocs.length; i++) {
        // fetch the i-th hit by its docId, not by the loop index
        Document doc = searcher.doc(scoreDocs[i].doc);
        sugword.string = doc.get(F_WORD); // get orig word
        if (sugword.string.equals(word)) {
            continue; // don't suggest a word for itself, that would be silly
        }
        // edit distance, normalized with the min word length
        sugword.score = doc.getBoost() * (1.0f
                - ((float) sd.getDistance(sugword.string) / Math.min(sugword.string.length(), lengthWord)));
        if (sugword.score < minScore) {
            continue;
        }
        if (ir != null) { // use the user index
            sugword.freq = ir.docFreq(new Term(field, sugword.string)); // freq in the index
            if ((morePopular && goalFreq > sugword.freq) || sugword.freq < 1) {
                // don't suggest a word that is not present in the field
                continue;
            }
        }
        sugqueue.insert(sugword);
        if (sugqueue.size() == num_sug) {
            // if the queue is full, raise the bar to the minimum score in it
            minScore = ((SuggestWord) sugqueue.top()).score;
        }
        sugword = new SuggestWord();
    }
    // convert to a string array
    String[] list = new String[sugqueue.size()];
    for (int i = sugqueue.size() - 1; i >= 0; i--) {
        list[i] = ((SuggestWord) sugqueue.pop()).string;
    }
    searcher.close();
    return list;
}
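The candidate query is assembled from character n-grams of the misspelled word ("gram2", "gram3", ... fields, plus boosted "start"/"end" grams for prefix and suffix matches). For reference, gram formation amounts to a sliding substring window; the sketch below mirrors formGrams in Lucene's own SpellChecker, though this class's private version may differ in details.

/** Splits text into its ng-length character grams: formGrams("lucene", 2) -> lu, uc, ce, en, ne. */
static String[] formGrams(String text, int ng) {
    int count = text.length() - ng + 1;
    if (count < 1) {
        return new String[0]; // word shorter than the gram size
    }
    String[] grams = new String[count];
    for (int i = 0; i < count; i++) {
        grams[i] = text.substring(i, i + ng);
    }
    return grams;
}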