List of usage examples for org.apache.lucene.index.DirectoryReader.open
public static DirectoryReader open(final Directory directory) throws IOException
public static DirectoryReader open(final IndexCommit commit) throws IOException
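All of the examples below follow the same basic pattern with the Directory overload: obtain a Directory over the index, open a DirectoryReader on it, wrap the reader in an IndexSearcher, run queries, and close the reader. A minimal sketch of that pattern (the path "index", the field name "title", and the query term are placeholders; this assumes Lucene 5 or later, where FSDirectory.open takes a java.nio.file.Path — several 4.x examples below pass a java.io.File instead):

import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class OpenReaderExample {
    public static void main(String[] args) throws Exception {
        // Open the on-disk index directory; "index" is a placeholder path.
        Directory dir = FSDirectory.open(Paths.get("index"));
        // DirectoryReader.open gives a point-in-time view of the index;
        // try-with-resources closes the reader when the search is done.
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs hits = searcher.search(new TermQuery(new Term("title", "lucene")), 10);
            System.out.println("total hits: " + hits.totalHits);
        }
        dir.close();
    }
}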
From source file:br.com.crawlerspring.model.Searcher.java
public List<br.com.crawlerspring.model.Document> parametrizeDocuments(String parameters) throws Exception {
    List<br.com.crawlerspring.model.Document> parametrizedDocuments = new ArrayList<br.com.crawlerspring.model.Document>();
    RegexQuery q = new RegexQuery(new Term("title", ".*" + parameters + ".*"));
    int hitsPerPage = 10;
    IndexReader reader = DirectoryReader.open(index);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
    searcher.search(q, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    for (int cont = 0; cont < hits.length; ++cont) {
        br.com.crawlerspring.model.Document document = new br.com.crawlerspring.model.Document();
        int docId = hits[cont].doc;
        org.apache.lucene.document.Document luceneDocument = searcher.doc(docId);
        document.setTitle(luceneDocument.get("title"));
        document.setContent(luceneDocument.get("content"));
        parametrizedDocuments.add(document);
    }
    return parametrizedDocuments;
}
From source file:br.pucminas.ri.jsearch.queryexpansion.RocchioQueryExpansion.java
License:Open Source License
private List<Entry<String, Float>> getTermScoreList(Directory directory) throws CorruptIndexException, IOException {
    Map<String, Float> termScoreMap = new HashMap<>();
    ConcreteTFIDFSimilarity sim = new ConcreteTFIDFSimilarity();
    try (IndexReader idxReader = DirectoryReader.open(directory)) {
        idxReader.leaves().stream().map((leaf) -> leaf.reader()).forEach((reader) -> {
            try {
                Terms terms = reader.terms(Constants.DOC_CONTENT);
                TermsEnum termsEnum = terms.iterator();
                PostingsEnum postings = null;
                int docsNum = idxReader.numDocs();
                BytesRef text;
                while ((text = termsEnum.next()) != null) {
                    postings = termsEnum.postings(postings);
                    while (postings.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                        int freq = postings.freq();
                        float tf = sim.tf(freq);
                        // docsNum replaces the original's reference to an out-of-scope "indexReader".
                        float idf = sim.idf(termsEnum.docFreq(), docsNum);
                        termScoreMap.put(text.utf8ToString(), BETA * (tf * idf));
                    }
                }
            } catch (IOException ex) {
                Logger.getLogger(RocchioQueryExpansion.class.getName()).log(Level.SEVERE, null, ex);
            }
            // The reader is closed by the enclosing try-with-resources; the original also
            // closed it in a finally block here, which would close it while leaves were
            // still being visited.
        });
    }
    return new ArrayList<>(termScoreMap.entrySet());
}
From source file:br.pucminas.ri.jsearch.queryexpansion.RocchioQueryExpansion.java
License:Open Source License
private float getScore(Directory directory, String term) throws CorruptIndexException, IOException {
    try (IndexReader idxReader = DirectoryReader.open(directory)) {
        ConcreteTFIDFSimilarity sim = new ConcreteTFIDFSimilarity();
        for (LeafReaderContext context : idxReader.leaves()) {
            LeafReader reader = context.reader();
            try {
                Terms terms = reader.terms(Constants.DOC_CONTENT);
                TermsEnum termsEnum = terms.iterator();
                PostingsEnum postings = null;
                BytesRef text;
                while ((text = termsEnum.next()) != null) {
                    postings = termsEnum.postings(postings);
                    if (text.utf8ToString().equalsIgnoreCase(term)) {
                        while (postings.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                            int freq = postings.freq();
                            float tf = sim.tf(freq);
                            // idxReader replaces the original's reference to an out-of-scope "indexReader".
                            float idf = sim.idf(termsEnum.docFreq(), idxReader.numDocs());
                            return tf * idf;
                        }
                    }
                }
            } catch (IOException ex) {
                Logger.getLogger(RocchioQueryExpansion.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }
    return 0;
}
From source file:br.ufmt.harmonizacao.implementer.PatenteeSearcher.java
public void setPath(String path) {
    try {
        this.path = path + dirName;
        System.out.println(this.path);
        dir = new SimpleFSDirectory(new File(this.path));
        reader = DirectoryReader.open(dir);
        searcher = new IndexSearcher(reader);
    } catch (IOException ex) {
        Logger.getLogger(PatenteeSearcher.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:br.ufmt.harmonizacao.implementer.StandardSearcher.java
public void setPath(String path) {
    try {
        this.path = path + dirName;
        System.out.println(this.path);
        dir = new SimpleFSDirectory(new File(this.path));
        reader = DirectoryReader.open(dir);
        searcher = new IndexSearcher(reader);
    } catch (IOException ex) {
        // Log under this class (the original reused PatenteeSearcher's logger).
        Logger.getLogger(StandardSearcher.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:buscador.SearchFiles.java
License:Apache License
/** Simple command-line based search demo. */
public static void main(String[] args) throws Exception {
    String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n]"
            + " [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\n"
            + "See http://lucene.apache.org/core/4_1_0/demo/ for details.";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }
    String index = "Zaguan1";
    String[] fields = { "title", "description", "identifier", "date", "creator" };
    BooleanClause.Occur[] flags = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD,
            BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
    String queries = null;
    int repeat = 0;
    boolean raw = false;
    String queryString = null;
    int hitsPerPage = 10;
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            index = args[i + 1];
            i++;
        } else if ("-queries".equals(args[i])) {
            queries = args[i + 1];
            i++;
        } else if ("-query".equals(args[i])) {
            queryString = args[i + 1];
            i++;
        } else if ("-repeat".equals(args[i])) {
            repeat = Integer.parseInt(args[i + 1]);
            i++;
        } else if ("-raw".equals(args[i])) {
            raw = true;
        } else if ("-paging".equals(args[i])) {
            hitsPerPage = Integer.parseInt(args[i + 1]);
            if (hitsPerPage <= 0) {
                System.err.println("There must be at least 1 hit per page.");
                System.exit(1);
            }
            i++;
        }
    }
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new SpanishAnalyzer(Version.LATEST);
    BufferedReader in = null;
    if (queries != null) {
        in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
    } else {
        in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
    }
    while (true) {
        if (queries == null && queryString == null) {
            // prompt the user
            System.out.println("Enter query: ");
        }
        String line = queryString != null ? queryString : in.readLine();
        if (line == null || line.length() == -1) {
            break;
        }
        line = line.trim();
        if (line.length() == 0) {
            break;
        }
        Query query = MultiFieldQueryParser.parse(line, fields, flags, analyzer);
        if (repeat > 0) { // repeat & time as benchmark
            Date start = new Date();
            for (int i = 0; i < repeat; i++) {
                searcher.search(query, 100);
            }
            Date end = new Date();
            System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
        }
        doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null);
        if (queryString != null) {
            break;
        }
    }
    reader.close();
}
From source file:bzh.terrevirtuelle.navisu.gazetteer.impl.lucene.GeoNameResolver.java
License:Apache License
private IndexReader createIndexReader(String indexerPath) throws IOException {
    File indexfile = new File(indexerPath);
    indexDir = FSDirectory.open(indexfile.toPath());
    if (!DirectoryReader.indexExists(indexDir)) {
        LOG.log(Level.SEVERE, "No Lucene Index Directory Found, Invoke indexBuild() First !");
        System.exit(1);
    }
    return DirectoryReader.open(indexDir);
}
From source file:ca.dracode.ais.indexer.FileSearcher.java
License:Open Source License
public FileSearcher() {
    IndexReader indexReader;
    IndexSearcher indexSearcher = null;
    try {
        File indexDirFile = new File(FileIndexer.getRootStorageDir());
        Directory tmpDir = FSDirectory.open(indexDirFile);
        indexReader = DirectoryReader.open(tmpDir);
        indexSearcher = new IndexSearcher(indexReader);
    } catch (IOException ioe) {
        Log.e(TAG, "Error", ioe);
    }
    this.indexSearcher = indexSearcher;
}
From source file:ca.mcgill.cs.creco.logic.search.CategorySearch.java
License:Apache License
@Override
public List<Category> queryCategories(String pQueryString) {
    List<Category> searchResult = new ArrayList<Category>();
    try {
        DirectoryReader reader = DirectoryReader.open(aDirectory);
        IndexSearcher searcher = new IndexSearcher(reader);
        TopScoreDocCollector results = TopScoreDocCollector.create(MAX_NUM_RESULTS, true);
        // Search category names
        Query categoryNameQuery = new QueryParser(VERSION, CATEGORY_NAME, aAnalyzer).parse(pQueryString);
        searcher.search(categoryNameQuery, results);
        // Search flattened text (only product names for now)
        Query flattenedTextQuery = new QueryParser(VERSION, FLATTENED_TEXT, aAnalyzer).parse(pQueryString);
        searcher.search(flattenedTextQuery, results);
        for (ScoreDoc scoredResult : results.topDocs().scoreDocs) {
            Document doc = searcher.doc(scoredResult.doc);
            Category resultCategory = aDataStore.getCategory(doc.get(CATEGORY_ID));
            if (!searchResult.contains(resultCategory) && resultCategory.getNumberOfProducts() > 0) {
                searchResult.add(resultCategory);
            }
        }
    } catch (IOException e) {
        LOG.error(e.getMessage());
    } catch (ParseException e) {
        LOG.error(e.getMessage());
    }
    return searchResult;
}
From source file:calliope.search.AeseSearch.java
License:Open Source License
/**
 * Search the index for the given expression
 * @param expr the expression to be parsed
 * @param langCode the language of the expression and index
 * @param profile the hit profile (where to start from etc)
 * @return the result docs
 */
public static String searchIndex(String expr, String langCode, HitProfile profile) {
    StringBuilder sb = new StringBuilder();
    try {
        Analyzer analyzer = AeseSearch.createAnalyzer(langCode);
        DirectoryReader reader = DirectoryReader.open(AeseSearch.index);
        if (reader != null) {
            IndexSearcher searcher = new IndexSearcher(reader);
            QueryParser qp = new QueryParser(Version.LUCENE_45, "text", analyzer);
            Query q = qp.parse(expr);
            TopDocs hits = searcher.search(q, AeseSearch.maxHits);
            ScoreDoc[] docs = hits.scoreDocs;
            for (int j = profile.from; j < profile.to && j < docs.length; j++) {
                Document doc = searcher.doc(docs[j].doc);
                String vid = doc.get(LuceneFields.VID);
                String docID = doc.get(LuceneFields.DOCID);
                Highlighter h = new Highlighter(new QueryScorer(q));
                String text = getCorTexVersion(docID, vid);
                sb.append(formatDocID(docID));
                sb.append(" ");
                sb.append(formatVersionID(vid));
                sb.append(" ");
                String frag = h.getBestFragment(analyzer, "text", text);
                sb.append("<span class=\"found\">");
                sb.append(frag);
                sb.append("</span>\n");
            }
            profile.numHits = docs.length;
        }
        reader.close();
    } catch (Exception e) {
        sb.append(e.getMessage());
    }
    return sb.toString();
}