List of usage examples for the org.apache.lucene.search.IndexSearcher constructor
public IndexSearcher(IndexReaderContext context)
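Note that most of the examples below construct the searcher from an IndexReader directly; the following minimal sketch shows the IndexReaderContext overload named above. It is an illustration only and assumes a Lucene 5+ classpath and an existing index at the placeholder path "/tmp/index".

import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.FSDirectory;

public class IndexSearcherContextExample {
    public static void main(String[] args) throws Exception {
        // Open an existing index; the path is a placeholder for illustration only.
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/tmp/index")))) {
            // getContext() exposes the reader's top-level IndexReaderContext,
            // which the IndexSearcher(IndexReaderContext) constructor accepts directly.
            IndexReaderContext context = reader.getContext();
            IndexSearcher searcher = new IndexSearcher(context);
            System.out.println("maxDoc=" + searcher.getIndexReader().maxDoc());
        }
    }
}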
From source file:SearchFiles11.java
License:Apache License
/** Simple command-line based search demo. */
public static void main(String[] args) throws Exception {
    String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details.";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }

    String index = "index";
    String field = "contents";
    String queries = null;
    int repeat = 0;
    boolean raw = false;
    String queryString = null;
    int hitsPerPage = 10;

    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            index = args[i + 1];
            i++;
        } else if ("-field".equals(args[i])) {
            field = args[i + 1];
            i++;
        } else if ("-queries".equals(args[i])) {
            queries = args[i + 1];
            i++;
        } else if ("-query".equals(args[i])) {
            queryString = args[i + 1];
            i++;
        } else if ("-repeat".equals(args[i])) {
            repeat = Integer.parseInt(args[i + 1]);
            i++;
        } else if ("-raw".equals(args[i])) {
            raw = true;
        } else if ("-paging".equals(args[i])) {
            hitsPerPage = Integer.parseInt(args[i + 1]);
            if (hitsPerPage <= 0) {
                System.err.println("There must be at least 1 hit per page.");
                System.exit(1);
            }
            i++;
        }
    }

    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer();

    StandardQueryParser queryParserHelper = new StandardQueryParser();
    Query query = queryParserHelper.parse(
            "Physical OR tests OR for OR shoulder OR impingements OR and OR local OR lesions OR of OR bursa, OR tendon OR labrum OR that OR may OR accompany OR impingement",
            field);

    TopDocs results = searcher.search(query, 100);
    Date end = new Date();
    ScoreDoc[] hits = results.scoreDocs;
    int numTotalHits = results.totalHits;

    String FILENAME = "/home/devil/research/CLEF/ehealth/task2/dataset/pubmed11.res";
    int i = 1;
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(FILENAME))) {
        String content = "";
        for (ScoreDoc h : hits) {
            Document doc = searcher.doc(h.doc);
            String path = doc.get("path");
            String[] path_words = path.split("/");
            System.out.println(path_words[path_words.length - 1] + " score=" + h.score);
            content = "CD007427 " + "NF " + path_words[path_words.length - 1] + " " + i++ + " " + h.score
                    + " pubmed\n";
            bw.write(content);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    //doPagingSearch(in, searcher, bQuery.build(), hitsPerPage, raw, queries == null && queryString == null);
    reader.close();
}
From source file:CFX_GoatSearch.java
License:Open Source License
/**
 * Classes that implement this interface can be specified in the CLASS attribute of the Java CFX tag.
 * For example, in a class MyCustomTag which implements this interface, the following CFML code calls
 * the MyCustomTag.processRequest method.
 *
 * @param request
 * @param response
 * @throws Exception
 */
public void processRequest(Request request, Response response) throws Exception {
    Date startTime = new Date();
    String indexPath = null;
    String queryName = null;
    String searchString = null;
    String sortField = null;
    String sortDirection = null;
    int hitsPerPage = 0;
    int pageNumber = 0;
    Vector errors = new Vector();

    if (request.attributeExists("INDEXPATH")) {
        indexPath = request.getAttribute("INDEXPATH");
    } else {
        errors.add("The cfx_lucene tag requires an attribute called 'INDEXPATH'.");
    }
    if (request.attributeExists("HITSPERPAGE")) {
        hitsPerPage = request.getIntAttribute("HITSPERPAGE");
    }
    if (request.attributeExists("PAGENUMBER")) {
        pageNumber = request.getIntAttribute("PAGENUMBER");
    }
    if (request.attributeExists("QUERYNAME")) {
        queryName = request.getAttribute("QUERYNAME");
    } else {
        errors.add("The cfx_lucene tag requires an attribute called 'QUERYNAME'.");
    }
    if (request.attributeExists("SEARCHSTRING")) {
        searchString = request.getAttribute("SEARCHSTRING");
    } else {
        errors.add("The cfx_lucene tag requires an attribute called 'SEARCHSTRING'.");
    }

    // Sorting
    if (request.attributeExists("SORTFIELD")) {
        sortField = request.getAttribute("SORTFIELD");
    }
    if (request.attributeExists("SORTDIRECTION")) {
        sortDirection = request.getAttribute("SORTDIRECTION");
    }

    // Errors
    if (!errors.isEmpty()) {
        response.write("<h2 style=\"color: #FF0000\">CFX Goat Error:</h2>");
        for (int i = 0; i < errors.size(); i++) {
            response.write("<p>Error: " + errors.get(i) + "</p>\n");
        }
        //return;
    } else {
        try {
            IndexReader reader = IndexReader.open(indexPath);
            IndexSearcher searcher = new IndexSearcher(indexPath);
            if (searcher == null) {
                errors.add("Unable to open index");
            }

            XMLReader readerXML = new XMLReader(); // XML reader class
            String configFile = ConfigFiles.getSchemaFile(indexPath);
            String[] indexTypeArray = new String[Integer.parseInt(readerXML.getTotalNodes(configFile))];
            String[] columnNamesArray = new String[Integer.parseInt(readerXML.getTotalNodes(configFile))];
            int totalNodes = columnNamesArray.length;
            String nodeName = "";

            // Sort: the index type of the sort field must be PrimaryKey, Keyword or Date
            Sort sortby = new Sort();
            if (sortField != null)
                sortField = sortField.trim().toLowerCase(); // normalize the field name to lower case

            // Read the column names and index types from the schema first,
            // so the multi-field parser below sees the real field names.
            for (int i = 0; i < totalNodes; i++) {
                columnNamesArray[i] = readerXML.getNodeValueByFile(configFile, i, "columnname");
                indexTypeArray[i] = readerXML.getNodeValueByFile(configFile, i, "indextype");
                /* Make sure the field can be sorted on */
                if (columnNamesArray[i].equalsIgnoreCase(sortField)
                        && (indexTypeArray[i].equalsIgnoreCase("PrimaryKey")
                                || indexTypeArray[i].equalsIgnoreCase("Keyword")
                                || indexTypeArray[i].equalsIgnoreCase("Date"))) {
                    if (sortDirection != null && sortDirection.equalsIgnoreCase("desc")) {
                        System.out.println("desc");
                        sortby = new Sort(sortField, true);  // sort descending
                    } else if (sortDirection != null && sortDirection.equalsIgnoreCase("asc")) {
                        System.out.println("asc");
                        sortby = new Sort(sortField, false); // sort ascending
                    }
                }
            }

            // Build the query across all indexed columns
            Analyzer analyzer = new StandardAnalyzer();
            QueryParser parser = new MultiFieldQueryParser(columnNamesArray, analyzer);
            Query query = parser.parse(searchString);
            if (query == null) {
                errors.add("Unable to build Query");
            }

            if (hitsPerPage < 1)
                hitsPerPage = 1;

            int pageNum = pageNumber;
            int recordSet = (pageNum * hitsPerPage) + 100;
            TopFieldDocs resultDocs = searcher.search(query, null, recordSet, sortby);
            ScoreDoc[] hits = resultDocs.scoreDocs;
            int numTotalHits = resultDocs.totalHits;

            // Start and end of the requested page
            int start = (pageNum - 1);
            if (start < 0)
                start = 0;
            if (pageNum > 1) {
                start = (pageNum * hitsPerPage) - hitsPerPage;
            }
            int end = (pageNum * hitsPerPage);
            end = Math.min(hits.length, start + hitsPerPage);

            // ColdFusion query
            com.allaire.cfx.Query goatQuery = response.addQuery(queryName, columnNamesArray);
            for (int i = start; i < end; i++) {
                int row = goatQuery.addRow(); // add row
                int docId = hits[i].doc;
                Document d = searcher.doc(docId);
                for (int x = 0; x < totalNodes; x++) {
                    nodeName = columnNamesArray[x];
                    goatQuery.setData(row, (x + 1), d.get(nodeName)); // insert values; setData columns start at 1
                }
            }

            //reader.close();
            searcher.close();

            Date endTime = new Date();
            // Set other values
            response.setVariable("goat.totaltime", Long.toString(endTime.getTime() - startTime.getTime()));
            response.setVariable("goat.totalresults", Integer.toString(numTotalHits));
            response.setVariable("goat.totalpages", Integer.toString((numTotalHits / hitsPerPage)));
        } catch (Exception e) {
            errors.add("Failure caught a " + e.getClass() + " with message: " + e.getMessage());
        }
    }

    // Output final errors if needed
    if (!errors.isEmpty()) {
        response.write("<h2 style=\"color: #FF0000\">CFX Goat Error:</h2>");
        for (int i = 0; i < errors.size(); i++) {
            response.write("<p>Error: " + errors.get(i) + "</p>\n");
        }
    }
}
From source file:syslogSearch.java
License:Open Source License
public void run() {
    try {
        String searchQuery = (new BufferedReader(new InputStreamReader(searchSocket.getInputStream())))
                .readLine().trim();

        IndexReader reader = writer.getReader();
        Searcher searcher = new IndexSearcher(reader);
        QueryParser indexParser = new QueryParser(Version.LUCENE_30, "data", analyzer);

        SortField hitSortField = new SortField("date", SortField.LONG);
        Sort hitSort = new Sort(hitSortField);

        TopFieldDocs hits = searcher.search(indexParser.parse(searchQuery), null, 1000, hitSort);

        PrintWriter searchReply = new PrintWriter(searchSocket.getOutputStream(), true);
        searchReply.println(hits.totalHits + " Hits for " + searchQuery);
        // Iterate over the collected hits only (at most 1000), not totalHits,
        // to avoid reading past the end of scoreDocs.
        for (int i = 0; i < hits.scoreDocs.length; i++) {
            Document document = searcher.doc(hits.scoreDocs[i].doc);
            String host = document.get("hostname");
            String date = document.get("date");
            String data = document.get("data");
            searchReply.print("host: " + host + ", date: " + date + ", data: " + data + "\n\n");
        }
        searchReply.close();
        searcher.close();
        reader.close();
        searchSocket.close();
    } catch (Exception ex) {
        System.out.print("Exception: " + ex + "\n");
    }
}
From source file:DVBench.java
License:Apache License
static void doBench(int bpv) throws Exception {
    File file = new File("/data/indices/dvbench");
    file.mkdirs();
    Directory dir = FSDirectory.open(file);
    IndexWriterConfig config = new IndexWriterConfig(null);
    config.setOpenMode(OpenMode.CREATE);
    config.setMergeScheduler(new SerialMergeScheduler());
    config.setMergePolicy(new LogDocMergePolicy());
    config.setMaxBufferedDocs(25000);
    IndexWriter writer = new IndexWriter(dir, config);

    MyRandom r = new MyRandom();
    int numdocs = 400000;
    Document doc = new Document();
    Field dv = new NumericDocValuesField("dv", 0);
    Field inv = new LongField("inv", 0, Field.Store.NO);
    Field boxed = new BinaryDocValuesField("boxed", new BytesRef(8));
    Field boxed2 = new BinaryDocValuesField("boxed2", new BytesRef(8));

    doc.add(dv);
    doc.add(inv);
    doc.add(boxed);
    doc.add(boxed2);

    for (int i = 0; i < numdocs; i++) {
        // defeat blockpackedwriter
        final long value;
        if (i % 8192 == 0) {
            value = bpv == 64 ? Long.MIN_VALUE : 0;
        } else if (i % 8192 == 1) {
            value = bpv == 64 ? Long.MAX_VALUE : (1L << bpv) - 1;
        } else {
            value = r.nextLong(bpv);
        }
        dv.setLongValue(value);
        inv.setLongValue(value);
        box(value, boxed.binaryValue());
        box(value, boxed2.binaryValue());
        boxed2.binaryValue().length = (bpv + 7) / 8; // fixed length
        writer.addDocument(doc);
    }
    writer.close();

    // run dv search tests
    String description = "dv (bpv=" + bpv + ")";
    DirectoryReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setQueryCache(null); // don't bench the cache

    int hash = 0;
    // warmup
    hash += search(description, searcher, "dv", 300, true);
    hash += search(description, searcher, "dv", 300, false);

    // Uninverting
    Map<String, UninvertingReader.Type> mapping = Collections.singletonMap("inv", UninvertingReader.Type.LONG);
    DirectoryReader uninv = UninvertingReader.wrap(reader, mapping);
    IndexSearcher searcher2 = new IndexSearcher(uninv);
    searcher2.setQueryCache(null); // don't bench the cache

    description = "fc (bpv=" + bpv + ")";
    // warmup
    hash += search(description, searcher2, "inv", 300, true);
    hash += search(description, searcher2, "inv", 300, false);

    // Boxed inside binary
    DirectoryReader boxedReader = new BinaryAsVLongReader(reader);
    IndexSearcher searcher3 = new IndexSearcher(boxedReader);
    searcher3.setQueryCache(null); // don't bench the cache

    description = "boxed (bpv=" + bpv + ")";
    // warmup
    hash += search(description, searcher3, "boxed", 300, true);
    hash += search(description, searcher3, "boxed", 300, false);

    description = "boxed fixed-length (bpv=" + bpv + ")";
    // warmup
    hash += search(description, searcher3, "boxed2", 300, true);
    hash += search(description, searcher3, "boxed2", 300, false);

    if (hash == 3) {
        // wont happen
        System.out.println("hash=" + hash);
    }

    reader.close();
    dir.close();
}
From source file:ReadFiles.java
License:Apache License
public static Result doSearch(String path, DIRTYPE type, IndexReader ir) throws IOException {
    IndexReader reader;
    Result r = new Result();
    long beginTs, endTs;

    if (ir != null)
        reader = ir;
    else {
        beginTs = System.currentTimeMillis();
        switch (type) {
        default:
        case MMAP:
            reader = DirectoryReader.open(MMapDirectory.open(new File(path)));
            break;
        case NIO:
            reader = DirectoryReader.open(NIOFSDirectory.open(new File(path)));
            break;
        case SIMPLE:
            reader = DirectoryReader.open(SimpleFSDirectory.open(new File(path)));
            break;
        }
        endTs = System.currentTimeMillis();
        r.initTs += endTs - beginTs;
        r.initTsNr += 1;
    }

    System.out.println("-----Search it------");
    IndexSearcher searcher = new IndexSearcher(reader);
    Query q = NumericRangeQuery.newIntRange("foo", new Integer("100000"), null, false, false);

    beginTs = System.currentTimeMillis();
    ScoreDoc[] hits = searcher.search(q, searcher.getIndexReader().maxDoc()).scoreDocs;
    endTs = System.currentTimeMillis();
    r.searchTs += endTs - beginTs;
    r.searchTsNr += hits.length;
    System.out.println("Hits -> " + hits.length);

    boolean isSeq = true;
    int lastid = 0;
    beginTs = System.currentTimeMillis();
    for (int i = 0; i < hits.length; i++) {
        if (hits[i].doc < lastid)
            isSeq = false;
        Document doc = searcher.doc(hits[i].doc);
        doc.get("foo");
        doc.get("bar");
        //System.out.println("Key: " + doc.get("foo") + ", Value: " + doc.get("bar"));
    }
    System.out.println("Search DocID is SEQ? " + isSeq);
    endTs = System.currentTimeMillis();
    r.fetchTs += endTs - beginTs;
    r.fetchTsNr += hits.length;

    if (ir == null) {
        beginTs = System.currentTimeMillis();
        reader.close();
        endTs = System.currentTimeMillis();
        r.closeTs += endTs - beginTs;
        r.closeTsNr += 1;
    }
    return r;
}
From source file:IndexAndSearchOpenStreetMaps1D.java
License:Apache License
private static void queryIndex() throws IOException {
    Directory dir = FSDirectory.open(Paths.get("/l/tmp/1dkd" + (USE_NF ? "_nf" : "")));
    System.out.println("DIR: " + dir);
    IndexReader r = DirectoryReader.open(dir);
    System.out.println("maxDoc=" + r.maxDoc());
    IndexSearcher s = new IndexSearcher(r);
    //System.out.println("reader MB heap=" + (reader.ramBytesUsed()/1024/1024.));

    // London, UK:
    int STEPS = 5;
    double MIN_LAT = 51.0919106;
    double MAX_LAT = 51.6542719;
    double MIN_LON = -0.3867282;
    double MAX_LON = 0.8492337;

    byte[] scratch1 = new byte[4];
    byte[] scratch2 = new byte[4];
    for (int iter = 0; iter < 100; iter++) {
        long tStart = System.nanoTime();
        long totHits = 0;
        int queryCount = 0;
        for (int latStep = 0; latStep < STEPS; latStep++) {
            double lat = MIN_LAT + latStep * (MAX_LAT - MIN_LAT) / STEPS;
            for (int lonStep = 0; lonStep < STEPS; lonStep++) {
                double lon = MIN_LON + lonStep * (MAX_LON - MIN_LON) / STEPS;
                for (int latStepEnd = latStep + 1; latStepEnd <= STEPS; latStepEnd++) {
                    double latEnd = MIN_LAT + latStepEnd * (MAX_LAT - MIN_LAT) / STEPS;
                    for (int lonStepEnd = lonStep + 1; lonStepEnd <= STEPS; lonStepEnd++) {
                        double lonEnd = MIN_LON + lonStepEnd * (MAX_LON - MIN_LON) / STEPS;

                        Query q;
                        if (USE_NF) {
                            q = LegacyNumericRangeQuery.newIntRange("latnum", (int) (1000000. * lat),
                                    (int) (1000000. * latEnd), true, true);
                        } else {
                            q = IntPoint.newRangeQuery("lat", (int) (1000000. * lat), (int) (1000000. * latEnd));
                        }

                        TotalHitCountCollector c = new TotalHitCountCollector();
                        //long t0 = System.nanoTime();
                        s.search(q, c);
                        //System.out.println("\nITER: now query lat=" + lat + " latEnd=" + latEnd + " lon=" + lon + " lonEnd=" + lonEnd);
                        //Bits hits = reader.intersect(lat, latEnd, lon, lonEnd);
                        //System.out.println("  total hits: " + hitCount);
                        //totHits += ((FixedBitSet) hits).cardinality();
                        //System.out.println("  add tot " + c.getTotalHits());
                        totHits += c.getTotalHits();
                        queryCount++;
                    }
                }
            }
        }

        long tEnd = System.nanoTime();
        System.out.println("ITER: " + iter + " " + ((tEnd - tStart) / 1000000000.0) + " sec; totHits=" + totHits
                + "; " + queryCount + " queries");

        if (iter == 0) {
            long bytes = 0;
            for (LeafReaderContext ctx : r.leaves()) {
                CodecReader cr = (CodecReader) ctx.reader();
                System.out.println(Accountables.toString(cr));
                bytes += cr.ramBytesUsed();
            }
            System.out.println("READER MB: " + (bytes / 1024. / 1024.));
            System.out.println("RAM: " + Accountables.toString((Accountable) r.leaves().get(0).reader()));
        }
    }

    IOUtils.close(r, dir);
}
From source file:SearchFilesTest.java
License:Apache License
/** Simple command-line based search demo. */
public static void main(String[] args) throws Exception {
    String index = args[0];
    String field = "contents";
    String queries = "resources/query.txt";
    String queryString = null;
    int hitsPerPage = 10;

    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
    IndexSearcher searcher = new IndexSearcher(reader);

    File stopWordsFile = new File("resources/stop.txt");
    CharArraySet stopWordsCharArraySet = WordlistLoader.getWordSet(new FileReader(stopWordsFile),
            Version.LUCENE_47);
    Analyzer analyzer = new RomanianAnalyzerUsingAnotherConstructorForStopwordAnalyzer(Version.LUCENE_47,
            stopWordsCharArraySet);

    BufferedReader in = null;
    in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), codification));
    QueryParser parser = new QueryParser(Version.LUCENE_47, field, analyzer);
    while (true) {
        String line = in.readLine();
        if (line == null || line.length() == -1) {
            break;
        }
        line = line.trim();
        if (line.length() == 0) {
            break;
        }

        Query query = parser.parse(line);
        System.out.println("Looking for: " + query.toString(field));

        doPagingSearch(in, searcher, query);

        if (queryString != null) {
            break;
        }
    }
    reader.close();
}
From source file:SimpleLuceneSearch.java
License:Apache License
public SimpleLuceneSearch(String index) throws CorruptIndexException, IOException {
    this.INDEX = index; // e.g. "/home/coulet/workspace/data_resource/mesh/indexOnMesh"
    IndexReader reader = IndexReader.open(FSDirectory.open(new File(INDEX)), true);
    SEARCHER = new IndexSearcher(reader);
    ANALYSER = new StandardAnalyzer(Version.LUCENE_35);
}
From source file:KNearestNeighborClassifier.java
License:Apache License
/**
 * Creates a {@link KNearestNeighborClassifier}.
 *
 * @param leafReader     the reader on the index to be used for classification
 * @param analyzer       an {@link Analyzer} used to analyze unseen text
 * @param similarity     the {@link Similarity} to be used by the underlying {@link IndexSearcher} or {@code null}
 *                       (defaults to {@link org.apache.lucene.search.similarities.ClassicSimilarity})
 * @param query          a {@link Query} to eventually filter the docs used for training the classifier, or {@code null}
 *                       if all the indexed docs should be used
 * @param k              the no. of docs to select in the MLT results to find the nearest neighbor
 * @param minDocsFreq    {@link MoreLikeThis#minDocFreq} parameter
 * @param minTermFreq    {@link MoreLikeThis#minTermFreq} parameter
 * @param classFieldName the name of the field used as the output for the classifier
 * @param textFieldNames the names of the fields used as the inputs for the classifier; they can contain a boosting indication, e.g. title^10
 */
public KNearestNeighborClassifier(IndexReader leafReader, Similarity similarity, Analyzer analyzer, Query query,
        int k, int minDocsFreq, int minTermFreq, String classFieldName, String... textFieldNames) {
    this.textFieldNames = textFieldNames;
    this.classFieldName = classFieldName;
    this.mlt = new MoreLikeThis(leafReader);
    this.mlt.setAnalyzer(analyzer);
    this.mlt.setFieldNames(textFieldNames);
    this.indexSearcher = new IndexSearcher(leafReader);
    if (similarity != null) {
        this.indexSearcher.setSimilarity(similarity);
    } else {
        this.indexSearcher.setSimilarity(new ClassicSimilarity());
    }
    if (minDocsFreq > 0) {
        mlt.setMinDocFreq(minDocsFreq);
    }
    if (minTermFreq > 0) {
        mlt.setMinTermFreq(minTermFreq);
    }
    this.query = query;
    this.k = k;
}
From source file:luceneInterface.java
License:Apache License
public static List<Document> query(String index, String stoppath, String question, int numResult, String sim)
        throws Exception {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new EnglishAnalyzer(StopFilter.makeStopSet(mygetStopwords(stoppath)));

    if (sim.equals("TFIDF"))
        searcher.setSimilarity(new ClassicSimilarity());
    else if (sim.equals("BM25"))
        searcher.setSimilarity(new BM25Similarity());
    else
        searcher.setSimilarity(new BM25Similarity());

    String field = "contents";
    QueryParser parser = new QueryParser(field, analyzer);
    Query query = parser.parse(parser.escape(question));

    BooleanQuery.Builder bqb = new BooleanQuery.Builder();
    bqb.add(new TermQuery(new Term("contents", parser.escape(question))), BooleanClause.Occur.SHOULD);
    bqb.add(new TermQuery(new Term("sec", parser.escape(question))), BooleanClause.Occur.SHOULD);

    // Term term = new Term(field, question);
    // Query query = new TermQuery(term);
    // TopDocs results = searcher.search(query, numResult);
    TopDocs results = searcher.search(parser.parse(bqb.build().toString()), numResult);
    ScoreDoc[] hits = results.scoreDocs;

    List<Document> docs = new ArrayList<Document>();
    int numTotalHits = results.totalHits;
    // System.out.println(numTotalHits + " total matching documents");
    int end = Math.min(numTotalHits, numResult);
    String searchResult = "";
    // System.out.println("Only results 1 - " + hits.length);
    for (int i = 0; i < end; i++) {
        Document doc = searcher.doc(hits[i].doc);
        docs.add(doc);
    }
    return docs;
}