List of usage examples for org.apache.lucene.index.DirectoryReader.open()
public static DirectoryReader open(final IndexCommit commit) throws IOException
From source file:DVBench.java
License:Apache License
static void doBench(int bpv) throws Exception { File file = new File("/data/indices/dvbench"); file.mkdirs();// w ww . j a va 2s .c o m Directory dir = FSDirectory.open(file); IndexWriterConfig config = new IndexWriterConfig(null); config.setOpenMode(OpenMode.CREATE); config.setMergeScheduler(new SerialMergeScheduler()); config.setMergePolicy(new LogDocMergePolicy()); config.setMaxBufferedDocs(25000); IndexWriter writer = new IndexWriter(dir, config); MyRandom r = new MyRandom(); int numdocs = 400000; Document doc = new Document(); Field dv = new NumericDocValuesField("dv", 0); Field inv = new LongField("inv", 0, Field.Store.NO); Field boxed = new BinaryDocValuesField("boxed", new BytesRef(8)); Field boxed2 = new BinaryDocValuesField("boxed2", new BytesRef(8)); doc.add(dv); doc.add(inv); doc.add(boxed); doc.add(boxed2); for (int i = 0; i < numdocs; i++) { // defeat blockpackedwriter final long value; if (i % 8192 == 0) { value = bpv == 64 ? Long.MIN_VALUE : 0; } else if (i % 8192 == 1) { value = bpv == 64 ? 
Long.MAX_VALUE : (1L << bpv) - 1; } else { value = r.nextLong(bpv); } dv.setLongValue(value); inv.setLongValue(value); box(value, boxed.binaryValue()); box(value, boxed2.binaryValue()); boxed2.binaryValue().length = (bpv + 7) / 8; // fixed length writer.addDocument(doc); } writer.close(); // run dv search tests String description = "dv (bpv=" + bpv + ")"; DirectoryReader reader = DirectoryReader.open(dir); IndexSearcher searcher = new IndexSearcher(reader); searcher.setQueryCache(null); // don't bench the cache int hash = 0; // warmup hash += search(description, searcher, "dv", 300, true); hash += search(description, searcher, "dv", 300, false); // Uninverting Map<String, UninvertingReader.Type> mapping = Collections.singletonMap("inv", UninvertingReader.Type.LONG); DirectoryReader uninv = UninvertingReader.wrap(reader, mapping); IndexSearcher searcher2 = new IndexSearcher(uninv); searcher2.setQueryCache(null); // don't bench the cache description = "fc (bpv=" + bpv + ")"; // warmup hash += search(description, searcher2, "inv", 300, true); hash += search(description, searcher2, "inv", 300, false); // Boxed inside binary DirectoryReader boxedReader = new BinaryAsVLongReader(reader); IndexSearcher searcher3 = new IndexSearcher(boxedReader); searcher3.setQueryCache(null); // don't bench the cache description = "boxed (bpv=" + bpv + ")"; // warmup hash += search(description, searcher3, "boxed", 300, true); hash += search(description, searcher3, "boxed", 300, false); description = "boxed fixed-length (bpv=" + bpv + ")"; // warmup hash += search(description, searcher3, "boxed2", 300, true); hash += search(description, searcher3, "boxed2", 300, false); if (hash == 3) { // wont happen System.out.println("hash=" + hash); } reader.close(); dir.close(); }
From source file:ReadFiles.java
License:Apache License
public static Result doScan(String path, DIRTYPE type, IndexReader ir) throws IOException { IndexReader reader;//from ww w .ja v a 2s. c o m Result r = new Result(); long beginTs, endTs; if (ir != null) reader = ir; else { beginTs = System.currentTimeMillis(); switch (type) { default: case MMAP: reader = DirectoryReader.open(MMapDirectory.open(new File(path))); break; case NIO: reader = DirectoryReader.open(NIOFSDirectory.open(new File(path))); break; case SIMPLE: reader = DirectoryReader.open(SimpleFSDirectory.open(new File(path))); break; } endTs = System.currentTimeMillis(); r.initTs += endTs - beginTs; r.initTsNr += 1; } System.out.println("-----Scan it------" + reader.maxDoc()); beginTs = System.currentTimeMillis(); for (int i = 0; i < reader.maxDoc(); i++) { Document doc = reader.document(i); doc.get("foo"); doc.get("bar"); //System.out.println("Key: " + doc.get("foo") + ", Value: " + doc.get("bar") + ", Content: " + doc.get("content")); } endTs = System.currentTimeMillis(); r.fetchTs += endTs - beginTs; r.fetchTsNr += reader.maxDoc(); if (ir == null) { beginTs = System.currentTimeMillis(); reader.close(); endTs = System.currentTimeMillis(); r.closeTs += endTs - beginTs; r.closeTsNr += 1; } return r; }
From source file:ReadFiles.java
License:Apache License
public static Result doSearch(String path, DIRTYPE type, IndexReader ir) throws IOException { IndexReader reader;//ww w . jav a 2s.co m Result r = new Result(); long beginTs, endTs; if (ir != null) reader = ir; else { beginTs = System.currentTimeMillis(); switch (type) { default: case MMAP: reader = DirectoryReader.open(MMapDirectory.open(new File(path))); break; case NIO: reader = DirectoryReader.open(NIOFSDirectory.open(new File(path))); break; case SIMPLE: reader = DirectoryReader.open(SimpleFSDirectory.open(new File(path))); break; } endTs = System.currentTimeMillis(); r.initTs += endTs - beginTs; r.initTsNr += 1; } System.out.println("-----Search it------"); IndexSearcher searcher = new IndexSearcher(reader); Query q = NumericRangeQuery.newIntRange("foo", new Integer("100000"), null, false, false); beginTs = System.currentTimeMillis(); ScoreDoc[] hits = searcher.search(q, searcher.getIndexReader().maxDoc()).scoreDocs; endTs = System.currentTimeMillis(); r.searchTs += endTs - beginTs; r.searchTsNr += hits.length; System.out.println("Hits -> " + hits.length); boolean isSeq = true; int lastid = 0; beginTs = System.currentTimeMillis(); for (int i = 0; i < hits.length; i++) { if (hits[i].doc < lastid) isSeq = false; Document doc = searcher.doc(hits[i].doc); doc.get("foo"); doc.get("bar"); //System.out.println("Key: " + doc.get("foo") + ", Value: " + doc.get("bar")); } System.out.println("Search DocID is SEQ? " + isSeq); endTs = System.currentTimeMillis(); r.fetchTs += endTs - beginTs; r.fetchTsNr += hits.length; if (ir == null) { beginTs = System.currentTimeMillis(); reader.close(); endTs = System.currentTimeMillis(); r.closeTs += endTs - beginTs; r.closeTsNr += 1; } return r; }
From source file:ReadFiles.java
License:Apache License
public static Result doRandFetch(String path, DIRTYPE type, IndexReader ir, int randfetchnr) throws IOException { IndexReader reader;// www.j a v a2 s.co m Result r = new Result(); long beginTs, endTs; if (ir != null) reader = ir; else { beginTs = System.currentTimeMillis(); switch (type) { default: case MMAP: reader = DirectoryReader.open(MMapDirectory.open(new File(path))); break; case NIO: reader = DirectoryReader.open(NIOFSDirectory.open(new File(path))); break; case SIMPLE: reader = DirectoryReader.open(SimpleFSDirectory.open(new File(path))); break; } endTs = System.currentTimeMillis(); r.initTs += endTs - beginTs; r.initTsNr += 1; } System.out.println("-----RandFt it------"); try { Thread.sleep(10000); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } // Randomized the fetch Random rand = new Random(); beginTs = System.currentTimeMillis(); int maxDoc = reader.maxDoc(); if (randfetchnr > 0) maxDoc = randfetchnr; for (int i = 0; i < maxDoc; i++) { Document doc = reader.document(rand.nextInt(maxDoc)); doc.get("foo"); doc.get("bar"); //System.out.println("Key: " + doc.get("foo") + ", Value: " + doc.get("bar")); } endTs = System.currentTimeMillis(); r.fetchTs += endTs - beginTs; r.fetchTsNr += maxDoc; if (ir == null) { beginTs = System.currentTimeMillis(); reader.close(); endTs = System.currentTimeMillis(); r.closeTs += endTs - beginTs; r.closeTsNr += 1; } return r; }
From source file:ReadFiles.java
License:Apache License
public static Result doRepOpenClose(String path, DIRTYPE type, long nr) throws IOException { IndexReader reader;// w ww . j a v a 2 s .c om Result r = new Result(); long beginTs, endTs; System.out.println("-----Open/Close it------"); for (int i = 0; i < nr; i++) { beginTs = System.currentTimeMillis(); switch (type) { default: case MMAP: reader = DirectoryReader.open(MMapDirectory.open(new File(path))); break; case NIO: reader = DirectoryReader.open(NIOFSDirectory.open(new File(path))); break; case SIMPLE: reader = DirectoryReader.open(SimpleFSDirectory.open(new File(path))); break; } endTs = System.currentTimeMillis(); r.initTs += endTs - beginTs; r.initTsNr += 1; beginTs = System.currentTimeMillis(); reader.close(); endTs = System.currentTimeMillis(); r.closeTs += endTs - beginTs; r.closeTsNr += 1; } return r; }
From source file:IndexAndSearchOpenStreetMaps1D.java
License:Apache License
private static void queryIndex() throws IOException { Directory dir = FSDirectory.open(Paths.get("/l/tmp/1dkd" + (USE_NF ? "_nf" : ""))); System.out.println("DIR: " + dir); IndexReader r = DirectoryReader.open(dir); System.out.println("maxDoc=" + r.maxDoc()); IndexSearcher s = new IndexSearcher(r); //System.out.println("reader MB heap=" + (reader.ramBytesUsed()/1024/1024.)); // London, UK: int STEPS = 5; double MIN_LAT = 51.0919106; double MAX_LAT = 51.6542719; double MIN_LON = -0.3867282; double MAX_LON = 0.8492337; byte[] scratch1 = new byte[4]; byte[] scratch2 = new byte[4]; for (int iter = 0; iter < 100; iter++) { long tStart = System.nanoTime(); long totHits = 0; int queryCount = 0; for (int latStep = 0; latStep < STEPS; latStep++) { double lat = MIN_LAT + latStep * (MAX_LAT - MIN_LAT) / STEPS; for (int lonStep = 0; lonStep < STEPS; lonStep++) { double lon = MIN_LON + lonStep * (MAX_LON - MIN_LON) / STEPS; for (int latStepEnd = latStep + 1; latStepEnd <= STEPS; latStepEnd++) { double latEnd = MIN_LAT + latStepEnd * (MAX_LAT - MIN_LAT) / STEPS; for (int lonStepEnd = lonStep + 1; lonStepEnd <= STEPS; lonStepEnd++) { double lonEnd = MIN_LON + lonStepEnd * (MAX_LON - MIN_LON) / STEPS; Query q;// w w w.j a v a2 s. c o m if (USE_NF) { q = LegacyNumericRangeQuery.newIntRange("latnum", (int) (1000000. * lat), (int) (1000000. * latEnd), true, true); } else { q = IntPoint.newRangeQuery("lat", (int) (1000000. * lat), (int) (1000000. 
* latEnd)); } TotalHitCountCollector c = new TotalHitCountCollector(); //long t0 = System.nanoTime(); s.search(q, c); //System.out.println("\nITER: now query lat=" + lat + " latEnd=" + latEnd + " lon=" + lon + " lonEnd=" + lonEnd); //Bits hits = reader.intersect(lat, latEnd, lon, lonEnd); //System.out.println(" total hits: " + hitCount); //totHits += ((FixedBitSet) hits).cardinality(); //System.out.println(" add tot " + c.getTotalHits()); totHits += c.getTotalHits(); queryCount++; } } } } long tEnd = System.nanoTime(); System.out.println("ITER: " + iter + " " + ((tEnd - tStart) / 1000000000.0) + " sec; totHits=" + totHits + "; " + queryCount + " queries"); if (iter == 0) { long bytes = 0; for (LeafReaderContext ctx : r.leaves()) { CodecReader cr = (CodecReader) ctx.reader(); System.out.println(Accountables.toString(cr)); bytes += cr.ramBytesUsed(); } System.out.println("READER MB: " + (bytes / 1024. / 1024.)); System.out.println("RAM: " + Accountables.toString((Accountable) r.leaves().get(0).reader())); } } IOUtils.close(r, dir); }
From source file:SearchFilesTest.java
License:Apache License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String index = args[0];//w w w.j a v a 2 s . c om String field = "contents"; String queries = "resources/query.txt"; String queryString = null; int hitsPerPage = 10; IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index))); IndexSearcher searcher = new IndexSearcher(reader); File stopWordsFile = new File("resources/stop.txt"); CharArraySet stopWordsCharArraySet = WordlistLoader.getWordSet(new FileReader(stopWordsFile), Version.LUCENE_47); Analyzer analyzer = new RomanianAnalyzerUsingAnotherConstructorForStopwordAnalyzer(Version.LUCENE_47, stopWordsCharArraySet); BufferedReader in = null; in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), codification)); QueryParser parser = new QueryParser(Version.LUCENE_47, field, analyzer); while (true) { String line = in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } Query query = parser.parse(line); System.out.println("Looking for: " + query.toString(field)); doPagingSearch(in, searcher, query); if (queryString != null) { break; } } reader.close(); }
From source file:ExpansionFrame.java
private void ExpansionBtnActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_ExpansionBtnActionPerformed String vocabulary = ""; DefaultListModel<String> TermsListModel = new DefaultListModel<>(); for (int docIndex = 0; docIndex < ChoicesList.getModel().getSize(); docIndex++) { int currentIndex = 0; boolean exists = false; for (int resultDocIndex = 0; resultDocIndex < ResultsList.getModel().getSize(); resultDocIndex++) { if (ResultsList.getModel().getElementAt(resultDocIndex) .equals(ChoicesList.getModel().getElementAt(docIndex))) { currentIndex = resultDocIndex; exists = true;// www . ja v a 2 s.c o m break; } } if (exists) { Set<Term> results; IndexReader indoReader; try { indoReader = DirectoryReader.open(myIndex); QueryParser qp = new QueryParser(Version.LUCENE_30, "summary", myAnalyzer); Query query; query = qp.parse(SummaryListModel.elementAt(currentIndex)); query.rewrite(indoReader); results = new HashSet<>(); query.extractTerms(results); Iterator<Term> iterator = results.iterator(); while (iterator.hasNext()) { String term = iterator.next().toString(); vocabulary += term.substring(8) + ";"; } vocabulary += "!"; } catch (IOException | ParseException ex) { Logger.getLogger(ExpansionFrame.class.getName()).log(Level.SEVERE, null, ex); } } else { System.out.println("Error!"); } } String term = ""; for (int i = 0; i < vocabulary.length(); i++) { if (vocabulary.charAt(i) != ';' && vocabulary.charAt(i) != '!') { term += vocabulary.charAt(i); } else { if (term.length() != 0) { boolean exists = false; for (int k = 0; k < TermsListModel.size(); k++) { if (term.equals(TermsListModel.elementAt(k))) { exists = true; break; } } if (!exists) { TermsListModel.addElement(term); term = ""; } } } } term = ""; int[][] termsFreqArray = new int[TermsListModel.size()][ChoicesList.getModel().getSize()]; for (int i = 0; i < TermsListModel.size(); i++) { for (int j = 0; j < ChoicesList.getModel().getSize(); j++) { termsFreqArray[i][j] = 0; } } int docIndex = 0; for 
(int i = 0; i < vocabulary.length(); i++) { if (vocabulary.charAt(i) != ';' && vocabulary.charAt(i) != '!') { term += vocabulary.charAt(i); } else { if (vocabulary.charAt(i) == '!') { docIndex++; } if (term.length() != 0) { int index = 0; for (int k = 0; k < TermsListModel.size(); k++) { if (term.equals(TermsListModel.elementAt(k))) { index = k; break; } } termsFreqArray[index][docIndex]++; term = ""; } } } /* for (int i = 0; i < TermsListModel.size(); i++) { System.out.print(TermsListModel.elementAt(i) + " : "); for (int j = 0; j < ChoicesList.getModel().getSize(); j++) { System.out.print(termsFreqArray[i][j] + ";"); } System.out.println(); } */ }
From source file:luceneInterface.java
License:Apache License
public static List<Document> query(String index, String stoppath, String question, int numResult, String sim) throws Exception { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new EnglishAnalyzer(StopFilter.makeStopSet(mygetStopwords(stoppath))); if (sim.equals("TFIDF")) searcher.setSimilarity(new ClassicSimilarity()); else if (sim.equals("BM25")) searcher.setSimilarity(new BM25Similarity()); else/* ww w . j a v a2 s. c om*/ searcher.setSimilarity(new BM25Similarity()); String field = "contents"; QueryParser parser = new QueryParser(field, analyzer); Query query = parser.parse(parser.escape(question)); BooleanQuery.Builder bqb = new BooleanQuery.Builder(); bqb.add(new TermQuery(new Term("contents", parser.escape(question))), BooleanClause.Occur.SHOULD); bqb.add(new TermQuery(new Term("sec", parser.escape(question))), BooleanClause.Occur.SHOULD); // Term term = new Term(field, question); // Query query = new TermQuery(term); // TopDocs results = searcher.search(query, numResult); TopDocs results = searcher.search(parser.parse(bqb.build().toString()), numResult); ScoreDoc[] hits = results.scoreDocs; List<Document> docs = new ArrayList<Document>(); int numTotalHits = results.totalHits; // System.out.println(numTotalHits + " total matching documents"); int end = Math.min(numTotalHits, numResult); String searchResult = ""; // System.out.println("Only results 1 - " + hits.length); for (int i = 0; i < end; i++) { Document doc = searcher.doc(hits[i].doc); docs.add(doc); } return docs; }
From source file:LuceneSearchFiles.java
License:Apache License
/** Simple command-line based search demo. */ public static void search(String phrase, String field, int hitsPerPage) { try {// w ww . j ava 2s . com IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(m_index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40); QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer); Query query = parser.parse(phrase); System.out.println("Searching for: " + query.toString(field)); searcher.search(query, null, hitsPerPage); TopDocs results = searcher.search(query, hitsPerPage); ScoreDoc[] hits = results.scoreDocs; for (ScoreDoc hit : hits) { Document doc = searcher.doc(hit.doc); //String path = doc.get("path"); String title = doc.get("title"); System.out.println(hit.score + " -" + title); } reader.close(); } catch (IOException e) { } catch (ParseException e) { } }