Usage examples for `org.apache.lucene.index.DirectoryReader#open`
public static DirectoryReader open(final IndexCommit commit) throws IOException
From source file:com.mycompany.mavenproject1.Main.java
public static void main(String[] args) throws IOException, ParseException { StandardAnalyzer analyzer = new StandardAnalyzer(); // Directory index = new RAMDirectory(); Directory index = new SimpleFSDirectory(Paths.get( "C:\\Users\\slete\\Documents\\NetBeansProjects\\mavenproject1\\src\\main\\java\\com\\mycompany\\mavenproject1\\data")); IndexWriterConfig config = new IndexWriterConfig(analyzer); //config.setOpenMode(OpenMode.CREATE); IndexWriter w = new IndexWriter(index, config); try (ItemProvider provider = new ItemProvider( "C:\\Users\\slete\\Documents\\NetBeansProjects\\mavenproject1\\src\\main\\java\\com\\mycompany\\mavenproject1\\items.xml")) { while (provider.hasNext()) { Item item = provider.next(); addItem(w, item);/*www . ja v a 2s .c o m*/ } } catch (XMLStreamException | IOException ex) { ex.getMessage(); } // w.commit(); w.close(); // String queryStr = "id:1* NOT id:19*"; String a = "id:1* NOT id:19*"; String b = "name:Dekielek AND description:(ty AND obiektywu)"; String c = "category:Dek*"; String ds = "id:1232~2"; String e = "price:[0.0 TO 100.0]"; Query q = new QueryParser("name", analyzer).parse(ds); int hitsPerPage = 10; IndexReader reader = DirectoryReader.open(index); IndexSearcher searcher = new IndexSearcher(reader); TopDocs docs = searcher.search(q, hitsPerPage); ScoreDoc[] hits = docs.scoreDocs; System.out.println("Found " + hits.length + " hits."); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = searcher.doc(docId); System.out .println(d.get("id") + "\t" + d.get("price") + "\t" + d.get("name") + "\t" + d.get("category"));//+"\t" + d.get("description")); } }
From source file:com.mylucene.basiclucene.SearchFiles.java
License:Apache License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { System.out.println(usage); System.exit(0);// w ww . j a v a 2 s. co m } String index = "index"; String field = "contents"; String queries = null; int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 10; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index = args[i + 1]; i++; } else if ("-field".equals(args[i])) { field = args[i + 1]; i++; } else if ("-queries".equals(args[i])) { queries = args[i + 1]; i++; } else if ("-query".equals(args[i])) { queryString = args[i + 1]; i++; } else if ("-repeat".equals(args[i])) { repeat = Integer.parseInt(args[i + 1]); i++; } else if ("-raw".equals(args[i])) { raw = true; } else if ("-paging".equals(args[i])) { hitsPerPage = Integer.parseInt(args[i + 1]); if (hitsPerPage <= 0) { System.err.println("There must be at least 1 hit per page."); System.exit(1); } i++; } } IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44); BufferedReader in = null; if (queries != null) { in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8")); } else { in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); } QueryParser parser = new QueryParser(Version.LUCENE_44, field, analyzer); while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); } String line = queryString != null ? 
queryString : in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } Query query = parser.parse(line); System.out.println("Searching for: " + query.toString(field)); if (repeat > 0) { // repeat & time as benchmark Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, null, 100); } Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); } doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); if (queryString != null) { break; } } reader.close(); }
From source file:com.nec.scg.senseRanking.CountTextSimilarity.java
/**
 * Initializes the Lucene analyzer, directory, reader and searcher fields and
 * loads the string/integer cache from {@code cache_file}.
 *
 * NOTE(review): if opening the index fails, the IOException is only printed and
 * {@code ireader}/{@code isearcher} remain unset — callers presumably tolerate
 * that, but verify before relying on these fields.
 */
private void init() {
    analyzer = new StandardAnalyzer(Version.LUCENE_43);
    try {
        // Hard-coded on-disk index location.
        directory = FSDirectory.open(new File("D:\\KBTextIndex"));
        ireader = DirectoryReader.open(directory);
        isearcher = new IndexSearcher(ireader);
    } catch (IOException e) {
        e.printStackTrace();
    }
    cache = new StringIntegerCache(cache_file);
    cache.readCache();
}
From source file:com.netcrest.pado.index.provider.lucene.LuceneSearch.java
License:Open Source License
/**
 * Runs a Lucene query against the supplied directory and collects the identity
 * keys of all matching temporal entries.
 *
 * @param queryString Lucene query text; '-' characters are escaped before parsing
 * @param dir index directory to search
 * @return set of identity keys extracted from matching documents (may be empty)
 * @throws RuntimeException if the index cannot be opened or read
 * @throws PadoException if the query text cannot be parsed
 */
protected Set<Object> getIdentityKeySet(String queryString, Directory dir) {
    Set<Object> identityKeySet = new HashSet<Object>();
    DirectoryReader reader;
    try {
        reader = DirectoryReader.open(dir);
    } catch (IOException e1) { // also covers CorruptIndexException
        e1.printStackTrace();
        throw new RuntimeException(e1);
    }
    try {
        Query query;
        try {
            StandardQueryParser parser = new StandardQueryParser(new StandardAnalyzer(LUCENE_VERSION));
            query = parser.parse(queryString.replaceAll("\\-", "\\\\-"), "__doc");
        } catch (Exception ex) {
            // Lucene bug. Unable to serialize exception. Log the message and
            // throw a new exception with the string message.
            ex.printStackTrace();
            throw new PadoException(ex.getMessage());
        }
        IndexSearcher searcher = new IndexSearcher(reader);
        try {
            TopDocs results = searcher.search(query, null, Integer.MAX_VALUE);
            for (ScoreDoc hit : results.scoreDocs) {
                Document doc;
                try {
                    doc = searcher.doc(hit.doc);
                } catch (IOException e) {
                    e.printStackTrace();
                    throw new RuntimeException(e);
                }
                LuceneField luceneField = new LuceneField();
                ITemporalKey temporalKey = luceneField.getTemporalKey(doc);
                if (temporalKey != null) {
                    identityKeySet.add(temporalKey.getIdentityKey());
                }
            }
        } catch (IOException e1) {
            e1.printStackTrace();
            throw new RuntimeException(e1);
        }
    } finally {
        // Fix: the reader was previously never closed, leaking index file handles.
        try {
            reader.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    return identityKeySet;
}
From source file:com.netcrest.pado.index.provider.lucene.LuceneSearch.java
License:Open Source License
protected Set<ITemporalKey> getTemporalKeySet(String queryString, Directory dir) { Set<ITemporalKey> temporalKeySet = new HashSet<ITemporalKey>(); DirectoryReader reader;/*from w ww. j a va2s.com*/ try { reader = DirectoryReader.open(dir); } catch (CorruptIndexException e1) { e1.printStackTrace(); throw new RuntimeException(e1); } catch (IOException e1) { e1.printStackTrace(); throw new RuntimeException(e1); } Query query; try { StandardQueryParser parser = new StandardQueryParser(new StandardAnalyzer(LUCENE_VERSION)); query = parser.parse(queryString.replaceAll("\\-", "\\\\-"), "__doc"); } catch (Exception ex) { // Lucene 4.7 bug, internal message not serializable // Send message instead of nesting the cause. throw new RuntimeException(ex.getMessage()); } IndexSearcher searcher = new IndexSearcher(reader); TopDocs results; try { results = searcher.search(query, null, Integer.MAX_VALUE); for (ScoreDoc hit : results.scoreDocs) { Document doc; try { doc = searcher.doc(hit.doc); } catch (CorruptIndexException e) { e.printStackTrace(); throw new RuntimeException(e); } catch (IOException e) { e.printStackTrace(); throw new RuntimeException(e); } LuceneField luceneField = new LuceneField(); ITemporalKey temporalKey = luceneField.getTemporalKey(doc); if (temporalKey != null) { temporalKeySet.add(temporalKey); } } } catch (IOException e1) { e1.printStackTrace(); throw new RuntimeException(e1); } return temporalKeySet; }
From source file:com.netcrest.pado.index.provider.lucene.TopNLuceneSearch.java
License:Open Source License
protected Set<Object> getIdentityKeySet(String queryString, Directory dir) { Set<Object> identityKeySet = new HashSet<Object>(); DirectoryReader reader;//ww w . j a v a 2s . c o m try { reader = DirectoryReader.open(dir); } catch (CorruptIndexException e1) { e1.printStackTrace(); throw new RuntimeException(e1); } catch (IOException e1) { e1.printStackTrace(); throw new RuntimeException(e1); } Query query; try { StandardQueryParser parser = new StandardQueryParser(new StandardAnalyzer(LUCENE_VERSION)); query = parser.parse(queryString.replaceAll("\\-", "\\\\-"), "__doc"); } catch (Exception ex) { // Lucene bug. Unable to serialize exception. Log the message and // throw a new exception with the string message. ex.printStackTrace(); throw new PadoException(ex.getMessage()); } IndexSearcher searcher = new IndexSearcher(reader); TopDocs results; try { results = searcher.search(query, null, Integer.MAX_VALUE); for (ScoreDoc hit : results.scoreDocs) { Document doc; try { doc = searcher.doc(hit.doc); } catch (CorruptIndexException e) { e.printStackTrace(); throw new RuntimeException(e); } catch (IOException e) { e.printStackTrace(); throw new RuntimeException(e); } // IndexableField field = doc.getField("IdentityKey"); // if (field == null) { // continue; // } // Object identityKey = field.stringValue(); // if (identityKey == null) { // identityKey = field.numericValue(); // } // if (identityKey == null) { // BytesRef br = field.binaryValue(); // if (br != null) { // byte[] blob = br.bytes; // try { // identityKey = BlobHelper.deserializeBlob(blob); // identityKeySet.add(identityKey); // } catch (Exception ex) { // Logger.warning("Identity key deserialization error", ex); // } // } else { // identityKey = field.toString(); // } // } LuceneField luceneField = new LuceneField(); ITemporalKey temporalKey = luceneField.getTemporalKey(doc); if (temporalKey != null) { float docScore = hit.score; identityKeySet.add(temporalKey.getIdentityKey()); } } } catch (IOException e1) { 
e1.printStackTrace(); throw new RuntimeException(e1); } return identityKeySet; }
From source file:com.nuvolect.deepdive.lucene.Index.java
public static JSONObject index(final String volumeId, final String searchPath, final boolean forceIndex) { if (m_interrupt[0]) { LogUtil.log(LogUtil.LogType.INDEX, "Index canceled post interrupt"); m_interrupt[0] = false;//from ww w .jav a 2 s . c om return responseInterruptIndexing(); } OmniFile cacheDir = IndexUtil.getCacheDir(volumeId, searchPath); boolean cacheDirCreated = false; try { cacheDirCreated = OmniUtil.forceMkdir(cacheDir); } catch (IOException e) { return responseFolderCreateError(searchPath); } final String luceneDirPath = cacheDir.getAbsolutePath(); boolean cacheDirExists = !cacheDirCreated; boolean indexingOngoing = m_indexThread != null && m_indexThread.isAlive(); boolean indexingRequired = !cacheDirExists || forceIndex; synchronized (m_lock) { if (indexingOngoing) { if (m_fileTreeActive) m_index_state = INDEX_STATE.filetree; else m_index_state = INDEX_STATE.indexing; } else { if (indexingRequired) m_index_state = INDEX_STATE.indexing; else m_index_state = INDEX_STATE.complete; } } if (indexingRequired || indexingOngoing) { if (indexingOngoing) { // Nothing to do, let the background process run. Monitor m_indexedDocs for progress. 
} else { synchronized (m_lock) { m_index_state = INDEX_STATE.filetree; m_totalDocs[0] = 0; m_indexedDocs[0] = 0; m_error[0] = ""; } m_threadGroup = new ThreadGroup(INDEX_THREAD_GROUP); m_indexThread = new Thread(m_threadGroup, new Runnable() { @Override public void run() { // Analyzer analyzer = new org.apache.lucene.analysis.core.WhitespaceAnalyzer(); // Analyzer analyzer = new org.apache.lucene.analysis.core.KeywordAnalyzer(); // Analyzer analyzer = new org.apache.lucene.analysis.standard.StandardAnalyzer(); Analyzer analyzer = new org.apache.lucene.analysis.core.SimpleAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter iwriter = null; try { Directory m_directory = FSDirectory.open(Paths.get(luceneDirPath)); iwriter = new IndexWriter(m_directory, config); iwriter.deleteAll(); iwriter.commit(); } catch (IOException e) { LogUtil.logException(LogUtil.LogType.INDEX, e); m_error[0] = "IndexWriter constructor exception"; } synchronized (m_lock) { m_fileTreeActive = true; m_index_state = INDEX_STATE.filetree; } Collection<OmniFile> files = IndexUtil.getFilePaths(volumeId, searchPath); synchronized (m_lock) { m_index_state = INDEX_STATE.indexing; m_fileTreeActive = false; m_totalDocs[0] = files.size(); m_indexedDocs[0] = 0; } try { for (OmniFile file : files) { if (m_interrupt[0]) { LogUtil.log(LogUtil.LogType.INDEX, "Iterator loop canceled"); break; } String path = file.getPath(); // LogUtil.log(LogUtil.LogType.INDEX, "indexing: " + path);// this is a bit excessive iwriter.addDocument(makeDoc(volumeId, path)); synchronized (m_lock) { ++m_indexedDocs[0]; } } iwriter.commit(); iwriter.close(); synchronized (m_lock) { m_index_state = m_interrupt[0] ? 
INDEX_STATE.interrupted : INDEX_STATE.complete; m_totalDocs[0] = m_indexedDocs[0]; } } catch (Exception e) { LogUtil.logException(LogUtil.LogType.INDEX, e); m_error[0] = "IndexWriter addDocument exception"; } } }, INDEX_THREAD, STACK_SIZE); m_indexThread.setPriority(Thread.MAX_PRIORITY); m_indexThread.start(); } } else { // Indexing is complete // Get number of documents indexed try { Directory directory = FSDirectory.open(Paths.get(luceneDirPath)); DirectoryReader ireader = DirectoryReader.open(directory); synchronized (m_lock) { m_indexedDocs[0] = ireader.numDocs(); m_totalDocs[0] = m_indexedDocs[0]; m_index_state = INDEX_STATE.complete; } ireader.close(); directory.close(); } catch (IOException e) { LogUtil.logException(LogUtil.LogType.INDEX, e); } } JSONObject result = new JSONObject(); try { synchronized (m_lock) { result.put("index_state", m_index_state.toString()); result.put("error", m_error[0]); result.put("indexed_docs", m_indexedDocs[0]); result.put("total_docs", m_totalDocs[0]); // result.put("full_path", cacheDir.getAbsolutePath()); result.put("search_path", searchPath); } } catch (JSONException e) { e.printStackTrace(); } return result; }
From source file:com.nuvolect.deepdive.lucene.Search.java
/** * Return results for a search along a specific path. If the path is changed or new * create an index.//w ww . j av a 2s .com * @param searchQuery * @param searchPath * @return */ public static JSONObject search(String searchQuery, String volumeId, String searchPath) { JSONObject result = new JSONObject(); JSONArray jsonArray = new JSONArray(); Context ctx = App.getContext(); DirectoryReader ireader = null; ScoreDoc[] scoreDocs = null; String error = ""; preSearch(volumeId, searchPath); try { ireader = DirectoryReader.open(m_directory); } catch (IOException e) { LogUtil.logException(LogUtil.LogType.SEARCH, e); error += e.toString(); } IndexSearcher isearcher = new IndexSearcher(ireader); Query query = null; try { LogUtil.log(LogUtil.LogType.SEARCH, "query: " + searchQuery + ", vid: " + volumeId + ", path: " + searchPath); // Parse a simple query that searches for "text": QueryParser parser = new QueryParser(CConst.FIELD_CONTENT, m_analyzer); query = parser.parse(searchQuery); TopScoreDocCollector collector = TopScoreDocCollector.create(MAX_HITS); isearcher.search(query, collector); scoreDocs = collector.topDocs().scoreDocs; } catch (ParseException | IOException e) { LogUtil.logException(LogUtil.LogType.SEARCH, e); error += e.toString(); } // Iterate through the results creating an object for each file HashMap<String, Integer> hitCounts = new HashMap<>(); HashMap<String, Integer> hitIndexes = new HashMap<>(); /** * First iterate the hit list and count duplicates based on file path. 
*/ for (int ii = 0; scoreDocs != null && ii < scoreDocs.length; ++ii) { Document hitDoc = null; int fileHits = 1; try { hitDoc = isearcher.doc(scoreDocs[ii].doc); Explanation explanation = isearcher.explain(query, scoreDocs[ii].doc); Explanation[] details = explanation.getDetails(); String description = details[0].getDescription(); /** * FIXME, find a better way to count hits in each file */ if (description.contains("=")) { String[] lineParts = description.split("="); String[] elementParts = lineParts[2].split(Pattern.quote(")")); if (elementParts.length > 0) { fileHits = ((int) Double.parseDouble(elementParts[0])); } } } catch (IOException e) { LogUtil.logException(LogUtil.LogType.SEARCH, e); error += e.toString(); } String filePath = hitDoc.get((CConst.FIELD_PATH)); if (hitCounts.containsKey(filePath)) { hitCounts.put(filePath, hitCounts.get(filePath) + fileHits); } else { hitCounts.put(filePath, fileHits); hitIndexes.put(filePath, ii); } } /** * Iterate over each unique hit and save the results */ for (Map.Entry<String, Integer> uniqueHit : hitIndexes.entrySet()) { Document hitDoc = null; try { hitDoc = isearcher.doc(scoreDocs[uniqueHit.getValue()].doc); } catch (IOException e) { LogUtil.logException(LogUtil.LogType.SEARCH, e); error += e.toString(); } String file_name = hitDoc.get((CConst.FIELD_FILENAME)); String file_path = hitDoc.get((CConst.FIELD_PATH)); try { String folder_url = OmniHash.getStartPathUrl(ctx, volumeId, file_path); JSONObject hitObj = new JSONObject(); hitObj.put("volume_id", volumeId); hitObj.put("file_path", file_path); hitObj.put("file_name", file_name); hitObj.put("folder_url", folder_url); hitObj.put("num_hits", hitCounts.get(file_path)); hitObj.put("error", error); jsonArray.put(hitObj); } catch (Exception e) { LogUtil.logException(LogUtil.LogType.SEARCH, e); } } int num_hits = scoreDocs != null ? scoreDocs.length : 0; try { result.put("hits", jsonArray != null ? 
jsonArray : new JSONArray()); result.put("num_hits", num_hits); result.put("error", error); ireader.close(); m_directory.close(); } catch (JSONException | IOException e) { LogUtil.logException(LogUtil.LogType.SEARCH, e); } return result; }
From source file:com.o19s.es.explore.ExplorerQueryTests.java
License:Apache License
@Before public void setupIndex() throws Exception { dir = new RAMDirectory(); try (IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER))) { for (int i = 0; i < docs.length; i++) { Document doc = new Document(); doc.add(new Field("_id", Integer.toString(i + 1), StoredField.TYPE)); doc.add(newTextField("text", docs[i], Field.Store.YES)); indexWriter.addDocument(doc); }//from w w w . j a v a 2 s.co m } reader = DirectoryReader.open(dir); searcher = new IndexSearcher(reader); }
From source file:com.o19s.es.ltr.logging.LoggingFetchSubPhaseTests.java
License:Apache License
/**
 * Builds a randomized test index of 20-100 documents (field "foo" or "bar",
 * random float score field) and exposes a searcher over it. The reader is
 * registered for automatic close at suite teardown.
 */
@BeforeClass
public static void init() throws Exception {
    directory = newDirectory(random());
    try (IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(new StandardAnalyzer()))) {
        int docCount = TestUtil.nextInt(random(), 20, 100);
        docs = new HashMap<>();
        for (int n = 0; n < docCount; n++) {
            String fieldName = random().nextBoolean() ? "foo" : "bar";
            Document document = buildDoc(fieldName, random().nextFloat());
            indexWriter.addDocument(document);
            // Occasionally commit mid-stream so the index ends up with multiple segments.
            if (random().nextInt(4) == 0) {
                indexWriter.commit();
            }
            docs.put(document.get("id"), document);
        }
        indexWriter.commit();
    }
    IndexReader indexReader = closeAfterSuite(DirectoryReader.open(directory));
    searcher = new IndexSearcher(indexReader);
}