List of usage examples for org.apache.lucene.index.IndexReader.close()
@Override public final synchronized void close() throws IOException
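All of the examples below call close() explicitly on the normal control path, which leaks the reader if an exception is thrown first. Since IndexReader implements Closeable, try-with-resources (Java 7+) closes it on every path. A minimal sketch, assuming a Lucene 4.x index in a hypothetical "index" directory:

import java.io.File;
import java.io.IOException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;

public class ReaderCloseSketch {
    public static void main(String[] args) throws IOException {
        // try-with-resources closes the reader even if search() throws;
        // IndexReader implements Closeable, so no explicit close() is needed
        try (IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index")))) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs hits = searcher.search(new TermQuery(new Term("contents", "lucene")), 10);
            System.out.println("total hits: " + hits.totalHits);
        } // reader.close() is called implicitly here
    }
}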
From source file:coreservlets.consolesearch.java
License:Apache License
/** Simple command-line based search demo. */
public static void main(String[] args) throws Exception {
    String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details.";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }

    String index = "index";
    String queries = null;
    int repeat = 0;
    boolean raw = false;
    String queryString = null;
    int hitsPerPage = 10;
    String[] fields = { "title", "description", "keywords", "contents" };

    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            index = args[i + 1];
            i++;
        } /*else if ("-field".equals(args[i])) {
            fields = field.args[i+1];
            i++;
        } */ else if ("-queries".equals(args[i])) {
            queries = args[i + 1];
            i++;
        } else if ("-query".equals(args[i])) {
            queryString = args[i + 1];
            i++;
        } else if ("-repeat".equals(args[i])) {
            repeat = Integer.parseInt(args[i + 1]);
            i++;
        } else if ("-raw".equals(args[i])) {
            raw = true;
        } else if ("-paging".equals(args[i])) {
            hitsPerPage = Integer.parseInt(args[i + 1]);
            if (hitsPerPage <= 0) {
                System.err.println("There must be at least 1 hit per page.");
                System.exit(1);
            }
            i++;
        }
    }

    System.out.println(System.getenv());
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
    IndexSearcher searcher = new IndexSearcher(reader);
    //Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
    Analyzer analyzer = new ICTCLASAnalyzer();
    MultiFieldQueryParser mp = new MultiFieldQueryParser(Version.LUCENE_44, fields, analyzer);

    BufferedReader in = null;
    if (queries != null) {
        in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
    } else {
        in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
    }
    // QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer);
    while (true) {
        if (queries == null && queryString == null) {
            // prompt the user
            System.out.println("Enter query: ");
        }

        String line = queryString != null ? queryString : in.readLine();
        if (line == null || line.length() == -1) {
            break;
        }

        line = line.trim();
        if (line.length() == 0) {
            break;
        }
        System.out.println(line);
        Query query = mp.parse(line);
        System.out.println("Searching for: " + query.toString());

        if (repeat > 0) { // repeat & time as benchmark
            Date start = new Date();
            for (int i = 0; i < repeat; i++) {
                searcher.search(query, null, 100);
            }
            Date end = new Date();
            System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
        }

        doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null);

        if (queryString != null) {
            break;
        }
    }
    reader.close();
}
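This demo is written against the Lucene 4.x file API. From Lucene 5.0 on, FSDirectory.open takes a java.nio.file.Path instead of a java.io.File; a sketch of the equivalent open call under the newer API:

import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;

class OpenReaderLucene5 {
    // Lucene 5.0+ removed the File-based FSDirectory.open overload in favor of Path
    static IndexReader open(String indexDir) throws IOException {
        return DirectoryReader.open(FSDirectory.open(Paths.get(indexDir)));
    }
}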
From source file:cz.muni.fi.japanesedictionary.engine.FragmentListAsyncTask.java
License:Open Source License
/**
 * Loads translation using Lucene.
 */
@Override
protected List<Translation> doInBackground(String... params) {
    String expression = params[0];
    String part = params[1];
    SharedPreferences settings = mContext.getSharedPreferences(ParserService.DICTIONARY_PREFERENCES, 0);
    String pathToDictionary = settings.getString(Const.PREF_JMDICT_PATH, null);
    SharedPreferences sharedPrefs = PreferenceManager.getDefaultSharedPreferences(mContext);
    final boolean englishBool = sharedPrefs.getBoolean("language_english", false);
    final boolean frenchBool = sharedPrefs.getBoolean("language_french", false);
    final boolean dutchBool = sharedPrefs.getBoolean("language_dutch", false);
    final boolean germanBool = sharedPrefs.getBoolean("language_german", false);
    final boolean russianBool = sharedPrefs.getBoolean("language_russian", false);
    final boolean searchOnlyFavorised = sharedPrefs.getBoolean("search_only_favorite", false);
    final boolean searchDeinflected = sharedPrefs.getBoolean("search_deinflected", false);
    final List<Translation> translations = new ArrayList<>();

    if (expression == null) {
        // first run
        Log.i(LOG_TAG, "First run - last 10 translations");
        GlossaryReaderContract database = new GlossaryReaderContract(mContext);
        List<Translation> translationsTemp = database.getLastTranslations(10);
        database.close();
        return translationsTemp;
    }
    if (pathToDictionary == null) {
        Log.e(LOG_TAG, "No path to jmdict dictionary");
        return null;
    }
    File file = new File(pathToDictionary);
    if (!file.exists() || !file.canRead()) {
        Log.e(LOG_TAG, "Can't read jmdict dictionary directory");
        return null;
    }
    if (expression.length() < 1) {
        Log.w(LOG_TAG, "No expression to translate");
        return null;
    }

    Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_36);
    IndexReader reader;
    try {
        final String search;
        final String hiragana;
        boolean onlyReb = false;
        if (Pattern.matches("\\p{Latin}*", expression)) {
            // only romaji
            onlyReb = true;
            Log.i(LOG_TAG, "Only latin letters, converting to hiragana.");
            expression = TranscriptionConverter.kunreiToHepburn(expression);
            expression = RomanizationEnum.Hepburn.toHiragana(expression);
        }
        hiragana = expression;
        expression = insertSpaces(expression);
        switch (part) {
        case "end":
            search = "\"" + expression + "lucenematch\"";
            break;
        case "beginning":
            search = "\"lucenematch " + expression + "\"";
            break;
        case "middle":
            search = "\"" + expression + "\"";
            break;
        default:
            if (searchDeinflected) {
                StringBuilder sb = new StringBuilder("\"lucenematch " + expression + "lucenematch\"");
                for (Predicate predicate : Deconjugator.deconjugate(hiragana)) {
                    if (predicate.isSuru()) {
                        sb.append(" OR ").append("(\"lucenematch ")
                                .append(insertSpaces(predicate.getPredicate()))
                                .append("lucenematch\" AND (pos:vs OR pos:vs-c OR pos:vs-s OR pos:vs-i))");
                    } else if (predicate.isKuru()) {
                        sb.append(" OR ").append("(\"lucenematch ")
                                .append(insertSpaces(predicate.getPredicate()))
                                .append("lucenematch\" AND pos:vk)");
                    } else if (predicate.isIku()) {
                        sb.append(" OR ").append("(\"lucenematch ")
                                .append(insertSpaces(predicate.getPredicate()))
                                .append("lucenematch\" AND pos:v5k-s)");
                    } else if (predicate.isIAdjective()) {
                        sb.append(" OR ").append("(\"lucenematch ")
                                .append(insertSpaces(predicate.getPredicate()))
                                .append("lucenematch\" AND pos:adj-i)");
                    } else {
                        sb.append(" OR ").append("(\"lucenematch ")
                                .append(insertSpaces(predicate.getPredicate()))
                                .append("lucenematch\" AND (pos:v1 OR pos:v2 OR pos:v5 OR pos:vz OR pos:vi OR pos:vn OR pos:vr))");
                    }
                }
                search = sb.toString();
            } else {
                search = "\"lucenematch " + expression + "lucenematch\"";
            }
        }
        Log.i(LOG_TAG, " Searching for: " + search);

        Query q;
        if (onlyReb) {
            q = (new QueryParser(Version.LUCENE_36, "index_japanese_reb", analyzer)).parse(search);
        } else {
            StandardQueryParser parser = new StandardQueryParser(analyzer);
            q = parser.parse(search, "japanese");
        }
        Directory dir = FSDirectory.open(file);
        reader = IndexReader.open(dir);
        final IndexSearcher searcher = new IndexSearcher(reader);
        Collector collector = new Collector() {
            int max = 1000;
            int count = 0;
            private int docBase;

            @Override
            public boolean acceptsDocsOutOfOrder() {
                return true;
            }

            @Override
            public void collect(int docID) throws IOException {
                Document d = searcher.doc(docID + docBase);
                Translation translation = new Translation();
                String prioritized = d.get("prioritized");
                if (searchOnlyFavorised && prioritized == null) {
                    return;
                }
                if (prioritized != null) {
                    // is prioritized
                    translation.setPrioritized(true);
                }
                String ruby = d.get("ruby");
                if (ruby != null && ruby.length() > 0) {
                    translation.setRuby(ruby);
                }
                String japanese_keb = d.get("japanese_keb");
                if (japanese_keb != null && japanese_keb.length() != 0) {
                    translation.parseJapaneseKeb(japanese_keb);
                }
                String japanese_reb = d.get("japanese_reb");
                if (japanese_reb != null && japanese_reb.length() != 0) {
                    translation.parseJapaneseReb(japanese_reb);
                }
                String english = d.get("english");
                if (english != null && english.length() != 0) {
                    translation.parseEnglish(english);
                }
                String french = d.get("french");
                if (french != null && french.length() != 0) {
                    translation.parseFrench(french);
                }
                String dutch = d.get("dutch");
                if (dutch != null && dutch.length() != 0) {
                    translation.parseDutch(dutch);
                }
                String german = d.get("german");
                if (german != null && german.length() != 0) {
                    translation.parseGerman(german);
                }
                String russian = d.get("russian");
                if (russian != null && russian.length() != 0) {
                    translation.parseRussian(russian);
                }
                if ((englishBool && translation.getEnglishSense() != null)
                        || (dutchBool && translation.getDutchSense() != null)
                        || (germanBool && translation.getGermanSense() != null)
                        || (frenchBool && translation.getFrenchSense() != null)
                        || (russianBool && translation.getRussianSense() != null)) {
                    count++;
                    if (count < max) {
                        if (!FragmentListAsyncTask.this.isCancelled()) {
                            FragmentListAsyncTask.this.publishProgress(translation);
                            translations.add(translation);
                        } else {
                            translations.clear();
                            throw new IOException("Loader canceled");
                        }
                    } else {
                        throw new IOException("Max exceeded");
                    }
                }
            }

            @Override
            public void setNextReader(IndexReader reader, int docBas) throws IOException {
                docBase = docBas;
            }

            @Override
            public void setScorer(Scorer arg0) throws IOException {
            }
        };
        searcher.search(q, collector);
        reader.close();
    } catch (IOException ex) {
        Log.e(LOG_TAG, "IO Exception: " + ex.toString());
        return translations;
    } catch (Exception ex) {
        Log.e(LOG_TAG, "Exception: " + ex.toString());
        return null;
    }
    return translations.isEmpty() ? null : translations;
}
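The anonymous Collector above terminates the search early by throwing IOException("Max exceeded") from collect(), which searcher.search() propagates to the surrounding catch. The same trick reads more clearly with a dedicated marker exception. A sketch against the Lucene 3.6 Collector API; the class names EarlyTerminationException and CountingCollector are illustrative, not from the source above:

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;

// marker exception: thrown from collect() to abort the search loop early
class EarlyTerminationException extends IOException {
}

class CountingCollector extends Collector {
    private final int max;
    private int count = 0;

    CountingCollector(int max) {
        this.max = max;
    }

    @Override
    public void setScorer(Scorer scorer) {
        // scores are not needed for counting
    }

    @Override
    public void collect(int doc) throws IOException {
        if (++count >= max) {
            throw new EarlyTerminationException(); // stops searcher.search()
        }
    }

    @Override
    public void setNextReader(IndexReader reader, int docBase) {
    }

    @Override
    public boolean acceptsDocsOutOfOrder() {
        return true; // order does not matter when only counting
    }
}

Used with the searcher and query from the example above, the catch block then marks the expected early exit:

try {
    searcher.search(q, new CountingCollector(1000));
} catch (EarlyTerminationException expected) {
    // collected enough hits; fall through
}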
From source file:de.dkt.eservices.elucene.indexmanagement.SearchFiles.java
License:Apache License
/**
 * Searches a query against one or more fields of an index and returns up to hitsToReturn documents.
 * @param index index in which to search for the query text
 * @param sFields semicolon-separated list of document fields to match the query against
 * @param sAnalyzers semicolon-separated list of analyzers, one per field
 * @param queryType type of the query to be built
 * @param queryString text of the input query
 * @param language language of the query
 * @param hitsToReturn number of documents to be returned
 * @return JSONObject containing the results information and content
 * @throws ExternalServiceFailedException
 */
public static JSONObject search(String index, String sFields, String sAnalyzers, String queryType,
        String queryString, String language, int hitsToReturn) throws ExternalServiceFailedException {
    try {
        Date start = new Date();
        File f = FileFactory.generateFileInstance(indexDirectory + index);
        if (f == null || !f.exists()) {
            throw new ExternalServiceFailedException(
                    "Specified index [" + indexDirectory + index + "] does not exists.");
        }
        logger.info("Searching in folder: " + f.getAbsolutePath());
        Directory dir = FSDirectory.open(f);
        IndexReader reader = DirectoryReader.open(dir);
        IndexSearcher searcher = new IndexSearcher(reader);
        //System.out.println(reader.docFreq(new Term("content", "madrid")));
        Document doc = reader.document(0);
        //System.out.println(reader.numDocs());
        //System.out.println(doc);

        String[] fields = sFields.split(";");
        String[] analyzers = sAnalyzers.split(";");
        if (fields.length != analyzers.length) {
            logger.error("The number of fields and analyzers is different");
            throw new BadRequestException("The number of fields and analyzers is different");
        }
        Query query = OwnQueryParser.parseQuery(queryType, queryString, fields, analyzers, language);

        TopDocs results = searcher.search(query, hitsToReturn);
        Explanation exp = searcher.explain(query, 0);

        Date end = new Date();
        logger.info("Time: " + (end.getTime() - start.getTime()) + "ms");

        JSONObject resultModel = JSONLuceneResultConverter.convertResults(query, searcher, results);
        reader.close();
        return resultModel;
    } catch (IOException e) {
        e.printStackTrace();
        throw new ExternalServiceFailedException("IOException with message: " + e.getMessage());
    }
}
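OwnQueryParser above is project-specific. For reference, stock Lucene handles the fields-with-different-analyzers case via PerFieldAnalyzerWrapper. A sketch using Lucene 4.x classes; the field names and analyzer choices are illustrative only:

import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.util.Version;

class PerFieldParserSketch {
    static MultiFieldQueryParser buildParser() {
        // give the "title" field its own analyzer; all other fields fall back
        // to the default StandardAnalyzer
        Map<String, Analyzer> perField = new HashMap<>();
        perField.put("title", new WhitespaceAnalyzer(Version.LUCENE_44));
        Analyzer wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Version.LUCENE_44), perField);
        return new MultiFieldQueryParser(Version.LUCENE_44, new String[] { "title", "content" }, wrapper);
    }
}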
From source file:de.elbe5.cms.search.SearchBean.java
License:Open Source License
public void searchContent(ContentSearchResultData result) {
    result.getResults().clear();
    String[] fieldNames = result.getFieldNames();
    ScoreDoc[] hits = null;
    float maxScore = 0f;
    try {
        String indexPath = ApplicationPath.getAppPath() + "contentindex";
        ensureDirectory(indexPath);
        IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath)));
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer();
        MultiFieldQueryParser parser = new MultiFieldQueryParser(fieldNames, analyzer);
        String pattern = result.getPattern();
        pattern = pattern.trim();
        Query query = null;
        if (pattern.length() != 0) {
            query = parser.parse(pattern);
            //Log.log("Searching for: " + query.toString());
            TopDocs topDocs = searcher.search(query, result.getMaxSearchResults());
            hits = topDocs.scoreDocs;
            maxScore = topDocs.getMaxScore();
        }
        if (hits != null) {
            for (ScoreDoc hit : hits) {
                Document doc = searcher.doc(hit.doc);
                ContentSearchData data = null;
                String type = doc.get("type");
                switch (type) {
                case SiteSearchData.TYPE:
                    data = new SiteSearchData();
                    break;
                case PageSearchData.TYPE:
                    data = new PageSearchData();
                    break;
                case FileSearchData.TYPE:
                    data = new FileSearchData();
                    break;
                }
                assert (data != null);
                data.setDoc(doc);
                data.setScore(maxScore <= 1f ? hit.score : hit.score / maxScore);
                data.evaluateDoc();
                data.setContexts(query, analyzer);
                result.getResults().add(data);
            }
        }
        reader.close();
    } catch (Exception ignore) {
    }
}
From source file:de.elbe5.cms.search.SearchBean.java
License:Open Source License
public void searchUsers(UserSearchResultData result) {
    result.getResults().clear();
    String[] fieldNames = result.getFieldNames();
    ScoreDoc[] hits = null;
    float maxScore = 0f;
    try {
        String indexPath = ApplicationPath.getAppPath() + "userindex";
        ensureDirectory(indexPath);
        IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath)));
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer();
        MultiFieldQueryParser parser = new MultiFieldQueryParser(fieldNames, analyzer);
        String pattern = result.getPattern();
        pattern = pattern.trim();
        Query query = null;
        if (pattern.length() != 0) {
            query = parser.parse(pattern);
            //Log.log("Searching for: " + query.toString());
            TopDocs topDocs = searcher.search(query, result.getMaxSearchResults());
            hits = topDocs.scoreDocs;
            maxScore = topDocs.getMaxScore();
        }
        if (hits != null) {
            for (ScoreDoc hit : hits) {
                Document doc = searcher.doc(hit.doc);
                UserSearchData data = new UserSearchData();
                data.setDoc(doc);
                data.setScore(maxScore <= 1f ? hit.score : hit.score / maxScore);
                data.evaluateDoc();
                data.setContexts(query, analyzer);
                result.getResults().add(data);
            }
        }
        reader.close();
    } catch (Exception ignore) {
    }
}
From source file:de.hsmannheim.ss15.alr.searchengine.DefaultLuceneController.java
public List<StoredDocument> doSearch(String queryString) throws IOException, ParseException {
    String field = "contents";
    String queries = null;
    boolean raw = false;
    int hitsPerPage = 10;

    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexDir)));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer();

    BufferedReader in = null;
    if (queries != null) {
        in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8);
    } else {
        in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
    }
    QueryParser parser = new QueryParser(field, analyzer);
    Query query = parser.parse(queryString);
    Highlighter highlighter = new Highlighter(new QueryScorer(query));

    TotalHitCountCollector collector = new TotalHitCountCollector();
    searcher.search(query, collector);
    TopDocs topDocs = searcher.search(query, Math.max(1, collector.getTotalHits()));
    List<StoredDocument> results = new ArrayList<>();
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        StoredDocument doc = searcher.doc(scoreDoc.doc);
        try {
            File file = new File(doc.get("path"));
            BufferedReader docReader = new BufferedReader(
                    new InputStreamReader(Files.newInputStream(file.toPath()), StandardCharsets.UTF_8));
            List<String> lines = new ArrayList<>();
            while (docReader.ready()) {
                lines.add(docReader.readLine());
            }
            // drop the first three (header) lines of the file
            lines.remove(0);
            lines.remove(0);
            lines.remove(0);
            String content = "";
            for (String s : lines) {
                content = content + s;
            }
            String highLight = highlighter.getBestFragment(analyzer, null, content);
            if (highLight == null) {
                LOGGER.warn("No Highlight found");
            } else {
                doc.add(new TextField("highlight", highLight, Field.Store.YES));
            }
        } catch (InvalidTokenOffsetsException ex) {
            LOGGER.warn("No Highlight found");
        }
        results.add(doc);
    }
    reader.close();
    return results;
}
From source file:de.hybris.platform.lucenesearch.jalo.LuceneTest.java
License:Open Source License
private void assertTermSearch(final Collection documents, final String term) throws IOException {
    final Query query = new TermQuery(new Term("text", term));
    final IndexReader reader = IndexReader.open(directory);
    final IndexSearcher searcher = new IndexSearcher(reader);
    final TopDocs hits = searcher.search(query, Integer.MAX_VALUE);
    assertHits(documents, hits, searcher);
    reader.close();
}
From source file:de.ingrid.interfaces.csw.index.impl.IngridGeoTKLuceneIndexer.java
License:EUPL
/**
 * Removes documents identified by the given query from the index.
 *
 * @param queryString the query identifying the documents to remove
 * @return list of the "id" field values of the deleted records
 * @throws ParseException
 */
public List<String> removeDocumentByQuery(final String queryString) throws ParseException {
    List<String> deletedRecords = new ArrayList<String>();
    try {
        final QueryParser parser = new QueryParser(Version.LUCENE_36, "anytext", analyzer);
        Query query = parser.parse(queryString);

        final IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        final IndexWriter writer = new IndexWriter(LuceneUtils.getAppropriateDirectory(getFileDirectory()),
                config);
        LOGGER.log(logLevel, "Query:{0}", query);

        IndexReader reader = IndexReader.open(writer, false);
        IndexSearcher searcher = new IndexSearcher(reader);
        TopDocs docs = searcher.search(query, Integer.MAX_VALUE);
        for (ScoreDoc doc : docs.scoreDocs) {
            deletedRecords.add(reader.document(doc.doc).get("id"));
        }
        writer.deleteDocuments(query);
        writer.commit();
        searcher.close();
        reader.close();
        writer.close();
    } catch (CorruptIndexException ex) {
        LOGGER.log(Level.WARNING, "CorruptIndexException while indexing document: " + ex.getMessage(), ex);
    } catch (IOException ex) {
        LOGGER.log(Level.WARNING, "IOException while indexing document: " + ex.getMessage(), ex);
    }
    return deletedRecords;
}
From source file:de.innovationgate.wgpublisher.lucene.LuceneManager.java
License:Open Source License
public List<LuceneTerm> getTerms(String fieldname) throws CorruptIndexException, IOException {
    IndexReader reader = IndexReader.open(_indexDirectory);
    TermEnum terms = reader.terms();
    List<LuceneTerm> uniqueTerms = new ArrayList<LuceneTerm>();
    while (terms.next()) {
        Term term = terms.term();
        if (term.field().equals(fieldname)) {
            uniqueTerms.add(new LuceneTerm(term.text(), terms.docFreq()));
        }
    }
    reader.close();
    return uniqueTerms;
}
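TermEnum and reader.terms() were removed in Lucene 4.0. A rough 4.x equivalent of the loop above uses MultiFields and TermsEnum; note that MultiFields.getTerms() already restricts the enumeration to one field, so the field-name check disappears. A sketch only, assuming the same intent as the method above:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

class TermListSketch {
    static List<String> getTerms(IndexReader reader, String fieldname) throws IOException {
        List<String> result = new ArrayList<String>();
        Terms terms = MultiFields.getTerms(reader, fieldname); // terms of this field only
        if (terms != null) {
            TermsEnum te = terms.iterator(null);
            BytesRef term;
            while ((term = te.next()) != null) {
                result.add(term.utf8ToString() + " (" + te.docFreq() + ")");
            }
        }
        return result;
    }
}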
From source file:de.linguatools.disco.DISCO.java
License:Apache License
/***************************************************************************
 * Runs through all documents (i.e. queryable words) in the index, retrieves
 * each word and its frequency, and writes both to the file named
 * outputFileName. This method can be used to check index integrity.<br/>
 * @param outputFileName
 * @return number of words written to the output file. In case of success the
 * value is equal to the number of words in the index.
 */
public int wordFrequencyList(String outputFileName) {
    // create an IndexReader for the index directory
    IndexReader ir = null;
    try {
        if (indexRAM != null) {
            ir = IndexReader.open(indexRAM);
        } else {
            ir = IndexReader.open(FSDirectory.open(new File(indexName)));
        }
    } catch (CorruptIndexException ex) {
        System.out.println(DISCO.class.getName() + ": " + ex);
        return -1;
    } catch (IOException ex) {
        System.out.println(DISCO.class.getName() + ": " + ex);
        return -1;
    }
    // get the number of documents in the index
    int N = ir.numDocs();
    // open the output file
    FileWriter fw;
    try {
        fw = new FileWriter(outputFileName);
    } catch (IOException ex) {
        System.out.println(DISCO.class.getName() + ": " + ex);
        return -1;
    }
    // iterate over all documents
    int corrupt = 0;
    int ioerror = 0;
    int i = 0;
    for (i = 0; i < N; i++) {
        Document doc = null;
        try {
            doc = ir.document(i);
        } catch (CorruptIndexException ex) {
            corrupt++;
            continue;
        } catch (IOException ex) {
            ioerror++;
            continue;
        }
        // fetch word no. i
        String word = doc.get("word");
        // fetch the frequency of word i
        int f = Integer.parseInt(doc.get("freq"));
        try {
            // write word and frequency to the output file
            fw.write(word + "\t" + f + "\n");
        } catch (IOException ex) {
            System.out.println(DISCO.class.getName() + ": word " + i + ": " + ex);
            return i;
        }
        // print progress info
        if (i % 100 == 0) {
            System.out.print("\r" + i);
        }
    }
    System.out.println();
    if (corrupt > 0 || ioerror > 0) {
        int e = corrupt + ioerror;
        System.out.println("*** WARNING! ***");
        System.out.println("The language data packet \"" + indexName + "\" " + "has " + e
                + " defect entries (" + corrupt + " corrupt, " + ioerror + " IO errors)");
        System.out.println("All functioning words have been written to " + outputFileName);
    }
    // clean up
    try {
        fw.close();
        ir.close();
    } catch (IOException ex) {
        System.out.println(DISCO.class.getName() + ": " + ex);
        return -1;
    }
    return (i - corrupt - ioerror);
}
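A caveat on the loop above: it runs document IDs from 0 to numDocs() - 1, which is only safe when the index contains no deletions, because IDs actually range up to maxDoc() and deleted slots stay addressable. A deletion-safe variant of the iteration, sketched against the same Lucene 3.x API:

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;

class DocIterationSketch {
    static void dumpWords(IndexReader ir) throws IOException {
        // maxDoc() covers all document ID slots, including deleted ones
        for (int i = 0; i < ir.maxDoc(); i++) {
            if (ir.isDeleted(i)) {
                continue; // skip deleted document slots
            }
            Document doc = ir.document(i);
            System.out.println(doc.get("word") + "\t" + doc.get("freq"));
        }
    }
}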