List of usage examples for org.apache.lucene.index.IndexReader.close()
@Override public final synchronized void close() throws IOException
From source file:com.parc.uplib.indexing.UpLibQueryParser.java
License:Open Source License
private static void remove(File index_file, String[] doc_ids, int start) { String number;//w w w.j a va 2 s. c o m String list; Term term; TermDocs matches; if (debug_mode) System.err.println("index file is " + index_file + " and it " + (index_file.exists() ? "exists." : "does not exist.")); try { if (index_file.exists() && (doc_ids.length > start)) { IndexReader reader = IndexReader.open(index_file); try { for (int i = start; i < doc_ids.length; i++) { term = new Term("id", doc_ids[i]); int deleted = reader.deleteDocuments(term); System.out.println("Deleted " + deleted + " existing instances of " + doc_ids[i]); } } finally { reader.close(); } } } catch (Exception e) { if (debug_mode) { e.printStackTrace(System.err); } else { System.out.println( "* LuceneIndexing 'remove' raised " + e.getClass() + " with message " + e.getMessage()); System.err.println( "LuceneIndexing 'remove': caught a " + e.getClass() + "\n with message: " + e.getMessage()); System.out.flush(); } System.exit(JAVA_EXCEPTION); } System.out.flush(); }
From source file:com.radialpoint.word2vec.lucene.SearchFiles.java
License:Open Source License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage:\tjava com.radialpoint.word2vec.lucene.SearchFiles [-index dir] [-vectors v] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { System.out.println(usage); System.exit(0);//from w w w .j a va 2s . c om } String index = "index"; String field = "contents"; String queries = null; String vectors = "vectors"; int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 10; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index = args[i + 1]; i++; } else if ("-field".equals(args[i])) { field = args[i + 1]; i++; } else if ("-vectors".equals(args[i])) { vectors = args[i + 1]; i++; } else if ("-queries".equals(args[i])) { queries = args[i + 1]; i++; } else if ("-query".equals(args[i])) { queryString = args[i + 1]; i++; } else if ("-repeat".equals(args[i])) { repeat = Integer.parseInt(args[i + 1]); i++; } else if ("-raw".equals(args[i])) { raw = true; } else if ("-paging".equals(args[i])) { hitsPerPage = Integer.parseInt(args[i + 1]); if (hitsPerPage <= 0) { System.err.println("There must be at least 1 hit per page."); System.exit(1); } i++; } } IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index))); IndexSearcher searcher = new IndexSearcher(reader); // Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40); final File vectorsFile = new File(vectors); Analyzer analyzer = new Analyzer() { @SuppressWarnings("deprecation") @Override protected TokenStreamComponents createComponents(final String fieldName, final java.io.Reader reader) { final StandardTokenizer src = new StandardTokenizer(Version.LUCENE_40, reader); src.setMaxTokenLength(15); TokenStream tok = new StandardFilter(Version.LUCENE_40, src); tok = new 
LowerCaseFilter(Version.LUCENE_40, tok); tok = new StopFilter(Version.LUCENE_40, tok, StandardAnalyzer.STOP_WORDS_SET); TokenStream baseTok = tok; if (vectorsFile.exists()) { try { tok = new Word2VecFilter(tok, new QueryExpander(new Vectors(new FileInputStream(vectorsFile)), true, TermSelection.CUT_75_ABS), 3, false); } catch (IOException e) { e.printStackTrace(); tok = baseTok; } } return new TokenStreamComponents(src, tok) { @Override protected void setReader(final java.io.Reader reader) throws IOException { src.setMaxTokenLength(15); super.setReader(reader); } }; } }; BufferedReader in = null; if (queries != null) { in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8")); } else { in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); } @SuppressWarnings("deprecation") QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer); while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); } String line = queryString != null ? queryString : in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } Query query = parser.parse(line); System.out.println("Searching for: " + query.toString(field)); if (repeat > 0) { // repeat & time as benchmark Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, null, 100); } Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); } doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); if (queryString != null) { break; } } reader.close(); }
From source file:com.redhat.satellite.search.index.IndexManager.java
License:Open Source License
/** * Query a index/* w ww . java2 s.co m*/ * * @param indexName name of the index * @param query search query * @param lang language * @param isFineGrained * true: will limit results, less are returned but they are closer * to the search query, useful for advanced/free form queries * * false: will allow queries to be more flexible returning words * which are spelled similarly * * @return list of hits * @throws IndexingException if there is a problem indexing the content. * @throws QueryParseException */ public List<Result> search(String indexName, String query, String lang, boolean isFineGrained) throws IndexingException, QueryParseException { IndexSearcher searcher = null; IndexReader reader = null; List<Result> retval = null; try { reader = getIndexReader(indexName, lang); searcher = getIndexSearcher(indexName, lang); QueryParser qp = getQueryParser(indexName, lang, isFineGrained); Query q = qp.parse(query); if (log.isDebugEnabled()) { log.debug("Original query was: " + query); log.debug("Parsed Query is: " + q.toString()); } Hits hits = searcher.search(q); if (log.isDebugEnabled()) { log.debug(hits.length() + " results were found."); } Set<Term> queryTerms = null; try { queryTerms = new HashSet<Term>(); Query newQ = q.rewrite(reader); newQ.extractTerms(queryTerms); } catch (Exception e) { e.printStackTrace(); throw new QueryParseException(e); } retval = processHits(indexName, hits, queryTerms, query, lang); if (explainResults) { debugExplainResults(indexName, hits, searcher, q, queryTerms); } } catch (IOException e) { // this exception is thrown, when there're no packages or errata on the system // and the user performs a search // if this is the case, just return 0 results, otherwise rethrow the exception if (!e.getMessage().contains( "no segments* file found in org.apache.lucene.store.FSDirectory@/var/lib/rhn/search/indexes")) { throw new IndexingException(e); } log.error(e.getMessage()); retval = new ArrayList<Result>(); } catch (ParseException e) { throw 
new QueryParseException("Could not parse query: '" + query + "'"); } finally { try { if (searcher != null) { searcher.close(); } if (reader != null) { reader.close(); } } catch (IOException ex) { throw new IndexingException(ex); } } return retval; }
From source file:com.redhat.satellite.search.index.IndexManager.java
License:Open Source License
/** * @param indexName/*from ww w. j a va 2 s. com*/ * @param doc document with data to index * @param uniqueField field in doc which identifies this uniquely * @param lang language * @throws IndexingException */ public void addUniqueToIndex(String indexName, Document doc, String uniqueField, String lang) throws IndexingException { IndexReader reader = null; int numFound = 0; try { reader = getIndexReader(indexName, lang); Term term = new Term(uniqueField, doc.get(uniqueField)); numFound = reader.docFreq(term); } catch (FileNotFoundException e) { // Index doesn't exist, so this add will be unique // we don't need to do anything/ } catch (IOException e) { throw new IndexingException(e); } finally { if (reader != null) { try { reader.close(); } catch (IOException e) { // } } } if (numFound > 0) { log.info("Found " + numFound + " <" + indexName + " docs for " + uniqueField + ":" + doc.get(uniqueField) + " will remove them now."); removeFromIndex(indexName, uniqueField, doc.get(uniqueField)); } addToIndex(indexName, doc, lang); }
From source file:com.redhat.satellite.search.index.IndexManager.java
License:Open Source License
/** * Remove a document from an index/*w ww. ja va 2s .c o m*/ * * @param indexName index to use * @param uniqueField field name which represents this data's unique id * @param objectId unique document id * @throws IndexingException something went wrong removing the document */ public void removeFromIndex(String indexName, String uniqueField, String objectId) throws IndexingException { log.info("Removing <" + indexName + "> " + uniqueField + ":" + objectId); Term t = new Term(uniqueField, objectId); IndexReader reader; try { reader = getIndexReader(indexName, IndexHandler.DEFAULT_LANG); try { reader.deleteDocuments(t); reader.flush(); } finally { if (reader != null) { reader.close(); } } } catch (CorruptIndexException e) { throw new IndexingException(e); } catch (IOException e) { throw new IndexingException(e); } }
From source file:com.redhat.satellite.search.index.IndexManager.java
License:Open Source License
/** * Removes any documents which are not related to the passed in Set of good value * @param ids Set of ids of all known/good values * @param indexName index name to operate on * @param uniqField the name of the field in the Document to uniquely identify * this record//from ww w . j ava2 s .co m * @return the number of documents deleted */ public int deleteRecordsNotInList(Set<String> ids, String indexName, String uniqField) { int count = 0; IndexReader reader = null; try { reader = getIndexReader(indexName, IndexHandler.DEFAULT_LANG); // Use maxDoc() to iterate over all docs, numDocs() returns the // number of currently alive docs leaving out the deleted ones. int maxDoc = reader.maxDoc(); for (int i = 0; i < maxDoc; i++) { if (!reader.isDeleted(i)) { Document doc = reader.document(i); String uniqId = doc.getField(uniqField).stringValue(); if (!ids.contains(uniqId)) { log.info(indexName + ":" + uniqField + ": <" + uniqId + "> not found in list of current/good values " + "assuming this has been deleted from Database and we " + "should remove it."); removeFromIndex(indexName, uniqField, uniqId); count++; } } } } catch (IOException e) { e.printStackTrace(); log.info("deleteRecordsNotInList() caught exception : " + e); } catch (IndexingException e) { e.printStackTrace(); log.info("deleteRecordsNotInList() caught exception : " + e); } finally { if (reader != null) { try { reader.close(); } catch (IOException e) { // } } } return count; }
From source file:com.revorg.goat.Document.java
License:Open Source License
public static String deleteDBDocument(String indexPath, String primaryKey) { try {/*from w w w. j a va2 s . c o m*/ XMLReader readerXML = new XMLReader(); //XML Reader Class String configFile = ConfigFiles.getSchemaFile(indexPath); String[] indexTypeArray = new String[Integer.parseInt(readerXML.getTotalNodes(configFile))]; String[] columnNamesArray = new String[Integer.parseInt(readerXML.getTotalNodes(configFile))]; int totalNodes = columnNamesArray.length; String fieldName = ""; //Get Column Names for (int i = 0; i < totalNodes; i++) { columnNamesArray[i] = readerXML.getNodeValueByFile(configFile, i, "columnname"); indexTypeArray[i] = readerXML.getNodeValueByFile(configFile, i, "indextype"); //System.out.println(indexTypeArray[i] + " " + columnNamesArray[i]); if (indexTypeArray[i].equalsIgnoreCase("primarykey")) { fieldName = columnNamesArray[i]; break; } } int totalDeleted = 0; int totalTerms = 0; int deleted = 0; Directory directory = FSDirectory.getDirectory(indexPath); IndexReader reader = IndexReader.open(directory); StringTokenizer tokenizer = new StringTokenizer(primaryKey, ","); String nextTerm; Term term = new Term("goat", "goat"); while (tokenizer.hasMoreTokens()) { totalTerms++; //Total Terms nextTerm = tokenizer.nextToken().trim(); term = new Term(fieldName, nextTerm); deleted = reader.deleteDocuments(term); if (deleted != 0) { totalDeleted++; } } reader.close(); directory.close(); if (totalDeleted > 0) { return "Success: Total of " + totalDeleted + "/" + totalTerms + " documents deleted"; } else { return "Failure: No Documents Match the term " + term; } } catch (Exception e) { ActionResultError = " caught a " + e.getClass() + " with message: " + e.getMessage(); System.out.println("Failure to count index: " + indexPath); } ActionResult = "Failure"; return ActionResult + ActionResultError; }
From source file:com.revorg.goat.Document.java
License:Open Source License
/** * Returns the list of fields for a particular Document. * @param indexPath Directory that contains the Lucene Collection * @throws Exception//from w w w .j a v a2 s . c o m * @return ActionResult */ public static List getDocumentFields(String indexPath) { //Assign Document to Lucene Document org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document(); try { IndexReader reader = IndexReader.open(indexPath); doc = reader.document(0); reader.close(); List AllTheFields = doc.getFields(); return AllTheFields; } catch (Exception e) { ActionResultError = " caught a " + e.getClass() + " with message: " + e.getMessage(); System.out.println("Failure on getDocumentFields "); } ActionResult = "Failure"; return new LinkedList(); }
From source file:com.revorg.goat.IndexManager.java
License:Open Source License
/** * unregisters a collection and deletes its directories. * * @param indexPath Directory that contains the Lucene Collection * @throws Exception/*www. j a v a2 s .c om*/ * @return ActionResult */ public static String deleteIndex(String indexPath) { //Cast To Directory File theDir = new File(indexPath); //Convert String To File try { IndexReader reader = IndexReader.open(indexPath); boolean indexExists = reader.indexExists(indexPath); reader.close(); //Close Index //if (theDir.exists() != stringFails) if (theDir.exists()) { FileUtils.deleteDirectory(theDir); ActionResult = "Success"; return ActionResult; } else { ActionResult = "Failure to delete index: " + indexPath; return ActionResult; } } catch (Exception e) { ActionResultError = " caught a " + e.getClass() + " with message: " + e.getMessage(); System.out.println("Failure to delete index: " + indexPath); } ActionResult = "Failure"; return ActionResult + ActionResultError; }
From source file:com.revorg.goat.IndexManager.java
License:Open Source License
/**
 * Gets the Index Version.
 *
 * @param indexPath Directory that contains the Lucene Collection
 * @return the index version as a decimal string, or "Failure" followed by the error
 *         description when the index cannot be read
 */
public static String getIndexVersion(String indexPath) {
    try {
        IndexReader reader = IndexReader.open(indexPath);
        try {
            ActionResult = Long.toString(reader.getVersion());
        } finally {
            // Release the reader even when the version cannot be read.
            reader.close();
        }
        return ActionResult;
    } catch (Exception e) {
        ActionResultError = " caught a " + e.getClass() + " with message: " + e.getMessage();
        System.out.println("Failure to get index version: " + indexPath);
    }
    ActionResult = "Failure";
    return ActionResult + ActionResultError;
}