Usage examples for org.apache.lucene.index.IndexReader#close(), collected from open-source projects.
@Override public final synchronized void close() throws IOException
From source file:edu.coeia.tasks.ExtensionFrequencyTask.java
License:Open Source License
/**
 * Computes a progress-reporting factor: one "tick" per 1/200th of the
 * documents in the case's Lucene index.
 *
 * @return maxDoc() / 200 (integer division; 0 for indexes under 200 docs)
 * @throws IOException if the index directory cannot be opened or read
 */
private int getFactor() throws IOException {
    Directory directory = FSDirectory.open(new File(this.caseFacade.getCaseIndexFolderLocation()));
    IndexReader indexReader = IndexReader.open(directory);
    try {
        return indexReader.maxDoc() / 200;
    } finally {
        // Close even when maxDoc() throws, so index file handles are released.
        // (The original leaked both reader and directory on exception.)
        indexReader.close();
        directory.close();
    }
}
From source file:edu.coeia.tasks.MultimediaLoadingTask.java
License:Open Source License
private void loadItems() throws IOException { String indexDir = this.caseFacade.getCaseIndexFolderLocation(); Directory dir = FSDirectory.open(new File(indexDir)); IndexReader indexReader = IndexReader.open(dir); for (int i = 0; i < indexReader.maxDoc(); i++) { if (this.isCancelledTask()) break; Document document = indexReader.document(i); if (document != null) { Field field = document.getField(IndexingConstant.FILE_MIME); if (field != null && field.stringValue() != null) { String documentExtension = field.stringValue(); final StringBuilder fullpath = new StringBuilder(); if (type == MultimediaViewerPanel.TYPE.IMAGE && isImage(documentExtension)) { fullpath.append(this.caseFacade.getFullPath(document.get(IndexingConstant.FILE_PATH))); } else if (type == MultimediaViewerPanel.TYPE.AUDIO && isAudio(documentExtension)) { fullpath.append(this.caseFacade.getFullPath(document.get(IndexingConstant.FILE_PATH))); } else if (type == MultimediaViewerPanel.TYPE.ARCHIVE && isArchieve(documentExtension)) { fullpath.append(this.caseFacade.getFullPath(document.get(IndexingConstant.FILE_PATH))); } else if (type == MultimediaViewerPanel.TYPE.VIDEO && isVideo(documentExtension)) { fullpath.append(this.caseFacade.getFullPath(document.get(IndexingConstant.FILE_PATH))); }//from w w w. ja va2 s.c o m if (!fullpath.toString().isEmpty()) { EventQueue.invokeLater(new Runnable() { @Override public void run() { File file = new File(fullpath.toString()); Object[] data = { file.getAbsolutePath(), FileUtil.getExtension(file), file.lastModified(), file.isHidden(), file.length() }; JTableUtil.addRowToJTable(panel.getTable(), data); } }); } } } } indexReader.close(); }
From source file:edu.cuhk.hccl.cmd.AppSearchEngine.java
License:Apache License
public static void main(String[] args) throws IOException { // Get parameters CommandLineParser parser = new BasicParser(); Options options = createOptions();/*from ww w . j a v a 2 s . co m*/ File dataFolder = null; String queryStr = null; int topK = 0; File resultFile = null; String queryType = null; File similarityFile = null; try { CommandLine line = parser.parse(options, args); dataFolder = new File(line.getOptionValue('d')); queryStr = line.getOptionValue('q'); queryType = line.getOptionValue('t'); topK = Integer.parseInt(line.getOptionValue('k')); resultFile = new File(line.getOptionValue('f')); similarityFile = new File(line.getOptionValue('s')); if (line.hasOption('m')) { String modelPath = line.getOptionValue('m'); if (queryType.equalsIgnoreCase("WordVector")) { expander = new WordVectorExpander(modelPath); } else if (queryType.equalsIgnoreCase("WordNet")) { expander = new WordNetExpander(modelPath); } else { System.out.println("Please choose a correct expander: WordNet or WordVector!"); System.exit(-1); } } } catch (ParseException exp) { System.out.println("Error in parameters: \n" + exp.getMessage()); System.exit(-1); } // Create Index StandardAnalyzer analyzer = new StandardAnalyzer(); Directory index = createIndex(dataFolder, analyzer); // Build query Query query = buildQuery(analyzer, queryStr, queryType); // Search index for topK hits IndexReader reader = DirectoryReader.open(index); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(topK, true); searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; // Show search results System.out.println("\n[INFO] " + hits.length + " hits were returned:"); List<String> hitLines = new ArrayList<String>(); for (int i = 0; i < hits.length; i++) { int docId = hits[i].doc; Document d = searcher.doc(docId); String line = (i + 1) + "\t" + d.get(PATH_FIELD) + "\t" + hits[i].score; System.out.println(line); hitLines.add(line); } // 
Compute cosine similarity between documents List<String> simLines = new ArrayList<String>(); for (int m = 0; m < hits.length; m++) { int doc1 = hits[m].doc; Terms terms1 = reader.getTermVector(doc1, CONTENT_FIELD); for (int n = m + 1; n < hits.length; n++) { int doc2 = hits[n].doc; Terms terms2 = reader.getTermVector(doc2, CONTENT_FIELD); CosineDocumentSimilarity cosine = new CosineDocumentSimilarity(terms1, terms2); double similarity = cosine.getCosineSimilarity(); String line = searcher.doc(doc1).get(PATH_FIELD) + "\t" + searcher.doc(doc2).get(PATH_FIELD) + "\t" + similarity; simLines.add(line); } } // Release resources reader.close(); if (expander != null) { expander.close(); } // Save search results System.out.println("\n[INFO] Search results are saved in file: " + resultFile.getPath()); FileUtils.writeLines(resultFile, hitLines, false); System.out.println("\n[INFO] Cosine similarities are saved in file: " + similarityFile.getPath()); FileUtils.writeLines(similarityFile, simLines, false); }
From source file:edu.ehu.galan.lite.algorithms.ranked.supervised.tfidf.corpus.lucene.CorpusHighFreqTerms.java
License:Open Source License
/** * * Extracts the top n most frequent terms (by document frequency ) from an existing Lucene index * (the dir must be specified via args or via tfidfTester on * /resources/lite/configs/general.conf) in this case the Wikipedia corpus) and reports their * document frequency./*ww w . j ava 2s . c o m*/ * * @param args * @throws java.lang.Exception */ public static void main(String[] args) throws Exception { try { Properties prop = new Properties(); InputStream is = new FileInputStream("resources/lite/configs/general.conf"); FSDirectory dir; if (args.length == 1) { if (Paths.get(args[0]).toFile().isDirectory()) { dir = FSDirectory.open(new File(args[0])); } else { System.out.println("The specified directory does not exist\n" + " backing to load the lucene index specified in the config files"); dir = FSDirectory.open(new File(prop.getProperty("tfidfTester"))); } } else if (args.length > 1) { System.out.println("The args only need one parameter, the directory of the Lucene Index\n " + "backing to load the lucene index specified in the config files"); dir = FSDirectory.open(new File(prop.getProperty("tfidfTester"))); } else { dir = FSDirectory.open(new File(prop.getProperty("tfidfTester"))); } IndexReader reader = null; String field = null; boolean IncludeTermFreqs = false; prop.load(is); IncludeTermFreqs = true; reader = DirectoryReader.open(dir); System.out.println("num Docs " + reader.numDocs()); TermStats[] terms = getHighFreqTerms(reader, numTerms, field); if (!IncludeTermFreqs) { //default HighFreqTerms behavior for (int i = 0; i < terms.length; i++) { System.out.printf("%s:%s %,d \n", terms[i].field, terms[i].termtext.utf8ToString(), terms[i].docFreq); } } else { TermStats[] termsWithTF = sortByTotalTermFreq(reader, terms); for (int i = 0; i < termsWithTF.length; i++) { System.out.printf("%s:%s \t totalTF = %,d \t doc freq = %,d \n", termsWithTF[i].field, termsWithTF[i].termtext.utf8ToString(), termsWithTF[i].totalTermFreq, termsWithTF[i].docFreq); } } 
reader.close(); } catch (Exception ex) { logger.error("The directory specified contains a Lucene index?", ex); } }
From source file:edu.harvard.iq.dvn.core.index.Indexer.java
License:Apache License
/**
 * Deletes every index document whose "versionStudyId" field matches the given
 * study id. Failures are printed and swallowed (best-effort, as before).
 *
 * @param studyId the study whose version documents should be removed
 */
public void deleteVersionDocuments(long studyId) {
    try {
        // read-write reader (Lucene 3.x delete-by-term API)
        IndexReader reader = IndexReader.open(dir, false);
        try {
            reader.deleteDocuments(new Term("versionStudyId", Long.toString(studyId)));
        } finally {
            // Close even when the delete throws, releasing the index write lock
            // (the original leaked the reader — and the lock — on exception).
            reader.close();
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
From source file:edu.harvard.iq.dvn.core.index.Indexer.java
License:Apache License
// Re-indexes the (single expected) document for the given study: reads it back
// from the index, closes the reader to release the lock, then rewrites it via
// IndexWriter.updateDocument keyed on the "id" term.
// NOTE(review): the `field` and `value` parameters are never used in this body —
// confirm against callers whether they were meant to modify studyDocument.
// NOTE(review): reader.termDocs() with no Term appears to iterate all docs, so
// "searching by study id" relies on the index containing only the target — verify.
protected void updateStudyDocument(long studyId, String field, String value) throws IOException {
    IndexReader reader = IndexReader.open(dir, false); // read-write reader
    try {
        if (reader != null) {
            TermDocs matchingDocuments = reader.termDocs();
            if (matchingDocuments != null) {
                int c = 1;
                if (matchingDocuments.next()) {
                    // We only expect 1 document when searching by study id.
                    Document studyDocument = reader.document(matchingDocuments.doc());
                    logger.fine("processing matching document number " + c++);
                    if (studyDocument != null) {
                        logger.fine("got a non-zero doc;");
                        // Close the reader BEFORE opening the writer: both cannot
                        // hold the index write lock at once. Null it so the
                        // finally block below does not close it twice.
                        reader.close();
                        reader = null;
                        logger.fine("deleted the document;");
                        //updateDocument(studyDocument, studyId);
                        IndexWriter localWriter = new IndexWriter(dir, getAnalyzer(), isIndexEmpty(),
                                IndexWriter.MaxFieldLength.UNLIMITED);
                        localWriter.updateDocument(new Term("id", Long.toString(studyId)), studyDocument);
                        localWriter.commit();
                        localWriter.close();
                        logger.fine("wrote the updated version of the document;");
                    }
                }
            }
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    } finally {
        // Only close if the update path above did not already close and null it.
        if (reader != null) {
            reader.close();
        }
    }
}
From source file:edu.mit.ll.vizlinc.highlight.WeightedSpanTermExtractor.java
License:Apache License
private void closeReaders() { Collection<IndexReader> readerSet = readers.values(); for (final IndexReader reader : readerSet) { try {/*ww w. j a v a 2 s. c om*/ reader.close(); } catch (IOException e) { // alert? } } }
From source file:edu.siena.cs.sawus.search.Searcher.java
License:Apache License
/** Simple command-line based search demo. */ public static void search(String index) throws Exception { String field = "contents"; String queries = null;// w ww . ja v a2 s .co m int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 10; IndexReader reader = IndexReader.open(FSDirectory.open(new File(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40); BufferedReader in = null; if (queries != null) { in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8")); } else { in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); } QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer); while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); } String line = queryString != null ? queryString : in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } Query query = parser.parse(line); System.out.println("Searching for: " + query.toString(field)); if (repeat > 0) { // repeat & time as benchmark Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, null, 100); } Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); } doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); if (queryString != null) { break; } } //searcher.close(); reader.close(); }
From source file:edu.stanford.muse.index.Indexer.java
License:Apache License
private synchronized Directory copyDirectoryExcludeFields(Directory dir, String out_basedir, String out_name, String... fields_to_be_removed) throws IOException { IndexReader reader = DirectoryReader.open(dir); // IndexReader.open(dir, true); // read-only=true Directory newDir = createDirectory(out_basedir, out_name); IndexWriter writer = openIndexWriter(newDir); //log.info("Removing field(s) " + Util.join(fields_to_be_removed, ", ") + " from index."); for (int i = 0; i < reader.numDocs(); i++) { org.apache.lucene.document.Document doc = reader.document(i); for (String field : fields_to_be_removed) doc.removeFields(field);//from w w w .j av a 2 s . c o m writer.addDocument(doc); } writer.close(); reader.close(); return newDir; }
From source file:edu.stanford.muse.index.Indexer.java
License:Apache License
private synchronized Directory copyDirectoryWithDocFilter(Directory dir, String out_basedir, String out_name, FilterFunctor filter_func) throws IOException { long startTime = System.currentTimeMillis(); IndexReader reader = DirectoryReader.open(dir); // IndexReader.open(dir, true); // read-only=true Directory newDir = createDirectory(out_basedir, out_name); IndexWriter writer = openIndexWriter(newDir); //log.info("Removing field(s) " + Util.join(fields_to_be_removed, ", ") + " from index."); int count = 0; for (int i = 0; i < reader.numDocs(); i++) { org.apache.lucene.document.Document doc = reader.document(i); if (filter_func == null || filter_func.filter(doc)) { writer.addDocument(doc);//w ww . ja va 2 s. c om count++; } } writer.close(); reader.close(); log.info("CopyDirectoryWithtDocFilter to dir:" + out_basedir + " name: " + baseDir + " time: " + (System.currentTimeMillis() - startTime) + " ms docs: " + count); return newDir; }