List of usage examples for org.apache.lucene.index.IndexReader.close()
@Override public final synchronized void close() throws IOException
From source file:index.reader.LuceneQuery.java
License:Apache License
/** * Simple command-line based search demo. *//*from w w w .ja va 2s .c om*/ public static void main(String[] args) throws Exception { String usage = "Usage:\tindex.reader.LuceneQuery " + "[-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]" + "\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { System.out.println(usage); System.exit(0); } String index = "index"; String field = "contents"; String queries = null; int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 10; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index = args[i + 1]; i++; } else if ("-field".equals(args[i])) { field = args[i + 1]; i++; } else if ("-queries".equals(args[i])) { queries = args[i + 1]; i++; } else if ("-query".equals(args[i])) { queryString = args[i + 1]; i++; } else if ("-repeat".equals(args[i])) { repeat = Integer.parseInt(args[i + 1]); i++; } else if ("-raw".equals(args[i])) { raw = true; } else if ("-paging".equals(args[i])) { hitsPerPage = Integer.parseInt(args[i + 1]); if (hitsPerPage <= 0) { System.err.println("There must be at least 1 hit per page."); System.exit(1); } i++; } } IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index))); IndexSearcher searcher = new IndexSearcher(reader); // :Post-Release-Update-Version.LUCENE_XY: Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40); BufferedReader in = null; if (queries != null) { in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), StandardCharsets.UTF_8)); } else { in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8)); } // :Post-Release-Update-Version.LUCENE_XY: QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer); while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); } String line 
= queryString != null ? queryString : in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } Query query = parser.parse(line); System.out.println("Searching for: " + query.toString(field)); if (repeat > 0) { // repeat & time as benchmark Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, null, 100); } Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); } doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); if (queryString != null) { break; } } reader.close(); }
From source file:indexer.IndexHtmlToText.java
/**
 * Fetches the stored HTML of one document, decompresses it, prints it to standard
 * output and returns it.
 *
 * <p>The document is located with a term query on {@code TrecDocIndexer.FIELD_ID};
 * only the top hit is used.
 *
 * @param indexDirPath file-system path of the index directory
 * @param docId        value of the id field to look up
 * @return the result of {@code decompress} applied to the bytes stored in
 *         {@code WTDOC_FIELD_HTML}
 * @throws ArrayIndexOutOfBoundsException if the search returns no hit for {@code docId}
 * @throws Exception if the index cannot be opened or read, or decompression fails
 */
static String getHTMLFromDocId(String indexDirPath, String docId) throws Exception {
    File indexDir = new File(indexDirPath);

    // try-with-resources: the reader is closed even if the lookup or decompression throws.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(indexDir))) {
        IndexSearcher searcher = new IndexSearcher(reader);

        Query query = new TermQuery(new Term(TrecDocIndexer.FIELD_ID, docId));
        TopScoreDocCollector collector = TopScoreDocCollector.create(1, true);
        searcher.search(query, collector);
        TopDocs topDocs = collector.topDocs();

        // No emptiness check: an unknown id surfaces as ArrayIndexOutOfBoundsException here.
        ScoreDoc sd = topDocs.scoreDocs[0];
        Document doc = reader.document(sd.doc);

        String htmlDecompressed = decompress(doc.getBinaryValue(WTDOC_FIELD_HTML).bytes);
        System.out.println(htmlDecompressed);
        return htmlDecompressed;
    }
}
From source file:indexer.IndexSplitter.java
public void split() throws Exception { IndexReader reader = DirectoryReader.open(FSDirectory.open(indexDir)); final int numDocs = reader.numDocs(); IndexWriter pWriter; // pointer variable for (int i = 0; i < numDocs; i++) { Document d = reader.document(i); pWriter = d.get(FIELD_CODEMIXED).equals("1") ? mixedIndexWriter : pureIndexWriter; pWriter.addDocument(d);/*from w w w .j ava 2s . c o m*/ } reader.close(); pureIndexWriter.close(); mixedIndexWriter.close(); }
From source file:indexer.OptimizedRealValuedVecIndexer.java
void processAllTermWise() throws Exception { Cell cell, requantizedCell;//from w w w . j a va 2s .c o m copyIndex(); IndexReader currentReader = DirectoryReader.open(writer, true); List<Cell> splitCells = new ArrayList<>(); Fields fields = MultiFields.getFields(reader); Terms terms = fields.terms(DocVector.FIELD_CELL_ID); TermsEnum te = terms.iterator(); // Iterate through every term (a cell docName) and requantize the // points within the cell if required. while (te.next() != null) { String cellId = te.term().utf8ToString(); cell = new Cell(cellId); if (cell.toSplit(reader)) { splitCells.add(cell); List<DocVector> containedPoints = cell.getVectors(currentReader, terms, numDimensions); for (DocVector p : containedPoints) { requantizedCell = cell.quantize(p); // this function returns a new object p.quantize(requantizedCell); // update quantization info (cell docName) Document doc = p.constructDoc(); Term t = new Term(DocVector.FIELD_ID, p.docName); writer.deleteDocuments(t); writer.addDocument(doc); } //Much faster if we don't commit here... //writer.commit(); } } saveSplitCells(writer, splitCells); currentReader.close(); reader.close(); writer.close(); }
From source file:info.johtani.jjug.lucene.sample.SearcherSample.java
License:Apache License
public static void main(String[] args) { String indexDirectory = "./indexdir"; //String keyword = ""; String keyword = "johtani"; IndexReader reader = null; try {/*from w w w. jav a2s . com*/ //?? Directory dir = FSDirectory.open(new File(indexDirectory)); //IndexReader?? reader = DirectoryReader.open(dir); //IndexSearcher?? IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer; //Standard analyzer = new StandardAnalyzer(); // //analyzer = new JapaneseAnalyzer(); //?? QueryParser parser = new QueryParser("content", analyzer); //???? Query query = parser.parse(keyword); //?????1????? TopDocs hits = searcher.search(query, 10); //???? System.out.println("Found " + hits.totalHits + " document(s)"); //???? for (ScoreDoc scoreDoc : hits.scoreDocs) { System.out.println("--- " + scoreDoc.doc + " ---"); //ID?? Document doc = searcher.doc(scoreDoc.doc); //??? System.out.println(doc.get("content")); System.out.println("---------"); } } catch (IOException | ParseException e) { e.printStackTrace(); } finally { try { if (reader != null) { reader.close(); } } catch (IOException e) { // ignore } } }
From source file:InformationRetrieval.Search.java
License:Apache License
/** Simple command-line based search demo. * @throws ParseException *//*w ww. j a va2 s. c o m*/ public static ArrayList<Store> search(String stringQuery) throws IOException, Exception { ArrayList<Store> list = null; // String usage = // "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; // if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { // System.out.println(usage); // System.exit(0); // } String index = "index"; String field = "contents"; String queries = null; int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 50; // for(int i = 0;i < args.length;i++) { // if ("-index".equals(args[i])) { // index = args[i+1]; // i++; // } else if ("-field".equals(args[i])) { // field = args[i+1]; // i++; // } else if ("-queries".equals(args[i])) { // queries = args[i+1]; // i++; // } else if ("-query".equals(args[i])) { // queryString = args[i+1]; // i++; // } else if ("-repeat".equals(args[i])) { // repeat = Integer.parseInt(args[i+1]); // i++; // } else if ("-raw".equals(args[i])) { // raw = true; // } else if ("-paging".equals(args[i])) { // hitsPerPage = Integer.parseInt(args[i+1]); // if (hitsPerPage <= 0) { // System.err.println("There must be at least 1 hit per page."); // System.exit(1); // } // i++; // } // } IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_41); BufferedReader in = null; if (queries != null) { in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8")); } else { in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); } QueryParser parser = new QueryParser(Version.LUCENE_41, field, analyzer); // while (true) { // if (queries == null && queryString == null) { // prompt 
the user // System.out.println("Enter query: "); // } String line = queryString != null ? queryString : stringQuery; if (line == null || line.length() == -1) { return null; } line = line.trim(); if (line.length() == 0) { return null; } Query query = parser.parse(line); System.out.println("Searching for: " + query.toString(field)); if (repeat > 0) { // repeat & time as benchmark Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, null, 100); } Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); } list = doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); if (queryString != null) { return null; } reader.close(); return list; }
From source file:insa.luyten.SearchCISI.java
License:Apache License
private void search() throws Exception { StandardAnalyzer standardAnalyzer = new StandardAnalyzer(Version.LUCENE_47); EnglishAnalyzer englishAnalyzer = new EnglishAnalyzer(Version.LUCENE_47); String[] fields = { "content", "title", "authors", "references" }; Analyzer[] analysers = { englishAnalyzer, englishAnalyzer, standardAnalyzer, englishAnalyzer }; String index = "index"; String field = fields[0];/* w ww.j a v a 2s.c om*/ String queries = null; int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 10; IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index))); IndexSearcher searcher = new IndexSearcher(reader); // :Post-Release-Update-Version.LUCENE_XY: Analyzer analyzer = analysers[0]; BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); // :Post-Release-Update-Version.LUCENE_XY: while (true) { System.out.println("\nChoose query field :"); for (int i = 0; i < fields.length; i++) { System.out.println(i + 1 + ". " + fields[i]); } int n = Integer.parseInt(in.readLine()) - 1; field = fields[n]; analyzer = analysers[n]; QueryParser parser = new QueryParser(Version.LUCENE_47, field, analyzer); if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); } String line = queryString != null ? 
queryString : in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } Query query = parser.parse(line); System.out.println("Searching for: " + query.toString(field)); if (repeat > 0) { // repeat & time as benchmark Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, null, 100); } Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); } //catch NewQueryException queryString = null; //by default int nq = 0; try { this.doPagingSearch(in, searcher, query, field, hitsPerPage, raw, queries == null && queryString == null); } catch (NewQueryException e) { String newQueryString = e.getMessage(); System.out.println("Is \"" + newQueryString + "\" a new query ?"); do { System.out.println(" Enter (y)es or (n)o."); String l = in.readLine(); if (l.length() == 1 && l.charAt(0) == 'y') nq = 1; else if (l.length() == 1 && l.charAt(0) == 'n') nq = -1; } while (nq == 0); if (nq == 1) { queryString = newQueryString; } } if (queryString != null && nq == 1) // it's okay ; else if (queryString != null) break; } reader.close(); }
From source file:integration.ForumTermCollocationTest.java
License:Apache License
/**
 * Opens the index at {@code indexDir}, runs a {@code CollocationExtractor} over it and
 * feeds the results to a {@code CollocationIndexer} writing to {@code collocsDir}.
 *
 * <p>The indexer, the reader and the directory are all released on exit, including when
 * extraction fails.
 *
 * @throws IOException           if the index cannot be read or the collocation index
 *                               cannot be written
 * @throws CorruptIndexException if the source index is corrupt
 */
private void extractCollocations() throws IOException, CorruptIndexException {
    // Resources close in reverse order: reader first, then the directory.
    try (Directory dir = FSDirectory.open(new File(indexDir));
            IndexReader reader = DirectoryReader.open(dir)) {
        CollocationExtractor collocationExtractor = new CollocationExtractor(reader);
        CollocationIndexer collocationIndexer = new CollocationIndexer(collocsDir,
                new StandardAnalyzer(Version.LUCENE_48));
        try {
            collocationExtractor.extract(collocationIndexer);
        } finally {
            // Explicit finally: the indexer is closed even when extract throws.
            collocationIndexer.close();
        }
    }
}
From source file:invertedindex.LineNumberSearcher.java
public ArrayList<String> search(String keyword, String filePath) throws IOException { String indexLocation = getLineIndexLocation(); // System.out.println("Inside LINE search method"); try {//from ww w. j av a 2 s . com IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation))); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(topDocs, true); String queryContent = keyword; queryContent = "\"" + queryContent + "\""; // queryContent = "*" + queryContent + "*"; String queryFilePath = filePath; // System.out.println("FIELPATH "+queryFilePath); queryFilePath = "\"" + queryFilePath + "\""; // queryFilePath = "*" + queryFilePath + "*"; QueryParser queryParserContent = new QueryParser(Version.LUCENE_47, "contents", analyzer); QueryParser queryParserFilePath = new QueryParser(Version.LUCENE_47, "path", analyzer); queryParserContent.setAllowLeadingWildcard(true); //queryParserFileName.setAllowLeadingWildcard(true); // Query q = queryParser.parse(query); Query qContent = queryParserContent.parse(queryContent); Query qFileName = queryParserFilePath.parse(queryFilePath); // System.out.println("FIELPATH "+qFileName); BooleanQuery q = new BooleanQuery(); q.add(qContent, Occur.MUST); // MUST implies that the keyword must occur. q.add(qFileName, Occur.MUST); searcher.search(q, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; // System.out.println("Found " + hits.length + " hits."); lineNumbersList = new ArrayList<>(); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = searcher.doc(docId); // System.out.println((i + 1) + ". 
" + d.get("filename") // + " score=" + hits[i].score); // System.out.println("Line Number is "+d.get("lineNumber")); // System.out.println("Content is "+d.get("contents")); // String filePath = d.get("path"); lineNumbersList.add(d.get("lineNumber")); } reader.close(); return lineNumbersList; } catch (Exception e) { System.out.println("Error searching in line number search " + indexLocation + " : " + e.getMessage()); } return lineNumbersList; }
From source file:invertedindex.SearchIndex.java
public ArrayList<SearchResults> search(String keyword) throws IOException { String indexLocation = this.getIndexLocation(); // System.out.println("Inside search method"); // indexLocation = ""; // BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); // while (true) { try {// w w w . j a v a2 s.co m IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation))); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(topDocs, true); String query = keyword; query = "\"" + query + "\""; Query q = new QueryParser(Version.LUCENE_47, "contents", analyzer).parse(query); SimpleFragListBuilder fragListBuilder = new SimpleFragListBuilder(); ScoreOrderFragmentsBuilder fragBuilder = new ScoreOrderFragmentsBuilder(); FastVectorHighlighter fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT, FastVectorHighlighter.DEFAULT_FIELD_MATCH, fragListBuilder, fragBuilder); fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT, FastVectorHighlighter.DEFAULT_FIELD_MATCH, fragListBuilder, fragBuilder); // System.out.println(q); // searcher.search(q, collector); // searcher.search(q, null,topDocs); ScoreDoc[] hits = collector.topDocs().scoreDocs; // 4. 
display results System.out.println("Found " + hits.length + " hits."); totalHits = hits.length; searchResulsAL = new ArrayList<>(); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; FieldQuery fq = fvh.getFieldQuery(q); // System.out.println("fq "+fq); String[] fragments = fvh.getBestFragments(fq, searcher.getIndexReader(), docId, "contents", 50, 10); //String[] lineFragments = fvh.getBestFragments(fq, searcher.getIndexReader(), docId, "contents", 18,10); Document d = searcher.doc(docId); String filePath = d.get("path"); for (int j = 0; j < fragments.length; j++) { // System.out.println("FRAGMENT iS "+fragments[j]); // int k=0; // for(k=0;k<lineFragments.length;k++){ // fragments[j].getSc String temp = Jsoup.parse(fragments[j]).text(); // LineNumberSearcher lns = new LineNumberSearcher(); //lineNumbersList = new ArrayList<>(); lineNumberArrayList = new ArrayList<>(); lineNumber = "null"; boolean g = Pattern.compile("\\n").matcher(fragments[j]).find(); if (!g) { // System.out.println("NO G"); lineNumbersList = lns.search(temp, filePath); // for(String s : lineNumbersList){ // System.out.println("s is "+s); // } // if(lineNumbersList.get(0).isEmpty()){ // lineNumber = "Not Found"; // }else { if (!lineNumbersList.isEmpty()) { // System.out.println("in line number"); lineNumber = lineNumbersList.get(0); } // } } //here is the tried code for enter space /* else{ System.out.println("YES G"); String lines[] = fragments[j].split("\\r?\\n"); // ArrayList<String> newLines = new ArrayList<>(); ArrayList<String> newLines = new ArrayList<>(Arrays.asList(lines)); System.out.println("Here 3"); int special = 0; for(String line : newLines){ if(Pattern.compile("^$").matcher(line).find()){ newLines.remove(line); special++; } } System.out.println("Here 4"); // List<String> list = Arrays.asList(lines); // if(list.contains(temp)){ // // } // for(String line: newLines){ // System.out.println("LINE IS "+line); // } if(newLines.size()==1){ // System.out.println("Yes G but 
NOT G"); lineNumbersList = lns.search(temp,filePath); if(!lineNumberArrayList.isEmpty()){ lineNumber = lineNumbersList.get(0); } System.out.println("Here 1"); }else{ System.out.println("Here 2"); ArrayList<String> a0 = lns.search(Jsoup.parse(newLines.get(0)).text(),filePath); ArrayList<String> a1 = lns.search(Jsoup.parse(newLines.get(1)).text(),filePath); int k,l; outerloop: for(k=0;k<a0.size();k++){ for(l=0;l<a1.size();l++){ int secondline = Integer.parseInt(a1.get(l)); // System.out.println("second line is"+ secondline); int firstline = Integer.parseInt(a0.get(k)); // System.out.println("first line is"+firstline); int diff = secondline - firstline; // System.out.println("DIFFERENCE IS "+diff); // System.out.println("Special IS "+special); if(diff == special+1){ insideLoopFlag = true; // System.out.println("K IS "+k); // System.out.println("IN BREAK "); break outerloop; } } // System.out.println("K IS "+k); } // System.out.println("OUT OF FOR LOOP"); // System.out.println("K IS "+k); if(insideLoopFlag==true){ lineNumber = String.valueOf(a0.get(k)); } // System.out.println("LINE NUMBER IS "+lineNumber); } } */ // } fragments[j] = fragments[j].replaceAll("\\n", " "); // System.out.println("\t\t" + fragments[j] + "..."); fragments[j] = fragments[j] + "...."; if (!(lineNumber.equals("null"))) { // System.out.println("in line number"); fragments[j] = fragments[j] + " at Line " + lineNumber; } } //Setting Results SearchResults sr = new SearchResults(); sr.setFilename(d.get("filename")); sr.setScore(hits[i].score); sr.setFragments(fragments); sr.setPath(filePath); sr.setContentType(d.get("contentType")); // sr.setLineNumber(lineNumber); searchResulsAL.add(sr); // } // writer.close(); reader.close(); } catch (Exception e) { System.out.println("Error searching in search index " + e + " : " + e.getMessage()); // break; } // } return searchResulsAL; }