List of usage examples for org.apache.lucene.index IndexReader document
public final Document document(int docID) throws IOException
Returns the stored fields of the nth Document in this index.
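Before the per-project examples below, a minimal self-contained sketch of the call itself. The index path and the "title" field are placeholder assumptions, not taken from any of the projects listed; the reader is opened through DirectoryReader, the usual approach on recent Lucene versions.

import java.nio.file.Paths;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class DocumentByIdExample {
    public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(Paths.get("/path/to/index")); // placeholder path
        IndexReader reader = DirectoryReader.open(dir);
        // Doc IDs run from 0 to maxDoc() - 1; IDs of deleted documents may still fall in that range
        for (int docID = 0; docID < reader.maxDoc(); docID++) {
            Document doc = reader.document(docID); // load the stored fields of this document
            System.out.println(doc.get("title"));  // "title" is an assumed stored field
        }
        reader.close();
        dir.close();
    }
}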
From source file:com.helger.pd.indexer.lucene.PDLucene.java
License:Apache License
/**
 * Get the Lucene {@link Document} matching the specified ID
 *
 * @param nDocID
 *        Document ID
 * @return <code>null</code> if no reader could be obtained or no such
 *         document exists.
 * @throws IOException
 *         On IO error
 */
@Nullable
public Document getDocument(final int nDocID) throws IOException {
    _checkClosing();
    if (s_aLogger.isDebugEnabled())
        s_aLogger.debug("getDocument(" + nDocID + ")");
    final IndexReader aReader = _getReader();
    if (aReader == null)
        return null;
    return aReader.document(nDocID);
}
From source file:com.ikon.servlet.admin.ListIndexesServlet.java
License:Open Source License
/**
 * List Lucene indexes
 */
@SuppressWarnings("unchecked")
private void showLuceneDocument(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    boolean showTerms = WebUtils.getBoolean(request, "showTerms");
    int id = WebUtils.getInt(request, "id", 0);
    FullTextSession ftSession = null;
    ReaderProvider rProv = null;
    Session session = null;
    IndexReader idx = null;
    List<Map<String, String>> fields = new ArrayList<Map<String, String>>();

    try {
        session = HibernateUtil.getSessionFactory().openSession();
        ftSession = Search.getFullTextSession(session);
        SearchFactory sFactory = ftSession.getSearchFactory();
        rProv = sFactory.getReaderProvider();
        DirectoryProvider<Directory>[] dirProv = sFactory.getDirectoryProviders(NodeDocument.class);
        idx = rProv.openReader(dirProv[0]);

        // Print Lucene documents
        if (!idx.isDeleted(id)) {
            Document doc = idx.document(id);
            String hibClass = null;

            for (Fieldable fld : doc.getFields()) {
                Map<String, String> field = new HashMap<String, String>();
                field.put("name", fld.name());
                field.put("value", fld.stringValue());
                fields.add(field);

                if (fld.name().equals("_hibernate_class")) {
                    hibClass = fld.stringValue();
                }
            }

            /**
             * 1) Get all the terms using indexReader.terms()
             * 2) Process the term only if it belongs to the target field.
             * 3) Get all the docs using indexReader.termDocs(term);
             * 4) So, we have the term-doc pairs at this point.
             */
            if (showTerms && NodeDocument.class.getCanonicalName().equals(hibClass)) {
                List<String> terms = new ArrayList<String>();

                for (TermEnum te = idx.terms(); te.next();) {
                    Term t = te.term();

                    if ("text".equals(t.field())) {
                        for (TermDocs tds = idx.termDocs(t); tds.next();) {
                            if (id == tds.doc()) {
                                terms.add(t.text());
                            }
                        }
                    }
                }

                Map<String, String> field = new HashMap<String, String>();
                field.put("name", "terms");
                field.put("value", terms.toString());
                fields.add(field);
            }
        }

        ServletContext sc = getServletContext();
        sc.setAttribute("fields", fields);
        sc.setAttribute("id", id);
        sc.setAttribute("max", idx.maxDoc() - 1);
        sc.setAttribute("prev", id > 0);
        sc.setAttribute("next", id < idx.maxDoc() - 1);
        sc.setAttribute("showTerms", showTerms);
        sc.getRequestDispatcher("/admin/list_indexes.jsp").forward(request, response);
    } finally {
        if (rProv != null && idx != null) {
            rProv.closeReader(idx);
        }
        HibernateUtil.close(ftSession);
        HibernateUtil.close(session);
    }
}
From source file:com.jamespot.glifpix.index.ResourceDocument.java
License:Open Source License
public static String getLiteral(IndexReader ir, String token) throws IOException {
    // Position the term enumeration at the first term on or after ("token", token)
    TermEnum te = ir.terms(new Term("token", token));

    if (te.term().field().equalsIgnoreCase("token")) {
        TermDocs td = ir.termDocs(te.term());

        if (td.next()) {
            int idDoc = td.doc();
            Document doc = ir.document(idDoc);

            // Return the stored literal only on an exact token match
            if (doc.get("token").equals(token)) {
                return doc.get("literal");
            }
        }
    }
    return null;
}
From source file:com.jamespot.glifpix.index.StatsDocument.java
License:Open Source License
protected static StatsDocument read(IndexReader ir) throws IOException {
    // Position the term enumeration at the first term on or after "nbElements"
    TermEnum te = ir.terms(new Term("nbElements"));

    if (te.term().field().equalsIgnoreCase("nbElements")) {
        TermDocs td = ir.termDocs(te.term());

        if (td.next()) {
            int idDoc = td.doc();
            return new StatsDocument(ir.document(idDoc));
        }
        throw new IOException("No readable StatsDocument");
    }
    throw new IOException("No StatsDocument found");
}
From source file:com.leavesfly.lia.advsearching.BooksLikeThis.java
License:Apache License
public static void main(String[] args) throws IOException {
    Directory dir = TestUtil.getBookIndexDirectory();
    IndexReader reader = IndexReader.open(dir);
    int numDocs = reader.maxDoc();

    BooksLikeThis blt = new BooksLikeThis(reader);

    for (int i = 0; i < numDocs; i++) { // #1
        System.out.println();
        Document doc = reader.document(i);
        System.out.println(doc.get("title"));

        Document[] docs = blt.docsLike(i, 10); // #2
        if (docs.length == 0) {
            System.out.println(" None like this");
        }
        for (Document likeThisDoc : docs) {
            System.out.println(" -> " + likeThisDoc.get("title"));
        }
    }
    reader.close();
    dir.close();
}
From source file:com.leavesfly.lia.advsearching.CategorizerTest.java
License:Apache License
private void buildCategoryVectors() throws IOException {
    IndexReader reader = IndexReader.open(TestUtil.getBookIndexDirectory());
    int maxDoc = reader.maxDoc();

    for (int i = 0; i < maxDoc; i++) {
        if (!reader.isDeleted(i)) {
            Document doc = reader.document(i);
            String category = doc.get("category");
            Map vectorMap = (Map) categoryMap.get(category);

            if (vectorMap == null) {
                vectorMap = new TreeMap();
                categoryMap.put(category, vectorMap);
            }

            TermFreqVector termFreqVector = reader.getTermFreqVector(i, "subject");
            addTermFreqToMap(vectorMap, termFreqVector);
        }
    }
}
From source file:com.leavesfly.lia.advsearching.FunctionQueryTest.java
License:Apache License
public void testRecency() throws Throwable {
    Directory dir = TestUtil.getBookIndexDirectory();
    IndexReader r = IndexReader.open(dir);
    IndexSearcher s = new IndexSearcher(r);
    s.setDefaultFieldSortScoring(true, true);

    QueryParser parser = new QueryParser(Version.LUCENE_30, "contents",
            new StandardAnalyzer(Version.LUCENE_30));
    Query q = parser.parse("java in action"); // #A
    Query q2 = new RecencyBoostingQuery(q,    // #B
            2.0, 2 * 365, "pubmonthAsDay");
    Sort sort = new Sort(new SortField[] { SortField.FIELD_SCORE,
            new SortField("title2", SortField.STRING) });
    TopDocs hits = s.search(q2, null, 5, sort);

    for (int i = 0; i < hits.scoreDocs.length; i++) {
        Document doc = r.document(hits.scoreDocs[i].doc);
        System.out.println((1 + i) + ": " + doc.get("title") + ": pubmonth="
                + doc.get("pubmonth") + " score=" + hits.scoreDocs[i].score);
    }
    s.close();
    r.close();
    dir.close();
}
From source file:com.leavesfly.lia.tool.BooksMoreLikeThis.java
License:Apache License
public static void main(String[] args) throws Throwable {
    String indexDir = System.getProperty("index.dir");
    FSDirectory directory = FSDirectory.open(new File(indexDir));
    IndexReader reader = IndexReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(reader);
    int numDocs = reader.maxDoc();

    MoreLikeThis mlt = new MoreLikeThis(reader); // #A
    mlt.setFieldNames(new String[] { "title", "author" });
    mlt.setMinTermFreq(1); // #B
    mlt.setMinDocFreq(1);

    for (int docID = 0; docID < numDocs; docID++) { // #C
        System.out.println();
        Document doc = reader.document(docID);
        System.out.println(doc.get("title"));

        Query query = mlt.like(docID); // #D
        System.out.println(" query=" + query);

        TopDocs similarDocs = searcher.search(query, 10);
        if (similarDocs.totalHits == 0)
            System.out.println(" None like this");
        for (int i = 0; i < similarDocs.scoreDocs.length; i++) {
            if (similarDocs.scoreDocs[i].doc != docID) { // #E
                doc = reader.document(similarDocs.scoreDocs[i].doc);
                System.out.println(" -> " + doc.getField("title").stringValue());
            }
        }
    }
    searcher.close();
    reader.close();
    directory.close();
}
From source file:com.main.Searcher.java
public List<Bean> searching(String s1, String s2, String radioBtn)
        throws IOException, ParseException, InvalidTokenOffsetsException {
    // Get a reference to the index directory
    Directory dir = FSDirectory.open(Paths.get(Index_Dir));
    // IndexReader - an interface for accessing a point-in-time view of a Lucene index
    IndexReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    // Analyzer with the default stop words; takes out the stop words
    Analyzer analyzer = new StandardAnalyzer();
    String contents = "contents";
    QueryParser parser = new QueryParser(contents, analyzer);

    int numOfDoc = reader.numDocs();
    for (int i = 0; i < numOfDoc; i++) {
        Document d = reader.document(i);
    }

    Query q1 = parser.parse(s1);
    Query q2 = parser.parse(s2);

    // Conjunction, disjunction and negation
    BooleanQuery.Builder bq = new BooleanQuery.Builder();
    if (radioBtn.equals("conjunction")) {
        // Occur.MUST: both queries are required in a doc
        bq.add(q1, BooleanClause.Occur.MUST);
        bq.add(q2, BooleanClause.Occur.MUST);
    } else if (radioBtn.equals("disjunction")) {
        // Occur.SHOULD: at least one of the queries should be present in a doc
        bq.add(q1, BooleanClause.Occur.SHOULD);
        bq.add(q2, BooleanClause.Occur.SHOULD);
    } else {
        // Negation: the first query must be present, the second must not
        bq.add(q1, BooleanClause.Occur.MUST);
        bq.add(q2, BooleanClause.Occur.MUST_NOT);
    }

    TopDocs hits = searcher.search(bq.build(), 10);

    Formatter formatter = new SimpleHTMLFormatter();
    // Scores content fragments by the number of unique query terms found
    QueryScorer scorer = new QueryScorer(bq.build());
    // Used to mark up highlighted terms found in the best sections of the content
    Highlighter highlighter = new Highlighter(formatter, scorer);
    // Breaks the content up into same-size texts but does not split up spans
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 10);
    highlighter.setTextFragmenter(fragmenter);

    for (int i = 0; i < hits.scoreDocs.length; i++) {
        Bean bean = new Bean();
        int outResult = hits.scoreDocs.length;
        bean.setNumFile(outResult);
        int docid = hits.scoreDocs[i].doc;
        double rank = hits.scoreDocs[i].score;
        bean.setRankSc(rank);
        Document doc = searcher.doc(docid);
        String name = doc.get("name");
        String title = doc.get("title");
        bean.setTitle(name);
        String path = doc.get("path");
        bean.setPath(path);
        String cont = doc.get("contents");
        // Create a token stream
        TokenStream stream = TokenSources.getAnyTokenStream(reader, docid, "contents", analyzer);
        // Get highlighted content fragments
        String[] frags = highlighter.getBestFragments(stream, cont, 10);
        ArrayList<String> dummy = new ArrayList<>();
        for (String frag : frags) {
            dummy.add(frag);
        }
        bean.setContent(dummy);
        beanList.add(bean);
    }
    dir.close();
    return beanList;
}
From source file:com.main.Searcher.java
public List<Bean> searching(String s1)
        throws IOException, ParseException, InvalidTokenOffsetsException {
    // Get a reference to the index directory
    Directory dir = FSDirectory.open(Paths.get(Index_Dir));
    // IndexReader - an interface for accessing a point-in-time view of a Lucene index
    IndexReader reader = DirectoryReader.open(dir);
    // Create the Lucene searcher; it searches over a single IndexReader
    IndexSearcher searcher = new IndexSearcher(reader);
    // Analyzer with the default stop words
    Analyzer analyzer = new StandardAnalyzer();

    // Query parser to be used for creating a TermQuery
    String queries = null;
    String queryString = null;

    // Regular search
    String contents = "contents";
    BufferedReader in = null;
    if (queries != null) {
        in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8);
    } else {
        in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
    }
    QueryParser parser = new QueryParser(contents, analyzer);

    int numOfDoc = reader.numDocs();
    for (int i = 0; i < numOfDoc; i++) {
        Document d = reader.document(i);
    }

    Query q1 = parser.parse(s1);
    BooleanQuery.Builder bq = new BooleanQuery.Builder();
    bq.add(q1, BooleanClause.Occur.MUST);

    // Search the Lucene documents
    TopDocs hits = searcher.search(bq.build(), 10);
    // TopScoreDocCollector collector = TopScoreDocCollector.create(5);

    /**
     * Highlighter code start
     */
    // Uses HTML <B></B> tags to highlight the searched terms
    Formatter formatter = new SimpleHTMLFormatter();
    // Scores content fragments by the number of unique query terms found
    // (the matching score, in layman's terms)
    QueryScorer scorer = new QueryScorer(bq.build());
    // Used to mark up highlighted terms found in the best sections of the content
    Highlighter highlighter = new Highlighter(formatter, scorer);
    // Breaks the content up into same-size texts but does not split up spans
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 10);
    highlighter.setTextFragmenter(fragmenter);

    // Iterate over the found results
    for (int i = 0; i < hits.scoreDocs.length; i++) {
        Bean bean = new Bean();
        int outResult = hits.scoreDocs.length;
        bean.setNumFile(outResult);
        int docid = hits.scoreDocs[i].doc;
        double rank = hits.scoreDocs[i].score;
        bean.setRankSc(rank);
        Document doc = searcher.doc(docid);
        String name = doc.get("name");
        String title = doc.get("title");
        bean.setTitle(name);
        String path = doc.get("path");
        bean.setPath(path);
        String cont = doc.get("contents");
        // Create a token stream
        TokenStream stream = TokenSources.getAnyTokenStream(reader, docid, "contents", analyzer);
        // Get highlighted content fragments
        String[] frags = highlighter.getBestFragments(stream, cont, 10);
        ArrayList<String> dummy = new ArrayList<>();
        for (String frag : frags) {
            dummy.add(frag);
        }
        bean.setContent(dummy);
        beanList.add(bean);
    }
    dir.close();
    return beanList;
}