Example usage for org.apache.lucene.index IndexReader document

Introduction

This page collects example usages of org.apache.lucene.index.IndexReader.document(int).

Prototype

public final Document document(int docID) throws IOException 

Source Link

Document

Returns the stored fields of the nth Document in this index.
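
The snippet below is a minimal, self-contained sketch of the call: it opens a reader over an existing index, loads the stored fields of one document by ID, and prints a field. The index path and the "title" field name are placeholders, and the code assumes Lucene 5.x or later (where FSDirectory.open takes a Path); it is an illustration, not taken from the projects listed under Usage.

import java.nio.file.Paths;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class DocumentByIdExample {
    public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(Paths.get("/path/to/index")); // placeholder path
        IndexReader reader = DirectoryReader.open(dir);
        try {
            int docID = 0; // valid IDs range over [0, reader.maxDoc())
            Document doc = reader.document(docID); // loads stored fields only
            System.out.println(doc.get("title")); // placeholder field name
        } finally {
            reader.close();
            dir.close();
        }
    }
}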

Usage

From source file:com.helger.pd.indexer.lucene.PDLucene.java

License:Apache License

/**
 * Get the Lucene {@link Document} matching the specified ID
 *
 * @param nDocID
 *        Document ID
 * @return <code>null</code> if no reader could be obtained or no such
 *         document exists.
 * @throws IOException
 *         On IO error
 */
@Nullable
public Document getDocument(final int nDocID) throws IOException {
    _checkClosing();

    if (s_aLogger.isDebugEnabled())
        s_aLogger.debug("getDocument(" + nDocID + ")");

    final IndexReader aReader = _getReader();
    if (aReader == null)
        return null;
    return aReader.document(nDocID);
}
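
Because getDocument(int) returns null both when no reader can be obtained and when the document is missing, callers must check the result before use. A hypothetical call site (aLucene is a made-up PDLucene instance) might look like:

Document aDoc = aLucene.getDocument(nDocID);
if (aDoc == null) {
    // reader unavailable or no such document
    return;
}
// safe to read stored fields from aDoc here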

From source file:com.ikon.servlet.admin.ListIndexesServlet.java

License:Open Source License

/**
 * List Lucene indexes
 */
@SuppressWarnings("unchecked")
private void showLuceneDocument(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    boolean showTerms = WebUtils.getBoolean(request, "showTerms");
    int id = WebUtils.getInt(request, "id", 0);
    FullTextSession ftSession = null;
    ReaderProvider rProv = null;
    Session session = null;
    IndexReader idx = null;
    List<Map<String, String>> fields = new ArrayList<Map<String, String>>();

    try {
        session = HibernateUtil.getSessionFactory().openSession();
        ftSession = Search.getFullTextSession(session);
        SearchFactory sFactory = ftSession.getSearchFactory();
        rProv = sFactory.getReaderProvider();

        DirectoryProvider<Directory>[] dirProv = sFactory.getDirectoryProviders(NodeDocument.class);
        idx = rProv.openReader(dirProv[0]);

        // Print Lucene documents
        // guard the docID range before asking the reader for stored fields
        if (id >= 0 && id < idx.maxDoc() && !idx.isDeleted(id)) {
            Document doc = idx.document(id);
            String hibClass = null;

            for (Fieldable fld : doc.getFields()) {
                Map<String, String> field = new HashMap<String, String>();
                field.put("name", fld.name());
                field.put("value", fld.stringValue());
                fields.add(field);

                if (fld.name().equals("_hibernate_class")) {
                    hibClass = fld.stringValue();
                }
            }

            /**
             * 1) Get all the terms using indexReader.terms()
             * 2) Process the term only if it belongs to the target field.
             * 3) Get all the docs using indexReader.termDocs(term);
             * 4) So, we have the term-doc pairs at this point.
             */
            if (showTerms && NodeDocument.class.getCanonicalName().equals(hibClass)) {
                List<String> terms = new ArrayList<String>();

                for (TermEnum te = idx.terms(); te.next();) {
                    Term t = te.term();

                    if ("text".equals(t.field())) {
                        for (TermDocs tds = idx.termDocs(t); tds.next();) {
                            if (id == tds.doc()) {
                                terms.add(t.text());
                            }
                        }
                    }
                }

                Map<String, String> field = new HashMap<String, String>();
                field.put("name", "terms");
                field.put("value", terms.toString());
                fields.add(field);
            }
        }

        ServletContext sc = getServletContext();
        sc.setAttribute("fields", fields);
        sc.setAttribute("id", id);
        sc.setAttribute("max", idx.maxDoc() - 1);
        sc.setAttribute("prev", id > 0);
        sc.setAttribute("next", id < idx.maxDoc() - 1);
        sc.setAttribute("showTerms", showTerms);
        sc.getRequestDispatcher("/admin/list_indexes.jsp").forward(request, response);
    } finally {
        if (rProv != null && idx != null) {
            rProv.closeReader(idx);
        }

        HibernateUtil.close(ftSession);
        HibernateUtil.close(session);
    }
}
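
The numbered steps in the comment above use the pre-4.0 TermEnum/TermDocs API, which no longer exists in current Lucene. For reference, here is a sketch of the same term-to-document walk on the modern API; it assumes Lucene 8.x (MultiTerms; 4.x-7.x expose the equivalent via MultiFields.getTerms) and is an illustration, not part of the source file above.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

public final class TermsOfDocument {

    // Collects every term of the given field whose postings contain docID
    // (steps 1-4 from the comment above, expressed on the post-4.0 API).
    public static List<String> termsForDoc(IndexReader reader, String field, int docID) throws IOException {
        List<String> result = new ArrayList<>();
        Terms terms = MultiTerms.getTerms(reader, field); // step 1: all terms of one field
        if (terms == null) {
            return result; // field is not indexed
        }
        TermsEnum te = terms.iterator();
        BytesRef term;
        while ((term = te.next()) != null) { // step 2: field filter is implicit
            PostingsEnum pe = te.postings(null, PostingsEnum.NONE); // step 3: docs for this term
            if (pe.advance(docID) == docID) { // step 4: keep terms occurring in docID
                result.add(term.utf8ToString());
            }
        }
        return result;
    }
}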

From source file:com.jamespot.glifpix.index.ResourceDocument.java

License:Open Source License

public static String getLiteral(IndexReader ir, String token) throws IOException {
    TermEnum te = ir.terms(new Term("token", token));
    // te.term() is null when the enumeration is positioned past the last term
    if (te.term() != null && te.term().field().equalsIgnoreCase("token")) {
        TermDocs td = ir.termDocs(te.term());
        if (td.next()) {
            int idDoc = td.doc();
            Document doc = ir.document(idDoc);
            if (doc.get("token").equals(token)) {
                return doc.get("literal");
            }
        }
    }
    return null;
}

From source file:com.jamespot.glifpix.index.StatsDocument.java

License:Open Source License

protected static StatsDocument read(IndexReader ir) throws IOException {
    TermEnum te = ir.terms(new Term("nbElements"));
    // te.term() is null when the enumeration is positioned past the last term
    if (te.term() != null && te.term().field().equalsIgnoreCase("nbElements")) {
        TermDocs td = ir.termDocs(te.term());
        if (td.next()) {
            int idDoc = td.doc();
            return new StatsDocument(ir.document(idDoc));
        }
        throw new IOException("No readable StatsDocument");
    }
    throw new IOException("No StatsDocument found");
}

From source file:com.leavesfly.lia.advsearching.BooksLikeThis.java

License:Apache License

public static void main(String[] args) throws IOException {
    Directory dir = TestUtil.getBookIndexDirectory();

    IndexReader reader = IndexReader.open(dir);
    int numDocs = reader.maxDoc();

    BooksLikeThis blt = new BooksLikeThis(reader);
    for (int i = 0; i < numDocs; i++) { // #1
        System.out.println();
        Document doc = reader.document(i);
        System.out.println(doc.get("title"));

        Document[] docs = blt.docsLike(i, 10); // #2
        if (docs.length == 0) {
            System.out.println("  None like this");
        }
        for (Document likeThisDoc : docs) {
            System.out.println("  -> " + likeThisDoc.get("title"));
        }
    }
    reader.close();
    dir.close();
}

From source file:com.leavesfly.lia.advsearching.CategorizerTest.java

License:Apache License

private void buildCategoryVectors() throws IOException {
    IndexReader reader = IndexReader.open(TestUtil.getBookIndexDirectory());

    int maxDoc = reader.maxDoc();

    for (int i = 0; i < maxDoc; i++) {
        if (!reader.isDeleted(i)) {
            Document doc = reader.document(i);
            String category = doc.get("category");

            Map vectorMap = (Map) categoryMap.get(category);
            if (vectorMap == null) {
                vectorMap = new TreeMap();
                categoryMap.put(category, vectorMap);
            }

            TermFreqVector termFreqVector = reader.getTermFreqVector(i, "subject");

            addTermFreqToMap(vectorMap, termFreqVector);
        }
    }
}

From source file:com.leavesfly.lia.advsearching.FunctionQueryTest.java

License:Apache License

public void testRecency() throws Throwable {
    Directory dir = TestUtil.getBookIndexDirectory();
    IndexReader r = IndexReader.open(dir);
    IndexSearcher s = new IndexSearcher(r);
    s.setDefaultFieldSortScoring(true, true);

    QueryParser parser = new QueryParser(Version.LUCENE_30, "contents",
            new StandardAnalyzer(Version.LUCENE_30));
    Query q = parser.parse("java in action"); // #A
    Query q2 = new RecencyBoostingQuery(q, // #B
            2.0, 2 * 365, "pubmonthAsDay");
    Sort sort = new Sort(new SortField[] { SortField.FIELD_SCORE, new SortField("title2", SortField.STRING) });
    TopDocs hits = s.search(q2, null, 5, sort);

    for (int i = 0; i < hits.scoreDocs.length; i++) {
        Document doc = r.document(hits.scoreDocs[i].doc);
        System.out.println((1 + i) + ": " + doc.get("title") + ": pubmonth=" + doc.get("pubmonth") + " score="
                + hits.scoreDocs[i].score);
    }
    s.close();
    r.close();
    dir.close();
}

From source file:com.leavesfly.lia.tool.BooksMoreLikeThis.java

License:Apache License

public static void main(String[] args) throws Throwable {

    String indexDir = System.getProperty("index.dir");
    FSDirectory directory = FSDirectory.open(new File(indexDir));
    IndexReader reader = IndexReader.open(directory);

    IndexSearcher searcher = new IndexSearcher(reader);

    int numDocs = reader.maxDoc();

    MoreLikeThis mlt = new MoreLikeThis(reader); // #A
    mlt.setFieldNames(new String[] { "title", "author" });
    mlt.setMinTermFreq(1); // #B
    mlt.setMinDocFreq(1);

    for (int docID = 0; docID < numDocs; docID++) { // #C
        System.out.println();
        Document doc = reader.document(docID);
        System.out.println(doc.get("title"));

        Query query = mlt.like(docID); // #D
        System.out.println("  query=" + query);

        TopDocs similarDocs = searcher.search(query, 10);
        if (similarDocs.totalHits == 0)
            System.out.println("  None like this");
        for (int i = 0; i < similarDocs.scoreDocs.length; i++) {
            if (similarDocs.scoreDocs[i].doc != docID) { // #E
                doc = reader.document(similarDocs.scoreDocs[i].doc);
                System.out.println("  -> " + doc.getField("title").stringValue());
            }
        }
    }

    searcher.close();
    reader.close();
    directory.close();
}

From source file:com.main.Searcher.java

public List<Bean> searching(String s1, String s2, String radioBtn)
        throws IOException, ParseException, InvalidTokenOffsetsException {
    //getting reference of directory
    Directory dir = FSDirectory.open(Paths.get(Index_Dir));

    //Index reader - an interface for accessing a point-in-time view of a lucene index
    IndexReader reader = DirectoryReader.open(dir);

    IndexSearcher searcher = new IndexSearcher(reader);
    //analyzer with the default stop word set
    Analyzer analyzer = new StandardAnalyzer();

    String contents = "contents";

    QueryParser parser = new QueryParser(contents, analyzer);

    int numOfDoc = reader.numDocs();

    //Note: this loop loads every stored document and discards it; it has no
    //effect on the search below (docIDs properly range over [0, maxDoc()))
    for (int i = 0; i < numOfDoc; i++) {
        Document d = reader.document(i);
    }

    Query q1 = parser.parse(s1);
    Query q2 = parser.parse(s2);

    //conjunction, disjunction and negation
    BooleanQuery.Builder bq = new BooleanQuery.Builder();

    //Occur.MUST: both queries must match in a document
    if (radioBtn.equals("conjunction")) {
        bq.add(q1, BooleanClause.Occur.MUST);
        bq.add(q2, BooleanClause.Occur.MUST);
    } //Occur.SHOULD: at least one of the queries should match
    else if (radioBtn.equals("disjunction")) {
        bq.add(q1, BooleanClause.Occur.SHOULD);
        bq.add(q2, BooleanClause.Occur.SHOULD);
    } //negation: the first query must match and the second must not
    else {
        bq.add(q1, BooleanClause.Occur.MUST);
        bq.add(q2, BooleanClause.Occur.MUST_NOT);
    }

    BooleanQuery query = bq.build();
    TopDocs hits = searcher.search(query, 10);

    Formatter formatter = new SimpleHTMLFormatter();

    QueryScorer scorer = new QueryScorer(query);

    //used to mark up highlighted terms found in the best sections of the content
    Highlighter highlighter = new Highlighter(formatter, scorer);
    //SimpleSpanFragmenter breaks the content into same-size fragments
    //without splitting up the spans that matched the query
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 10);

    //set fragmenter to highlighter
    highlighter.setTextFragmenter(fragmenter);

    for (int i = 0; i < hits.scoreDocs.length; i++) {
        Bean bean = new Bean();

        int outResult = hits.scoreDocs.length;
        bean.setNumFile(outResult);
        int docid = hits.scoreDocs[i].doc;
        double rank = hits.scoreDocs[i].score;
        bean.setRankSc(rank);
        Document doc = searcher.doc(docid);

        String name = doc.get("name");
        bean.setTitle(name);

        String path = doc.get("path");
        bean.setPath(path);

        String cont = doc.get("contents");
        //Create token stream
        TokenStream stream = TokenSources.getAnyTokenStream(reader, docid, "contents", analyzer);
        //Get highlighted cont fragments
        String[] frags = highlighter.getBestFragments(stream, cont, 10);

        ArrayList<String> dummy = new ArrayList<>();
        for (String frag : frags) {

            dummy.add(frag);
        }

        bean.setContent(dummy);
        beanList.add(bean);
    }

    reader.close();
    dir.close();
    return beanList;
}

From source file:com.main.Searcher.java

public List<Bean> searching(String s1) throws IOException, ParseException, InvalidTokenOffsetsException {
    //Get directory reference
    Directory dir = FSDirectory.open(Paths.get(Index_Dir));
    //Index reader - an interface for accessing a point-in-time view of a lucene index
    IndexReader reader = DirectoryReader.open(dir);
    //Create the lucene searcher. It searches over a single IndexReader.
    IndexSearcher searcher = new IndexSearcher(reader);
    //analyzer with the default stop words
    Analyzer analyzer = new StandardAnalyzer();
    //Query parser to be used for creating TermQuery

    String queries = null;
    String queryString = null; //regular search
    String contents = "contents";
    BufferedReader in = null;
    if (queries != null) {
        in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8);
    } else {
        in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
    }
    QueryParser parser = new QueryParser(contents, analyzer);

    int numOfDoc = reader.numDocs();

    for (int i = 0; i < numOfDoc; i++) {

        Document d = reader.document(i);

    }

    Query q1 = parser.parse(s1);

    BooleanQuery.Builder bq = new BooleanQuery.Builder();

    bq.add(q1, BooleanClause.Occur.MUST);
    //Search the lucene documents
    TopDocs hits = searcher.search(bq.build(), 10);
    // TopScoreDocCollector collector = TopScoreDocCollector.create(5);
    /**
     * Highlighter Code Start ***
     */
    //Uses HTML <B></B> tags to highlight the searched terms
    Formatter formatter = new SimpleHTMLFormatter();
    //It scores content fragments by the number of unique query terms found
    //Basically the matching score in layman terms
    QueryScorer scorer = new QueryScorer(bq.build());
    //used to mark up highlighted terms found in the best sections of the content
    Highlighter highlighter = new Highlighter(formatter, scorer);
    //SimpleSpanFragmenter breaks the content into same-size fragments
    //without splitting up the spans that matched the query
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 10);

    //set fragmenter to highlighter
    highlighter.setTextFragmenter(fragmenter);
    //Iterate over found results
    for (int i = 0; i < hits.scoreDocs.length; i++) {
        Bean bean = new Bean();
        //int rank = hits.scoreDocs.length;
        int outResult = hits.scoreDocs.length;
        bean.setNumFile(outResult);
        int docid = hits.scoreDocs[i].doc;
        double rank = hits.scoreDocs[i].score;
        bean.setRankSc(rank);
        Document doc = searcher.doc(docid);
        String name = doc.get("name");
        bean.setTitle(name);

        String path = doc.get("path");
        bean.setPath(path);

        String cont = doc.get("contents");
        //Create token stream
        TokenStream stream = TokenSources.getAnyTokenStream(reader, docid, "contents", analyzer);
        //Get highlighted cont fragments
        String[] frags = highlighter.getBestFragments(stream, cont, 10);

        ArrayList<String> dummy = new ArrayList<>();
        for (String frag : frags) {

            dummy.add(frag);
        }

        bean.setContent(dummy);
        beanList.add(bean);
    }

    reader.close();
    dir.close();
    return beanList;
}