Example usage for org.apache.lucene.index IndexReader maxDoc

Introduction

On this page you can find example usage for org.apache.lucene.index IndexReader maxDoc.

Prototype

public abstract int maxDoc();

Document

Returns one greater than the largest possible document number.
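
Because valid document numbers run from 0 to maxDoc() - 1 and deleted documents still occupy numbers in that range, the usual pattern in the examples below is to loop up to maxDoc() and skip deleted slots. A minimal sketch of that pattern, assuming the pre-4.0 IndexReader API (IndexReader.open, isDeleted, document) used by most of the snippets on this page; the index directory is a placeholder:

private static void visitAllDocuments(File indexDir) throws IOException {
    IndexReader reader = IndexReader.open(FSDirectory.open(indexDir));
    try {
        for (int docId = 0; docId < reader.maxDoc(); docId++) {
            if (reader.isDeleted(docId)) {
                continue; // deleted documents still count towards maxDoc()
            }
            Document doc = reader.document(docId);
            // process doc ...
        }
    } finally {
        reader.close();
    }
}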

Usage

From source file:titli.model.index.Indexer.java

License:BSD License

/**
 * index the given table
 * @param table the table to be indexed
 * @throws TitliException if problems occur
 * 
 */
private void indexTable(Table table) throws TitliException {

    //long start = new Date().getTime();

    File tableIndexDir = IndexUtility.getIndexDirectoryForTable(table.getDatabaseName(), table.getName());
    String query = null;

    try {
        //RAMDirectory does not have a method to flush to the hard disk ! this is  bad !
        //RAMDirectory indexDir = new RAMDirectory(tableIndexDir);
        Directory dir = FSDirectory.getDirectory(tableIndexDir, true);

        //   specify the index directory
        IndexWriter indexWriter = new IndexWriter(dir, new StandardAnalyzer(), true);
        indexWriter.setMergeFactor(TitliConstants.INDEX_MERGE_FACTOR);
        indexWriter.setMaxBufferedDocs(TitliConstants.INDEX_MAX_BUFFERED_DOCS);

        //System.out.println("executing :   "+"SELECT * FROM  "+table.getName());

        query = getExtendedQuery(table);

        ResultSet rs = indexstmt.executeQuery(query);

        while (rs.next()) {
            //this is for compatibility with Nutch Parsers
            //RDBMSRecordParser parser = new RDBMSRecordParser(rs);
            //String content = parser.getParse(new Content()).getText();

            //indexWriter.addDocument(makeDocument(rs, table));
            makeDocument(rs, table, indexWriter);
        }

        Set<String> keySet = documentMap.keySet();
        Iterator<String> iterator = keySet.iterator();
        if (iterator.hasNext()) {
            String keyString = iterator.next();
            Map documentValueMap = documentMap.get(keyString);
            Document document = (Document) documentValueMap.get(TITLIDOC);
            indexWriter.addDocument(document);
        }

        indexWriter.optimize();
        indexWriter.close();
        dir.close();

        rs.close();

        IndexReader reader = null;
        try {
            reader = IndexReader.open(tableIndexDir);
        } catch (IOException e) {
            //throw new TitliIndexRefresherException("TITLI_S_030", "problem while creating index reader for database  :"+identifier.getDbName()+" table : "+identifier.getTableName(), e);
        }

        int maxDoc = reader.maxDoc();
        Document doc = null;

        int i;

        //find the doc with given columns and values
        for (i = 0; i < maxDoc; i++) {
            try {
                doc = reader.document(i);
            } catch (IOException e) {
                //throw new TitliIndexRefresherException("TITLI_S_030", "problem reading document from the index reader for database  :"+identifier.getDbName()+" table : "+identifier.getTableName(), e);
            }
        }

    } catch (IOException e) {
        throw new TitliIndexException("TITLI_S_009", "I/O problem with " + tableIndexDir, e);
    } catch (SQLException e) {
        throw new TitliIndexException("TITLI_S_010", "SQL problem while executing " + query, e);
    }

}
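
Note that the method above never closes the IndexReader it opens, and because the IOException from IndexReader.open is swallowed, the call to reader.maxDoc() would fail with a NullPointerException if the open did not succeed. A minimal sketch of a safer version of that final block, under the same pre-4.0 API (the try/finally is an addition, not part of the original Titli source):

    IndexReader reader = IndexReader.open(tableIndexDir);
    try {
        int maxDoc = reader.maxDoc();
        for (int i = 0; i < maxDoc; i++) {
            Document doc = reader.document(i);
            // inspect doc ...
        }
    } finally {
        reader.close();
    }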

From source file:titli.model.index.IndexRefresher.java

License:BSD License

/**
 * check if a record with the given unique key values is already in the index
 * @param identifier the record identifier
 * @return true if this record is already indexed, false otherwise
 * @throws TitliException if problems occur
 */
public boolean isIndexed(RecordIdentifier identifier) throws TitliException {
    boolean isIndexed = false;
    File indexDir = IndexUtility.getIndexDirectoryForTable(identifier.getDbName(), identifier.getTableName());
    IndexReader reader;

    try {
        FSDirectory dir = FSDirectory.getDirectory(indexDir, false);
        reader = IndexReader.open(dir);
    } catch (IOException e) {
        throw new TitliIndexRefresherException("TITLI_S_030",
                "problem while creating index reader for database  :" + identifier.getDbName() + " table : "
                        + identifier.getTableName(),
                e);
    }

    int maxDoc = reader.maxDoc();
    Document doc = null;
    int i;

    //find the doc with given columns and values
    for (i = 0; i < maxDoc; i++) {
        try {
            //ignore documents marked deleted
            if (reader.isDeleted(i)) {
                continue;
            }

            doc = reader.document(i);
        } catch (IOException e) {
            throw new TitliIndexRefresherException("TITLI_S_030",
                    "problem reading document from the index reader for database  :" + identifier.getDbName()
                            + " table : " + identifier.getTableName(),
                    e);
        }

        //check whether this is the doc we are looking for
        if (identifier.matches(doc)) {
            isIndexed = true;
            break;
        }

    }

    try {
        reader.close();
    } catch (IOException e) {
        throw new TitliIndexRefresherException("TITLI_S_030", "problem closing reader for database  :"
                + identifier.getDbName() + " table : " + identifier.getTableName(), e);
    }

    return isIndexed;

}
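
Scanning every document number up to maxDoc() is linear in the size of the index. When the record identifier corresponds to a single indexed term, a direct term lookup is usually cheaper. A sketch of that alternative under the same pre-4.0 API; uniqueKeyField and uniqueKeyValue are hypothetical values derived from the RecordIdentifier, not part of the original Titli code:

    Term term = new Term(uniqueKeyField, uniqueKeyValue); // hypothetical field/value from the identifier
    TermDocs termDocs = reader.termDocs(term);
    try {
        while (termDocs.next()) {
            // a TermDocs enumeration already skips deleted documents
            Document doc = reader.document(termDocs.doc());
            if (identifier.matches(doc)) {
                return true;
            }
        }
    } finally {
        termDocs.close();
    }
    return false;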

From source file:uk.ac.ebi.bioinvindex.search.hibernatesearch.SecureStudyFreeTextSearch.java

License:Creative Commons License

private void browse(BIIFilterQuery filterQuery, SearchFactory searchFactory,
        DirectoryProvider directoryProvider, List<Map<StudyBrowseField, String[]>> answer) {
    ReaderProvider readerProvider = searchFactory.getReaderProvider();
    IndexReader reader = readerProvider.openReader(directoryProvider);

    try {

        if (filterQuery.getFilters().size() > 0) {
            Filter filter = queryBuilder.buildFilter(filterQuery);

            DocIdSet docIdSet = filter.getDocIdSet(reader);

            DocIdSetIterator iterator = docIdSet.iterator();

            while (iterator.next()) {
                int i = iterator.doc();

                if (reader.isDeleted(i))
                    continue;

                Document document = reader.document(i);
                processDocument(answer, document);
            }

        } else {
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (reader.isDeleted(i))
                    continue;

                Document document = reader.document(i);
                processDocument(answer, document);

            }
        }
    } catch (IOException e) {
        log.error("Cannot open index ", e);
        throw new SearchException("Cannot open index " + e.getMessage(), e);
    } finally {
        readerProvider.closeReader(reader);
    }
}
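
The next()/doc() calls above belong to the Lucene 2.4-era DocIdSetIterator contract; from Lucene 2.9 onward the same walk is written with nextDoc() and the NO_MORE_DOCS sentinel. A sketch of the filtered branch under that newer contract, with everything else as in the original method:

    DocIdSet docIdSet = filter.getDocIdSet(reader);
    DocIdSetIterator iterator = docIdSet.iterator();
    int docId;
    while ((docId = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        if (reader.isDeleted(docId)) {
            continue;
        }
        Document document = reader.document(docId);
        processDocument(answer, document);
    }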

From source file:uk.ac.ebi.bioinvindex.search.hibernatesearch.StudyFreeTextSearchImpl.java

License:Creative Commons License

private void browse(BIIFilterQuery filterQuery, SearchFactory searchFactory,
        DirectoryProvider directoryProvider, List<Map<StudyBrowseField, String[]>> answer) {
    ReaderProvider readerProvider = searchFactory.getReaderProvider();
    IndexReader reader = readerProvider.openReader(directoryProvider);

    try {

        if (filterQuery.getFilters().size() > 0) {
            Filter filter = queryBuilder.buildFilter(filterQuery);

            DocIdSet docIdSet = filter.getDocIdSet(reader);

            DocIdSetIterator iterator = docIdSet.iterator();

            while (iterator.next()) {
                int i = iterator.doc();

                if (reader.isDeleted(i))
                    continue;

                Document document = reader.document(i);
                processDocument(answer, document);
            }

        } else {
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (reader.isDeleted(i))
                    continue;

                Document document = reader.document(i);
                processDocument(answer, document);

            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        readerProvider.closeReader(reader);
    }
}

From source file:uk.ac.ebi.bioinvindex.search.StudySearchTest.java

License:Creative Commons License

@Test
@Ignore
public void test() throws Exception {

    Study study = buildStudy("study_acc1", "study to test");
    Assay assay = buildAssay("assay_acc2", "my assay type", study);

    //      study.addAssay(assay);
    Design design = buildDesign("study design");
    study.setDesign(design);

    //ToDo: Use AssayResult
    Characteristic char1 = buildCharacteristic("organism", "specie", PropertyRole.FACTOR);
    //      assay.addCascadedPropertyValue(buildCharacteristicValue("human", char1, "homo sapiens"));
    //      assay.addCascadedPropertyValue(buildCharacteristicValue("little mouse", char1, "Mus musculus"));

    Characteristic char2 = buildCharacteristic("size", null, PropertyRole.PROPERTY);
    //      assay.addCascadedPropertyValue(buildCharacteristicValue("big", char2, null));
    //      assay.addCascadedPropertyValue(buildCharacteristicValue("small", char2, null));

    EntityTransaction tnx = entityManager.getTransaction();
    tnx.begin();
    // MB: uses the new persistence API
    new StudyPersister(DaoFactory.getInstance(entityManager), null).persist(study);
    tnx.commit();

    FullTextSession session;
    Session deligate = (Session) entityManager.getDelegate();

    if (deligate instanceof FullTextSession) {
        session = (FullTextSession) deligate;
    } else {
        session = Search.createFullTextSession(deligate);
    }

    SearchFactory searchFactory = session.getSearchFactory();

    DirectoryProvider directoryProvider = searchFactory.getDirectoryProviders(Study.class)[0];

    ReaderProvider readerProvider = searchFactory.getReaderProvider();
    IndexReader reader = readerProvider.openReader(directoryProvider);

    BitSet bitSet = new BitSet(reader.maxDoc());

    //      Term term1 = new Term(FilterField.ORGANISM.getName(), "mouse");
    //      Term term2 = new Term(FilterField.ORGANISM.getName(), "little");

    Analyzer analyzer = new StandardAnalyzer();

    StringReader stringReader = new StringReader("first endpoint");
    TokenStream tokenStream = analyzer.tokenStream(null, stringReader);

    TermsFilter filter = new TermsFilter();
    while (true) {
        Token token = tokenStream.next();
        if (token == null)
            break;

        System.out.println("token = " + token);
        filter.addTerm(new Term(FilterField.ENDPOINT_NAME.getName(), token.termText()));

    }

    //      TermsFilter filter = new TermsFilter();
    //      filter.addTerm(term1);
    //      filter.addTerm(term2);

    BitSet bitSet1 = filter.bits(reader);
    System.out.println("bitSet1.cardinality() = " + bitSet1.cardinality());

    //      TermDocs termDocs = reader.termDocs(term);
    //      while (termDocs.next()) {
    //         bitSet.set(termDocs.doc());
    //      }
    //
    //      System.out.println("bitSet.cardinality() = " + bitSet.cardinality());

}
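
In the test above, maxDoc() supplies the size for a per-document BitSet: a filter's bits are indexed by document number, so maxDoc() is the natural bound when walking them. A minimal sketch of reading such a bit set back, under the same Lucene 2.x-era Filter.bits API the test relies on:

    BitSet matching = filter.bits(reader); // one bit per document number
    for (int docId = 0; docId < reader.maxDoc(); docId++) {
        if (matching.get(docId) && !reader.isDeleted(docId)) {
            Document hit = reader.document(docId);
            // process hit ...
        }
    }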

From source file:Util.DataManager.java

/**
 * Extracts anchors and URIs from a Lucene index containing a dbpedia_uri field and an
 * anchor field.
 * @param indexPath the path to the index
 */
public void extracAnchors(String indexPath) {
    try {
        IndexReader iR = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
        IndexSearcher searcher = new IndexSearcher(iR);
        TreeSet<String> set = new TreeSet<>();
        Document doc;
        for (int i = 0; i < iR.maxDoc(); i++) {
            doc = searcher.doc(i);
            set.add("<" + doc.get("dbpedia_uri") + ">" + "|" + doc.get("anchor"));
        }
        PrintWriter pw = new PrintWriter("anchors", "UTF-8");
        for (String s : set) {
            pw.println(s);
        }
        pw.close();
    } catch (IOException ex) {
        System.out.println("this should hopfluy never happen" + ex.getMessage());
    }
}

From source file:Util.Index_Handler.java

/**
 * Extracts anchors from a Lucene index with a "dbpedia_uri" field and an "anchor" field.
 * @return an error message, or "done" on success
 */
 */
public static String extracAnchors() {
    try {
        IndexReader iR = DirectoryReader.open(FSDirectory.open(new File("AnchorIndex")));
        IndexSearcher searcher = new IndexSearcher(iR);

        TreeSet<String> set = new TreeSet<>();
        Document doc;
        for (int i = 0; i < iR.maxDoc(); i++) {
            doc = searcher.doc(i);
            set.add("<" + doc.get("dbpedia_uri") + ">" + "|" + doc.get("anchor"));
        }
        PrintWriter pw = new PrintWriter("anchors.txt", "UTF-8");
        for (String s : set) {
            pw.println(s);
        }
        pw.close();
    } catch (IOException ex) {
        System.out.println("Error while extracting Anchors>" + ex.getMessage());
        return "Error while extracting Anchors>" + ex.getMessage();
    }
    return "done";
}
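
The last two examples use the Lucene 4.x DirectoryReader API, where IndexReader.isDeleted no longer exists; documents that were deleted but not yet merged away still occupy numbers below maxDoc(). A minimal sketch of the equivalent deletion check in that API, using MultiFields.getLiveDocs from org.apache.lucene.index and Bits from org.apache.lucene.util (the original snippets skip this check entirely):

    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("AnchorIndex")));
    Bits liveDocs = MultiFields.getLiveDocs(reader); // null when the index holds no deletions
    for (int docId = 0; docId < reader.maxDoc(); docId++) {
        if (liveDocs != null && !liveDocs.get(docId)) {
            continue; // skip deleted documents
        }
        Document doc = reader.document(docId);
        // process doc ...
    }
    reader.close();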