List of usage examples for org.apache.lucene.index IndexReader maxDoc
public abstract int maxDoc();
From source file:titli.model.index.Indexer.java
License:BSD License
/** * index the given table// ww w . j a v a2 s. c om * @param table the table to be indexed * @throws TitliException if problems occur * */ private void indexTable(Table table) throws TitliException { //long start = new Date().getTime(); File tableIndexDir = IndexUtility.getIndexDirectoryForTable(table.getDatabaseName(), table.getName()); String query = null; try { //RAMDirectory does not have a method to flush to the hard disk ! this is bad ! //RAMDirectory indexDir = new RAMDirectory(tableIndexDir); Directory dir = FSDirectory.getDirectory(tableIndexDir, true); // specify the index directory IndexWriter indexWriter = new IndexWriter(dir, new StandardAnalyzer(), true); indexWriter.setMergeFactor(TitliConstants.INDEX_MERGE_FACTOR); indexWriter.setMaxBufferedDocs(TitliConstants.INDEX_MAX_BUFFERED_DOCS); //System.out.println("executing : "+"SELECT * FROM "+table.getName()); query = getExtendedQuery(table); ResultSet rs = indexstmt.executeQuery(query); while (rs.next()) { //this is for compatibility with Nutch Parsers //RDBMSRecordParser parser = new RDBMSRecordParser(rs); //String content = parser.getParse(new Content()).getText(); //indexWriter.addDocument(makeDocument(rs, table)); makeDocument(rs, table, indexWriter); } Set<String> keySet = documentMap.keySet(); Iterator<String> iterator = keySet.iterator(); if (iterator.hasNext()) { String keyString = iterator.next(); Map documentValueMap = documentMap.get(keyString); Document document = (Document) documentValueMap.get(TITLIDOC); indexWriter.addDocument(document); } indexWriter.optimize(); indexWriter.close(); dir.close(); rs.close(); IndexReader reader = null; try { reader = IndexReader.open(tableIndexDir); } catch (IOException e) { //throw new TitliIndexRefresherException("TITLI_S_030", "problem while creating index reader for database :"+identifier.getDbName()+" table : "+identifier.getTableName(), e); } int maxDoc = reader.maxDoc(); Document doc = null; int i; //find the doc with given columns and values for 
(i = 0; i < maxDoc; i++) { try { doc = reader.document(i); } catch (IOException e) { //throw new TitliIndexRefresherException("TITLI_S_030", "problem reading document from the index reader for database :"+identifier.getDbName()+" table : "+identifier.getTableName(), e); } } } catch (IOException e) { throw new TitliIndexException("TITLI_S_009", "I/O problem with " + tableIndexDir, e); } catch (SQLException e) { throw new TitliIndexException("TITLI_S_010", "SQL problem while executing " + query, e); } }
From source file:titli.model.index.IndexRefresher.java
License:BSD License
/** * check if a record with given unique key values already in the index * @param identifier the record identifier * @return true if this record is already indexed otherwise false * @throws TitliException if problems occur *//* ww w . j a v a2 s . c o m*/ public boolean isIndexed(RecordIdentifier identifier) throws TitliException { boolean isIndexed = false; File indexDir = IndexUtility.getIndexDirectoryForTable(identifier.getDbName(), identifier.getTableName()); IndexReader reader; try { FSDirectory dir = FSDirectory.getDirectory(indexDir, false); reader = IndexReader.open(dir); } catch (IOException e) { throw new TitliIndexRefresherException("TITLI_S_030", "problem while creating index reader for database :" + identifier.getDbName() + " table : " + identifier.getTableName(), e); } int maxDoc = reader.maxDoc(); Document doc = null; int i; //find the doc with given columns and values for (i = 0; i < maxDoc; i++) { try { //ignore documents marked deleted if (reader.isDeleted(i)) { continue; } doc = reader.document(i); } catch (IOException e) { throw new TitliIndexRefresherException("TITLI_S_030", "problem reading document from the index reader for database :" + identifier.getDbName() + " table : " + identifier.getTableName(), e); } //this is not the doc we are looking for if (identifier.matches(doc)) { isIndexed = true; break; } } try { reader.close(); } catch (IOException e) { throw new TitliIndexRefresherException("TITLI_S_030", "problem closing reader for database :" + identifier.getDbName() + " table : " + identifier.getTableName(), e); } return isIndexed; }
From source file:uk.ac.ebi.bioinvindex.search.hibernatesearch.SecureStudyFreeTextSearch.java
License:Creative Commons License
private void browse(BIIFilterQuery filterQuery, SearchFactory searchFactory, DirectoryProvider directoryProvider, List<Map<StudyBrowseField, String[]>> answer) { ReaderProvider readerProvider = searchFactory.getReaderProvider(); IndexReader reader = readerProvider.openReader(directoryProvider); try {//from ww w.ja v a 2 s . co m if (filterQuery.getFilters().size() > 0) { Filter filter = queryBuilder.buildFilter(filterQuery); DocIdSet docIdSet = filter.getDocIdSet(reader); DocIdSetIterator iterator = docIdSet.iterator(); while (iterator.next()) { int i = iterator.doc(); if (reader.isDeleted(i)) continue; Document document = reader.document(i); processDocument(answer, document); } } else { for (int i = 0; i < reader.maxDoc(); i++) { if (reader.isDeleted(i)) continue; Document document = reader.document(i); processDocument(answer, document); } } } catch (IOException e) { log.error("Cannot open index ", e); throw new SearchException("Cannot open index " + e.getMessage(), e); } finally { readerProvider.closeReader(reader); } }
From source file:uk.ac.ebi.bioinvindex.search.hibernatesearch.StudyFreeTextSearchImpl.java
License:Creative Commons License
private void browse(BIIFilterQuery filterQuery, SearchFactory searchFactory, DirectoryProvider directoryProvider, List<Map<StudyBrowseField, String[]>> answer) { ReaderProvider readerProvider = searchFactory.getReaderProvider(); IndexReader reader = readerProvider.openReader(directoryProvider); try {// w w w . j a va 2s . co m if (filterQuery.getFilters().size() > 0) { Filter filter = queryBuilder.buildFilter(filterQuery); DocIdSet docIdSet = filter.getDocIdSet(reader); DocIdSetIterator iterator = docIdSet.iterator(); while (iterator.next()) { int i = iterator.doc(); if (reader.isDeleted(i)) continue; Document document = reader.document(i); processDocument(answer, document); } } else { for (int i = 0; i < reader.maxDoc(); i++) { if (reader.isDeleted(i)) continue; Document document = reader.document(i); processDocument(answer, document); } } } catch (IOException e) { e.printStackTrace(); } finally { readerProvider.closeReader(reader); } }
From source file:uk.ac.ebi.bioinvindex.search.StudySearchTest.java
License:Creative Commons License
/**
 * Disabled integration test: persists a Study (with assay/characteristic
 * scaffolding, mostly commented out), then opens the Hibernate Search index
 * for Study and builds a TermsFilter from the analyzed tokens of
 * "first endpoint" against the ENDPOINT_NAME field, printing the filter's
 * bit-set cardinality. Asserts nothing — it is exploratory and @Ignore'd.
 *
 * NOTE(review): the reader obtained from readerProvider is never returned via
 * readerProvider.closeReader(reader), and bitSet is built but unused — both
 * look like leftovers; confirm before re-enabling this test.
 */
@Test
@Ignore
public void test() throws Exception {
    Study study = buildStudy("study_acc1", "study to test");
    Assay assay = buildAssay("assay_acc2", "my assay type", study);
    // study.addAssay(assay);
    Design design = buildDesign("study design");
    study.setDesign(design);
    //ToDo: Use AssayResult
    Characteristic char1 = buildCharacteristic("organism", "specie", PropertyRole.FACTOR);
    // assay.addCascadedPropertyValue(buildCharacteristicValue("human", char1, "homo sapiens"));
    // assay.addCascadedPropertyValue(buildCharacteristicValue("little mouse", char1, "Mus musculus"));
    Characteristic char2 = buildCharacteristic("size", null, PropertyRole.PROPERTY);
    // assay.addCascadedPropertyValue(buildCharacteristicValue("big", char2, null));
    // assay.addCascadedPropertyValue(buildCharacteristicValue("small", char2, null));
    EntityTransaction tnx = entityManager.getTransaction();
    tnx.begin();
    // MB: uses the new persistence API
    new StudyPersister(DaoFactory.getInstance(entityManager), null).persist(study);
    tnx.commit();
    // Obtain a FullTextSession, reusing the delegate if it already is one.
    FullTextSession session;
    Session deligate = (Session) entityManager.getDelegate();
    if (deligate instanceof FullTextSession) {
        session = (FullTextSession) deligate;
    } else {
        session = Search.createFullTextSession(deligate);
    }
    SearchFactory searchFactory = session.getSearchFactory();
    DirectoryProvider directoryProvider = searchFactory.getDirectoryProviders(Study.class)[0];
    ReaderProvider readerProvider = searchFactory.getReaderProvider();
    IndexReader reader = readerProvider.openReader(directoryProvider);
    BitSet bitSet = new BitSet(reader.maxDoc());
    // Term term1 = new Term(FilterField.ORGANISM.getName(), "mouse");
    // Term term2 = new Term(FilterField.ORGANISM.getName(), "little");
    // Tokenize "first endpoint" and add each token as an ENDPOINT_NAME term.
    Analyzer analyzer = new StandardAnalyzer();
    StringReader stringReader = new StringReader("first endpoint");
    TokenStream tokenStream = analyzer.tokenStream(null, stringReader);
    TermsFilter filter = new TermsFilter();
    while (true) {
        // old (pre-2.9) TokenStream API: next() returns null at end of stream
        Token token = tokenStream.next();
        if (token == null)
            break;
        System.out.println("token = " + token);
        filter.addTerm(new Term(FilterField.ENDPOINT_NAME.getName(), token.termText()));
    }
    // TermsFilter filter = new TermsFilter();
    // filter.addTerm(term1);
    // filter.addTerm(term2);
    BitSet bitSet1 = filter.bits(reader);
    System.out.println("bitSet1.cardinality() = " + bitSet1.cardinality());
    // TermDocs termDocs = reader.termDocs(term);
    // while (termDocs.next()) {
    // bitSet.set(termDocs.doc());
    // }
    //
    // System.out.println("bitSet.cardinality() = " + bitSet.cardinality());
}
From source file:Util.DataManager.java
/**
 * Extracts anchors and URIs from a Lucene index containing a "dbpedia_uri"
 * field and an "anchor" field, writing the sorted, de-duplicated
 * {@code <uri>|anchor} pairs to a file named "anchors" (UTF-8).
 *
 * @param indexPath the path to the index
 */
public void extracAnchors(String indexPath) {
    // fix: reader and writer are now closed on every path via
    // try-with-resources (previously both leaked when an exception was
    // thrown mid-extraction)
    try (IndexReader iR = DirectoryReader.open(FSDirectory.open(new File(indexPath)))) {
        IndexSearcher searcher = new IndexSearcher(iR);
        // TreeSet both de-duplicates and sorts the output lines
        TreeSet<String> set = new TreeSet<>();
        for (int i = 0; i < iR.maxDoc(); i++) {
            Document doc = searcher.doc(i);
            set.add("<" + doc.get("dbpedia_uri") + ">" + "|" + doc.get("anchor"));
        }
        try (PrintWriter pw = new PrintWriter("anchors", "UTF-8")) {
            for (String s : set) {
                pw.println(s);
            }
        }
    } catch (IOException ex) {
        // typo fixed in message ("hopfluy" -> "hopefully")
        System.out.println("this should hopefully never happen" + ex.getMessage());
    }
}
From source file:Util.Index_Handler.java
/** * extracts anchors from an Lucene Index with a "dpedia_uri" field and an "anchor" field. * @return errormessage or "done"/* w ww.j a va 2 s . c om*/ */ public static String extracAnchors() { try { IndexReader iR = DirectoryReader.open(FSDirectory.open(new File("AnchorIndex"))); IndexSearcher searcher = new IndexSearcher(iR); TreeSet<String> set = new TreeSet<>(); Document doc; for (int i = 0; i < iR.maxDoc(); i++) { doc = searcher.doc(i); set.add("<" + doc.get("dbpedia_uri") + ">" + "|" + doc.get("anchor")); } PrintWriter pw = new PrintWriter("anchors.txt", "UTF-8"); for (String s : set) { pw.println(s); } pw.close(); } catch (IOException ex) { System.out.println("Error while extracting Anchors>" + ex.getMessage()); return "Error while extracting Anchors>" + ex.getMessage(); } return "done"; }