List of usage examples for org.apache.lucene.index.IndexReader.numDocs()

public abstract int numDocs();

Returns the number of live (non-deleted) documents in the index. This differs from maxDoc(), which returns one greater than the largest document number and therefore still counts deleted-but-not-yet-merged-away slots.
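Before the harvested examples, a minimal self-contained sketch of the distinction (not from any of the projects below; it assumes the Lucene 3.x API that most of these examples target, and the field names are illustrative):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class NumDocsDemo {
    public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir,
                new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36)));
        for (int i = 0; i < 3; i++) {
            Document doc = new Document();
            doc.add(new Field("id", Integer.toString(i), Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.addDocument(doc);
        }
        writer.deleteDocuments(new Term("id", "1")); // delete one of the three documents
        writer.close(); // commits the delete

        IndexReader reader = IndexReader.open(dir);
        try {
            // numDocs() counts live documents only; maxDoc() includes the deleted slot
            System.out.println("numDocs = " + reader.numDocs()); // prints 2
            System.out.println("maxDoc  = " + reader.maxDoc());  // prints 3
        } finally {
            reader.close();
        }
    }
}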
From source file:org.hibernate.search.test.shards.DirectorySelectionTest.java
License:Open Source License
public void testDirectoryProviderForQuery() throws Exception {
    IndexReader indexReader = indexReaderAccessor.open(Product.class);
    try {
        Assert.assertEquals(2, indexReader.numDocs());
    } finally {
        indexReaderAccessor.close(indexReader);
    }

    indexReader = indexReaderAccessor.open("Products.0");
    try {
        Assert.assertEquals(1, indexReader.numDocs());
    } finally {
        indexReaderAccessor.close(indexReader);
    }

    indexReader = indexReaderAccessor.open("Products.1");
    try {
        Assert.assertEquals(1, indexReader.numDocs());
    } finally {
        indexReaderAccessor.close(indexReader);
    }
}
From source file:org.hibernate.search.test.shards.ShardsTest.java
License:Open Source License
public void testInternalSharding() throws Exception {
    Session s = openSession();
    Transaction tx = s.beginTransaction();
    Animal a = new Animal();
    a.setId(1);
    a.setName("Elephant");
    s.persist(a);

    a = new Animal();
    a.setId(2);
    a.setName("Bear");
    s.persist(a);
    tx.commit();

    s.clear();

    FSDirectory animal00Directory = FSDirectory.open(new File(getBaseIndexDir(), "Animal00"));
    try {
        IndexReader reader = IndexReader.open(animal00Directory);
        try {
            int num = reader.numDocs();
            assertEquals(1, num);
        } finally {
            reader.close();
        }
    } finally {
        animal00Directory.close();
    }

    FSDirectory animal01Directory = FSDirectory.open(new File(getBaseIndexDir(), "Animal.1"));
    try {
        IndexReader reader = IndexReader.open(animal01Directory);
        try {
            int num = reader.numDocs();
            assertEquals(1, num);
        } finally {
            reader.close();
        }
    } finally {
        animal01Directory.close();
    }

    tx = s.beginTransaction();
    a = (Animal) s.get(Animal.class, 1);
    a.setName("Mouse");
    tx.commit();

    s.clear();

    animal01Directory = FSDirectory.open(new File(getBaseIndexDir(), "Animal.1"));
    try {
        IndexReader reader = IndexReader.open(animal01Directory);
        try {
            int num = reader.numDocs();
            assertEquals(1, num);
            TermDocs docs = reader.termDocs(new Term("name", "mouse"));
            assertTrue(docs.next());
            org.apache.lucene.document.Document doc = reader.document(docs.doc());
            assertFalse(docs.next());
        } finally {
            reader.close();
        }
    } finally {
        animal01Directory.close();
    }

    tx = s.beginTransaction();
    FullTextSession fts = Search.getFullTextSession(s);
    QueryParser parser = new QueryParser(getTargetLuceneVersion(), "id", SearchTestCase.stopAnalyzer);
    List results = fts.createFullTextQuery(parser.parse("name:mouse OR name:bear")).list();
    assertEquals("Either double insert, single update, or query fails with shards", 2, results.size());
    for (Object o : results) {
        s.delete(o);
    }
    tx.commit();
    s.close();
}
From source file:org.hibernate.search.test.TransactionTest.java
License:Open Source License
private int getDocumentNumber() throws IOException {
    IndexReader reader = IndexReader.open(getDirectory(Document.class), false);
    try {
        return reader.numDocs();
    } finally {
        reader.close();
    }
}
From source file:org.hippoecm.repository.FacetedNavigationEngineImpl.java
License:Apache License
public Result doView(String queryName, QueryImpl initialQuery, ContextImpl contextImpl,
        List<KeyValue<String, String>> facetsQueryList, List<FacetRange> rangeQuery, QueryImpl openQuery,
        Map<String, Map<String, Count>> resultset, Map<String, String> inheritedFilter,
        HitsRequested hitsRequested) throws UnsupportedOperationException, IllegalArgumentException {

    NamespaceMappings nsMappings = getNamespaceMappings();

    IndexReader indexReader = null;
    try {
        indexReader = getIndexReader(false);
        IndexSearcher searcher = new IndexSearcher(indexReader);

        SetDocIdSetBuilder matchingDocsSetBuilder = new SetDocIdSetBuilder();

        BooleanQuery facetsQuery = new FacetsQuery(facetsQueryList, nsMappings).getQuery();
        matchingDocsSetBuilder.add(filterDocIdSetPlainLuceneQuery(facetsQuery, indexReader));

        BooleanQuery facetRangeQuery = new FacetRangeQuery(rangeQuery, nsMappings, this).getQuery();
        matchingDocsSetBuilder.add(filterDocIdSetPlainLuceneQuery(facetRangeQuery, indexReader));

        BooleanQuery inheritedFilterQuery = new InheritedFilterQuery(inheritedFilter, nsMappings).getQuery();
        matchingDocsSetBuilder.add(filterDocIdSetPlainLuceneQuery(inheritedFilterQuery, indexReader));

        org.apache.lucene.search.Query initialLuceneQuery = null;
        if (initialQuery != null && initialQuery.scopes != null && initialQuery.scopes.length > 0) {
            if (initialQuery.scopes.length == 1) {
                initialLuceneQuery = new TermQuery(
                        new Term(ServicingFieldNames.HIPPO_PATH, initialQuery.scopes[0]));
            } else {
                initialLuceneQuery = new BooleanQuery(true);
                for (String scope : initialQuery.scopes) {
                    ((BooleanQuery) initialLuceneQuery)
                            .add(new TermQuery(new Term(ServicingFieldNames.HIPPO_PATH, scope)), Occur.SHOULD);
                }
            }
        }
        matchingDocsSetBuilder.add(filterDocIdSetPlainLuceneQuery(initialLuceneQuery, indexReader));

        FacetFiltersQuery facetFiltersQuery = null;
        if (initialQuery != null && initialQuery.facetFilters != null) {
            facetFiltersQuery = new FacetFiltersQuery(initialQuery.facetFilters, nsMappings,
                    this.getTextAnalyzer(), this.getSynonymProvider());
        }

        final BooleanQuery authorizationQuery = contextImpl.getAuthorizationQuery();
        if (authorizationQuery != null) {
            final DocIdSet authorisationIdSet = contextImpl.getAuthorisationIdSet(indexReader);
            if (authorisationIdSet != null) {
                matchingDocsSetBuilder.add(authorisationIdSet);
            }
        }

        if (resultset != null) {
            // If there is more than one facet in the 'resultset', we return an empty result, as this is not allowed
            if (resultset.size() > 1) {
                log.error("The resultset cannot contain multiple facets");
                return new ResultImpl(0, null);
            }

            int cardinality = 0;
            for (String namespacedFacet : resultset.keySet()) {

                // Not a search involving scoring, thus compute bitsets for facetFiltersQuery & freeSearchInjectedSort
                if (facetFiltersQuery != null) {
                    if (facetFiltersQuery.isPlainLuceneQuery()) {
                        matchingDocsSetBuilder
                                .add(filterDocIdSetPlainLuceneQuery(facetFiltersQuery.getQuery(), indexReader));
                    } else {
                        matchingDocsSetBuilder
                                .add(filterDocIdSetJackRabbitQuery(facetFiltersQuery.getQuery(), indexReader));
                    }
                }

                if (openQuery != null) {
                    QueryAndSort queryAndSort = openQuery.getLuceneQueryAndSort(contextImpl);
                    // open query is always a jackrabbit query
                    matchingDocsSetBuilder.add(filterDocIdSetJackRabbitQuery(queryAndSort.query, indexReader));
                }

                OpenBitSet matchingDocs = matchingDocsSetBuilder.toBitSet();
                cardinality = (int) matchingDocs.cardinality();

                /*
                 * Nodes not having this facet should still be counted if they are a hit
                 * in the query without this facet. Therefore, first get the count query without
                 * FacetPropExistsQuery.
                 */
                int numHits = 0;
                if (hitsRequested.isFixedDrillPath()) {
                    // only in the case of the fixed drillpath do we use the count where the facet does not need to exist
                    numHits = (int) matchingDocs.cardinality();
                }

                ParsedFacet parsedFacet;
                try {
                    parsedFacet = ParsedFacet.getInstance(namespacedFacet);
                } catch (Exception e) {
                    log.error("Error parsing facet: ", e);
                    return new ResultImpl(0, null);
                }

                String propertyName = ServicingNameFormat.getInteralPropertyPathName(nsMappings,
                        parsedFacet.getNamespacedProperty());

                /*
                 * facetPropExists: the node must have the property as facet
                 */
                matchingDocsSetBuilder.add(filterDocIdSetPlainLuceneQuery(
                        new FacetPropExistsQuery(propertyName).getQuery(), indexReader));

                matchingDocs = matchingDocsSetBuilder.toBitSet();
                cardinality = (int) matchingDocs.cardinality();

                // the index reader is an instance of JackrabbitIndexReader: we need the wrapped multi-index reader
                // as cache key; since during deletes the backing index reader can stay the same instance, we
                // also need to include numDocs to be sure we get the right cached values
                Object[] keyObjects = { matchingDocs, propertyName, parsedFacet, indexReader.getCoreCacheKey(),
                        indexReader.numDocs() };
                FVCKey fvcKey = new FVCKey(keyObjects);

                Map<String, Count> facetValueCountMap = facetValueCountCache.getIfPresent(fvcKey);
                if (facetValueCountMap == null) {
                    facetValueCountMap = new HashMap<String, Count>();
                    // this call populates the facetValueCountMap for the current facet
                    populateFacetValueCountMap(propertyName, parsedFacet, facetValueCountMap, matchingDocs,
                            indexReader);
                    facetValueCountCache.put(fvcKey, facetValueCountMap);
                    log.debug("Caching new facet value count map");
                } else {
                    log.debug("Reusing previously cached facet value count map");
                }

                Map<String, Count> resultFacetValueCountMap = resultset.get(namespacedFacet);
                resultFacetValueCountMap.putAll(facetValueCountMap);

                // set the numHits value
                if (hitsRequested.isFixedDrillPath()) {
                    return new ResultImpl(numHits, null);
                }
            }

            return new ResultImpl(cardinality, null);

        } else {
            // resultset is null, so search for HippoNodeType.HIPPO_RESULTSET
            if (!hitsRequested.isResultRequested()) {
                // No search with SCORING involved, thus everything can be done with BitSets
                if (facetFiltersQuery != null && facetFiltersQuery.getQuery().clauses().size() > 0) {
                    matchingDocsSetBuilder
                            .add(filterDocIdSetPlainLuceneQuery(facetFiltersQuery.getQuery(), indexReader));
                }

                if (openQuery != null) {
                    QueryAndSort queryAndSort = openQuery.getLuceneQueryAndSort(contextImpl);
                    matchingDocsSetBuilder.add(filterDocIdSetJackRabbitQuery(queryAndSort.query, indexReader));
                }

                int size = (int) matchingDocsSetBuilder.toBitSet().cardinality();
                return new ResultImpl(size, null);

            } else {
                BooleanQuery searchQuery = new BooleanQuery(false);
                Sort freeSearchInjectedSort = null;

                if (facetFiltersQuery != null && facetFiltersQuery.getQuery().clauses().size() > 0) {
                    searchQuery.add(facetFiltersQuery.getQuery(), Occur.MUST);
                }

                if (openQuery != null) {
                    QueryAndSort queryAndSort = openQuery.getLuceneQueryAndSort(contextImpl);
                    if (queryAndSort.query != null) {
                        searchQuery.add(queryAndSort.query, Occur.MUST);
                    }
                    freeSearchInjectedSort = queryAndSort.sort;
                }

                Set<String> fieldNames = new HashSet<String>();
                fieldNames.add(FieldNames.UUID);
                FieldSelector fieldSelector = new SetBasedFieldSelector(fieldNames, new HashSet<String>());

                int fetchTotal = hitsRequested.getOffset() + hitsRequested.getLimit();
                Sort sort = null;
                if (freeSearchInjectedSort != null) {
                    // we already have a sort from the xpath or sql free search; use this one
                    sort = freeSearchInjectedSort;
                } else if (hitsRequested.getOrderByList().size() > 0) {
                    List<Path> orderPropertiesList = new ArrayList<Path>();
                    List<Boolean> ascSpecsList = new ArrayList<Boolean>();
                    for (OrderBy orderBy : hitsRequested.getOrderByList()) {
                        try {
                            Name orderByProp = NameFactoryImpl.getInstance().create(orderBy.getName());
                            boolean isAscending = !orderBy.isDescending();
                            orderPropertiesList.add(createPath(orderByProp));
                            ascSpecsList.add(isAscending);
                        } catch (IllegalArgumentException e) {
                            log.warn("Skip property '{}' because cannot create a Name for it: {}",
                                    orderBy.getName(), e.toString());
                        }
                    }
                    if (orderPropertiesList.size() > 0) {
                        Path[] orderProperties = orderPropertiesList
                                .toArray(new Path[orderPropertiesList.size()]);
                        boolean[] ascSpecs = new boolean[ascSpecsList.size()];
                        int i = 0;
                        for (Boolean b : ascSpecsList) {
                            ascSpecs[i] = b;
                            i++;
                        }
                        sort = new Sort(createSortFields(orderProperties, ascSpecs,
                                new String[orderProperties.length]));
                    }
                }

                boolean sortScoreAscending = false;
                // if the sort is on score descending, we can set it to null as this is the default and more efficient
                if (sort != null && sort.getSort().length == 1 && sort.getSort()[0].getType() == SortField.SCORE) {
                    if (sort.getSort()[0].getReverse()) {
                        sortScoreAscending = true;
                    } else {
                        // we can skip sort as it is on score descending
                        sort = null;
                    }
                }

                TopDocs tfDocs;
                org.apache.lucene.search.Query query = searchQuery;
                if (searchQuery.clauses().size() == 0) {
                    // add a match all query
                    // searchQuery.add(new MatchAllDocsQuery(), Occur.MUST);
                    query = new MatchAllDocsQuery();
                }

                if (sort == null) {
                    // when sort == null, search without a sort, as that is more efficient
                    Filter filterToApply = new DocIdSetFilter(matchingDocsSetBuilder.toBitSet());
                    tfDocs = searcher.search(query, filterToApply, fetchTotal);
                } else {
                    if (sortScoreAscending) {
                        // we need the entire searchQuery because scoring is involved
                        Filter filterToApply = new DocIdSetFilter(matchingDocsSetBuilder.toBitSet());
                        tfDocs = searcher.search(query, filterToApply, fetchTotal, sort);
                    } else {
                        // because we have at least one explicit sort, scoring can be skipped;
                        // we can use cached bitsets combined with a match all query
                        if (facetFiltersQuery != null) {
                            matchingDocsSetBuilder.add(
                                    filterDocIdSetPlainLuceneQuery(facetFiltersQuery.getQuery(), indexReader));
                        }
                        if (openQuery != null) {
                            QueryAndSort queryAndSort = openQuery.getLuceneQueryAndSort(contextImpl);
                            matchingDocsSetBuilder
                                    .add(filterDocIdSetJackRabbitQuery(queryAndSort.query, indexReader));
                        }
                        Filter filterToApply = new DocIdSetFilter(matchingDocsSetBuilder.toBitSet());
                        // set query to MatchAllDocsQuery because we have everything as filter now
                        query = new MatchAllDocsQuery();
                        tfDocs = searcher.search(query, filterToApply, fetchTotal, sort);
                    }
                }

                ScoreDoc[] hits = tfDocs.scoreDocs;
                int position = hitsRequested.getOffset();

                // LinkedHashSet because ordering should be kept!
                Set<NodeId> nodeIdHits = new LinkedHashSet<NodeId>();
                while (position < hits.length) {
                    Document d = indexReader.document(hits[position].doc, fieldSelector);
                    Field uuidField = d.getField(FieldNames.UUID);
                    if (uuidField != null) {
                        nodeIdHits.add(NodeId.valueOf(uuidField.stringValue()));
                    }
                    position++;
                }

                return new ResultImpl(nodeIdHits.size(), nodeIdHits);
            }
        }

    } catch (IllegalNameException e) {
        log.error("Error during creating view: ", e);
    } catch (IOException e) {
        log.error("Error during creating view: ", e);
    } finally {
        if (indexReader != null) {
            try {
                // do not call indexReader.close() as ref counting is taken care of by
                // org.apache.jackrabbit.core.query.lucene.Util#closeOrRelease
                Util.closeOrRelease(indexReader);
            } catch (IOException e) {
                log.error("Exception while closing index reader", e);
            }
        }
    }
    return new ResultImpl(0, null);
}
From source file:org.hippoecm.repository.query.lucene.util.CachingMultiReaderQueryFilter.java
License:Apache License
private DocIdSet getIndexReaderDocIdSet(final IndexReader reader, IndexReader cacheKey) throws IOException {
    ValidityBitSet validityBitSet = cache.get(cacheKey);
    if (validityBitSet != null) {
        // Unfortunately, Jackrabbit can return a ReadOnlyIndexReader that is the same instance as used
        // previously but has still been changed through its 'deleted' bitset; this is an optimisation,
        // see AbstractIndex#getReadOnlyIndexReader. This is why, even though we use identity as the cache
        // key, we still need to check whether the cached bitset is really still valid. We can only do this
        // by checking numDocs: when a doc id gets deleted in the ReadOnlyIndexReader, numDocs decreases.
        if (reader.numDocs() == validityBitSet.numDocs) {
            log.debug("Return cached bitSet for reader '{}'", reader);
            return validityBitSet.bitSet;
        } else {
            log.debug("ReadOnlyIndexReader '{}' deleted bitset got changed. Cached entry not valid any more",
                    reader);
            cache.remove(cacheKey);
        }
    }
    // no synchronization needed: worst case scenario, two concurrent threads both do this
    OpenBitSet docIdSet = createDocIdSet(reader);
    cache.put(cacheKey, new ValidityBitSet(reader.numDocs(), docIdSet));
    return docIdSet;
}
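The design choice worth noting here: reader identity alone is not a safe cache key, because Jackrabbit mutates the same ReadOnlyIndexReader instance in place when documents are deleted. Since any delete lowers numDocs(), the method stores numDocs() alongside the cached bitset and treats it as a cheap validity stamp. The FacetedNavigationEngineImpl example above applies the same idea by folding numDocs() into its composite cache key.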
From source file:org.iis.ut.artificialplagiarismcreator.tools.FileFromIndexEctractor.java
public static void main(String[] args) throws IOException {
    IndexReader ireader = IndexReader.open(new SimpleFSDirectory(new File(indexPath)));
    // NOTE: iterating up to numDocs() only visits every document when the index contains no
    // deletions; otherwise maxDoc() combined with isDeleted(i) checks would be needed
    for (int i = 0; i < ireader.numDocs(); i++) {
        String text = ireader.document(i).get("TEXT");
        BufferedWriter writer = new BufferedWriter(
                new FileWriter("/Users/Sam/Education/MyMasterThesis/Codes/evaluations/SimorghI/src/"
                        + ireader.document(i).get("DOCID")));
        writer.write(text);
        writer.close();
    }
}
From source file:org.infoglue.cms.controllers.kernel.impl.simple.LuceneController.java
License:Open Source License
public Map getIndexInformation() {
    Map info = new HashMap();
    try {
        String index = CmsPropertyHandler.getContextRootPath() + File.separator + "lucene" + File.separator
                + "index";

        boolean indexExists = IndexReader.indexExists(new File(index));
        if (!indexExists) {
            try {
                File INDEX_DIR = new File(index);
                IndexWriter writer = new IndexWriter(INDEX_DIR, new StandardAnalyzer());
                logger.info("Indexing to directory '" + INDEX_DIR + "'...");
                writer.deleteDocuments(new Term("initializer", "true"));
                logger.info("Optimizing...");
                writer.optimize();
                writer.close();
            } catch (Exception e) {
                logger.error("Error creating index:" + e.getMessage(), e);
            }
        }

        IndexReader reader = IndexReader.open(index);
        int maxDoc = reader.maxDoc();
        int numDoc = reader.numDocs();
        long lastModified = IndexReader.lastModified(index);

        info.put("maxDoc", new Integer(maxDoc));
        info.put("numDoc", new Integer(numDoc));
        info.put("lastModified", new Date(lastModified));

        reader.close();
    } catch (Exception e) {
        logger.error("Error creating index:" + e.getMessage(), e);
    }
    return info;
}
From source file:org.jetbrains.idea.maven.server.embedder.Maven2ServerIndexerImpl.java
License:Apache License
@Override
public void processArtifacts(int indexId, MavenServerIndicesProcessor processor)
        throws MavenServerIndexerException {
    try {
        final int CHUNK_SIZE = 10000;

        IndexReader r = getIndex(indexId).getIndexReader();
        // NOTE: numDocs() counts only live documents; combined with the isDeleted(i) check below,
        // trailing documents can be missed when the index contains deletions (maxDoc() visits all slots)
        int total = r.numDocs();

        List<IndexedMavenId> result = new ArrayList<IndexedMavenId>(Math.min(CHUNK_SIZE, total));
        for (int i = 0; i < total; i++) {
            if (r.isDeleted(i)) {
                continue;
            }

            Document doc = r.document(i);
            String uinfo = doc.get(ArtifactInfo.UINFO);
            if (uinfo == null) {
                continue;
            }

            String[] uInfoParts = uinfo.split("\\|");
            if (uInfoParts.length < 3) {
                continue;
            }
            String groupId = uInfoParts[0];
            String artifactId = uInfoParts[1];
            String version = uInfoParts[2];

            String packaging = doc.get(ArtifactInfo.PACKAGING);
            String description = doc.get(ArtifactInfo.DESCRIPTION);

            result.add(new IndexedMavenId(groupId, artifactId, version, packaging, description));
            if (result.size() == CHUNK_SIZE) {
                processor.processArtifacts(result);
                result.clear();
            }
        }

        if (!result.isEmpty()) {
            processor.processArtifacts(result);
        }
    } catch (Exception e) {
        throw new MavenServerIndexerException(wrapException(e));
    }
}
From source file:org.jetbrains.idea.maven.server.Maven3ServerIndexerImpl.java
License:Apache License
@Override
public void processArtifacts(int indexId, MavenServerIndicesProcessor processor)
        throws RemoteException, MavenServerIndexerException {
    try {
        final int CHUNK_SIZE = 10000;

        IndexReader r = getIndex(indexId).getIndexReader();
        int total = r.numDocs();

        List<IndexedMavenId> result = new ArrayList<IndexedMavenId>(Math.min(CHUNK_SIZE, total));
        for (int i = 0; i < total; i++) {
            if (r.isDeleted(i)) {
                continue;
            }

            Document doc = r.document(i);
            String uinfo = doc.get(SEARCH_TERM_COORDINATES);
            if (uinfo == null) {
                continue;
            }

            String[] uInfoParts = uinfo.split("\\|");
            if (uInfoParts.length < 3) {
                continue;
            }
            String groupId = uInfoParts[0];
            String artifactId = uInfoParts[1];
            String version = uInfoParts[2];
            if (groupId == null || artifactId == null || version == null) {
                continue;
            }

            String packaging = doc.get(ArtifactInfo.PACKAGING);
            String description = doc.get(ArtifactInfo.DESCRIPTION);

            result.add(new IndexedMavenId(groupId, artifactId, version, packaging, description));
            if (result.size() == CHUNK_SIZE) {
                processor.processArtifacts(result);
                result.clear();
            }
        }

        if (!result.isEmpty()) {
            processor.processArtifacts(result);
        }
    } catch (Exception e) {
        throw new MavenServerIndexerException(wrapException(e));
    }
}
From source file:org.musicbrainz.search.index.ReleaseGroupIndexTest.java
License:Open Source License
/**
 * Basic test of all fields
 *
 * @throws Exception
 */
@Test
public void testIndexReleaseGroupFields() throws Exception {

    addReleaseGroupTwo();
    RAMDirectory ramDir = new RAMDirectory();
    createIndex(ramDir);

    IndexReader ir = DirectoryReader.open(ramDir);
    assertEquals(2, ir.numDocs());
    {
        Document doc = ir.document(1);
        assertEquals(1, doc.getFields(ReleaseGroupIndexField.RELEASEGROUP.getName()).length);
        assertEquals("Crocodiles", doc.getField(ReleaseGroupIndexField.RELEASEGROUP.getName()).stringValue());
        assertEquals("efd2ace2-b3b9-305f-8a53-9803595c0e37",
                doc.getField(ReleaseGroupIndexField.RELEASEGROUP_ID.getName()).stringValue());
        assertEquals(1, doc.getFields(ReleaseGroupIndexField.RELEASE.getName()).length);
        assertEquals("Crocodiles (bonus disc)",
                doc.getField(ReleaseGroupIndexField.RELEASE.getName()).stringValue());
        checkTerm(ir, ReleaseGroupIndexField.ARTIST_ID, "ccd4879c-5e88-4385-b131-bf65296bf245");
    }
    ir.close();
}