List of usage examples for org.apache.lucene.index.IndexReader#maxDoc()
/**
 * Reports the document-number bound of this reader.
 *
 * NOTE(review): the callers shown in this listing iterate {@code 0 .. maxDoc() - 1} and
 * separately skip deleted documents through live-docs bits, which suggests the value is
 * one greater than the largest document number and still counts deleted documents —
 * confirm against the Lucene Javadoc.
 *
 * @return the bound described above
 */
public abstract int maxDoc();
From source file:org.apache.maven.index.context.DefaultIndexingContext.java
License:Apache License
/**
 * Rescans every live document of the index, collects the root group and the group id of
 * each document that carries a UINFO field, stores both sets on this context and finally
 * calls {@code optimize()}.
 *
 * @throws IOException if the index cannot be read
 */
public synchronized void rebuildGroups() throws IOException {
    final IndexSearcher searcher = acquireIndexSearcher();
    try {
        final IndexReader reader = searcher.getIndexReader();
        final Set<String> collectedRoots = new LinkedHashSet<String>();
        final Set<String> collectedGroups = new LinkedHashSet<String>();

        final int docLimit = reader.maxDoc();
        // A null Bits instance means the reader has no deletions at all.
        final Bits live = MultiFields.getLiveDocs(reader);

        for (int docId = 0; docId < docLimit; docId++) {
            final boolean isLive = live == null || live.get(docId);
            if (isLive) {
                final Document doc = reader.document(docId);
                // Only documents carrying a UINFO field contribute groups.
                if (doc.get(ArtifactInfo.UINFO) != null) {
                    final ArtifactInfo artifact = IndexUtils.constructArtifactInfo(doc, this);
                    collectedRoots.add(artifact.getRootGroup());
                    collectedGroups.add(artifact.getGroupId());
                }
            }
        }

        setRootGroups(collectedRoots);
        setAllGroups(collectedGroups);
        optimize();
    } finally {
        // Always hand the searcher back, even when the scan fails.
        releaseIndexSearcher(searcher);
    }
}
From source file:org.apache.maven.index.DefaultScannerListener.java
License:Apache License
private void initialize(IndexingContext ctx) throws IOException, CorruptIndexException { final IndexSearcher indexSearcher = ctx.acquireIndexSearcher(); try {/* ww w . j a v a 2 s .c om*/ final IndexReader r = indexSearcher.getIndexReader(); Bits liveDocs = MultiFields.getLiveDocs(r); for (int i = 0; i < r.maxDoc(); i++) { if (liveDocs == null || liveDocs.get(i)) { Document d = r.document(i); String uinfo = d.get(ArtifactInfo.UINFO); if (uinfo != null) { // if ctx is receiving updates (in other words, is a proxy), // there is no need to build a huge Set of strings with all uinfo's // as deletion detection in those cases have no effect. Also, the // removeDeletedArtifacts() method, that uses info gathered in this set // is invoked with same condition. As indexes of Central are getting huge, // the set grows enormously too, but is actually not used if (!ctx.isReceivingUpdates()) { uinfos.add(uinfo); } // add all existing groupIds to the lists, as they will // not be "discovered" and would be missing from the new list.. String groupId = uinfo.substring(0, uinfo.indexOf('|')); int n = groupId.indexOf('.'); groups.add(n == -1 ? groupId : groupId.substring(0, n)); allGroups.add(groupId); } } } } finally { ctx.releaseIndexSearcher(indexSearcher); } }
From source file:org.apache.maven.index.incremental.DefaultIncrementalHandler.java
License:Apache License
private List<Integer> getIndexChunk(IndexPackingRequest request, Date timestamp) throws IOException { final List<Integer> chunk = new ArrayList<>(); final IndexReader r = request.getIndexReader(); Bits liveDocs = MultiFields.getLiveDocs(r); for (int i = 0; i < r.maxDoc(); i++) { if (liveDocs == null || liveDocs.get(i)) { Document d = r.document(i); String lastModified = d.get(ArtifactInfo.LAST_MODIFIED); if (lastModified != null) { Date t = new Date(Long.parseLong(lastModified)); // Only add documents that were added after the last time we indexed if (t.after(timestamp)) { chunk.add(i);//from w w w . j a v a 2 s.com } } } } return chunk; }
From source file:org.apache.maven.index.Nexus737NexusIndexerTest.java
License:Apache License
public void testValidateUINFOs() throws Exception { IndexReader reader = context.acquireIndexSearcher().getIndexReader(); Bits liveDocs = MultiFields.getLiveDocs(reader); int foundCount = 0; for (int i = 0; i < reader.maxDoc(); i++) { if (liveDocs == null || liveDocs.get(i)) { Document document = reader.document(i); String uinfo = document.get(ArtifactInfo.UINFO); if ("org.sonatype.nexus|nexus-webapp|1.0.0-SNAPSHOT|NA|jar".equals(uinfo) || "org.sonatype.nexus|nexus-webapp|1.0.0-SNAPSHOT|bundle|zip".equals(uinfo) || "org.sonatype.nexus|nexus-webapp|1.0.0-SNAPSHOT|bundle|tar.gz".equals(uinfo)) { foundCount++;//from ww w . j a v a2 s . c o m } } } assertEquals(foundCount, 3); }
From source file:org.apache.maven.index.updater.DefaultIndexUpdater.java
License:Apache License
private static void filterDirectory(final Directory directory, final DocumentFilter filter) throws IOException { IndexReader r = null; IndexWriter w = null;// w w w . ja v a 2 s . c o m try { r = DirectoryReader.open(directory); w = new NexusIndexWriter(directory, new NexusAnalyzer(), false); Bits liveDocs = MultiFields.getLiveDocs(r); int numDocs = r.maxDoc(); for (int i = 0; i < numDocs; i++) { if (liveDocs != null && !liveDocs.get(i)) { continue; } Document d = r.document(i); if (!filter.accept(d)) { boolean success = w.tryDeleteDocument(r, i); // FIXME handle deletion failure } } w.commit(); } finally { IndexUtils.close(r); IndexUtils.close(w); } w = null; try { // analyzer is unimportant, since we are not adding/searching to/on index, only reading/deleting w = new NexusIndexWriter(directory, new NexusAnalyzer(), false); w.commit(); } finally { IndexUtils.close(w); } }
From source file:org.apache.maven.index.updater.IndexDataTest.java
License:Apache License
/**
 * Builds a UINFO-to-ArtifactInfo map from the documents of the given reader.
 * Documents for which no ArtifactInfo can be constructed are left out.
 *
 * NOTE(review): unlike other index scans, this loop does not consult live-docs bits, so
 * document numbers of deleted documents are read as well — confirm that is intended.
 *
 * @param r1 the reader to scan
 * @return artifact infos keyed by their UINFO
 * @throws CorruptIndexException declared by the signature
 * @throws IOException if a document cannot be read
 */
private Map<String, ArtifactInfo> readIndex(IndexReader r1) throws CorruptIndexException, IOException {
    final Map<String, ArtifactInfo> byUinfo = new HashMap<String, ArtifactInfo>();

    int docId = 0;
    while (docId < r1.maxDoc()) {
        final ArtifactInfo info = IndexUtils.constructArtifactInfo(r1.document(docId), context);
        if (info != null) {
            byUinfo.put(info.getUinfo(), info);
        }
        docId++;
    }

    return byUinfo;
}
From source file:org.apache.maven.index.updater.IndexDataWriter.java
License:Apache License
public int writeDocuments(IndexReader r, List<Integer> docIndexes) throws IOException { int n = 0;//from w w w . j a v a2 s . c om Bits liveDocs = MultiFields.getLiveDocs(r); if (docIndexes == null) { for (int i = 0; i < r.maxDoc(); i++) { if (liveDocs == null || liveDocs.get(i)) { if (writeDocument(r.document(i))) { n++; } } } } else { for (int i : docIndexes) { if (liveDocs == null || liveDocs.get(i)) { if (writeDocument(r.document(i))) { n++; } } } } return n; }
From source file:org.apache.maven.indexer.examples.BasicUsageExample.java
License:Apache License
public void perform() throws IOException, ComponentLookupException, InvalidVersionSpecificationException { // Files where local cache is (if any) and Lucene Index should be located File centralLocalCache = new File("target/central-cache"); File centralIndexDir = new File("target/central-index"); // Creators we want to use (search for fields it defines) List<IndexCreator> indexers = new ArrayList<>(); indexers.add(plexusContainer.lookup(IndexCreator.class, "min")); indexers.add(plexusContainer.lookup(IndexCreator.class, "jarContent")); indexers.add(plexusContainer.lookup(IndexCreator.class, "maven-plugin")); // Create context for central repository index centralContext = indexer.createIndexingContext("central-context", "central", centralLocalCache, centralIndexDir, "http://repo1.maven.org/maven2", null, true, true, indexers); // Update the index (incremental update will happen if this is not 1st run and files are not deleted) // This whole block below should not be executed on every app start, but rather controlled by some configuration // since this block will always emit at least one HTTP GET. Central indexes are updated once a week, but // other index sources might have different index publishing frequency. // Preferred frequency is once a week. if (true) {//from w w w . j av a 2 s . 
co m System.out.println("Updating Index..."); System.out.println("This might take a while on first run, so please be patient!"); // Create ResourceFetcher implementation to be used with IndexUpdateRequest // Here, we use Wagon based one as shorthand, but all we need is a ResourceFetcher implementation TransferListener listener = new AbstractTransferListener() { public void transferStarted(TransferEvent transferEvent) { System.out.print(" Downloading " + transferEvent.getResource().getName()); } public void transferProgress(TransferEvent transferEvent, byte[] buffer, int length) { } public void transferCompleted(TransferEvent transferEvent) { System.out.println(" - Done"); } }; ResourceFetcher resourceFetcher = new WagonHelper.WagonFetcher(httpWagon, listener, null, null); Date centralContextCurrentTimestamp = centralContext.getTimestamp(); IndexUpdateRequest updateRequest = new IndexUpdateRequest(centralContext, resourceFetcher); IndexUpdateResult updateResult = indexUpdater.fetchAndUpdateIndex(updateRequest); if (updateResult.isFullUpdate()) { System.out.println("Full update happened!"); } else if (updateResult.getTimestamp().equals(centralContextCurrentTimestamp)) { System.out.println("No update needed, index is up to date!"); } else { System.out.println("Incremental update happened, change covered " + centralContextCurrentTimestamp + " - " + updateResult.getTimestamp() + " period."); } System.out.println(); } System.out.println(); System.out.println("Using index"); System.out.println("==========="); System.out.println(); // ==== // Case: // dump all the GAVs // NOTE: will not actually execute do this below, is too long to do (Central is HUGE), but is here as code // example if (false) { final IndexSearcher searcher = centralContext.acquireIndexSearcher(); try { final IndexReader ir = searcher.getIndexReader(); Bits liveDocs = MultiFields.getLiveDocs(ir); for (int i = 0; i < ir.maxDoc(); i++) { if (liveDocs == null || liveDocs.get(i)) { final Document doc = 
ir.document(i); final ArtifactInfo ai = IndexUtils.constructArtifactInfo(doc, centralContext); System.out.println(ai.getGroupId() + ":" + ai.getArtifactId() + ":" + ai.getVersion() + ":" + ai.getClassifier() + " (sha1=" + ai.getSha1() + ")"); } } } finally { centralContext.releaseIndexSearcher(searcher); } } // ==== // Case: // Search for all GAVs with known G and A and having version greater than V final GenericVersionScheme versionScheme = new GenericVersionScheme(); final String versionString = "1.5.0"; final Version version = versionScheme.parseVersion(versionString); // construct the query for known GA final Query groupIdQ = indexer.constructQuery(MAVEN.GROUP_ID, new SourcedSearchExpression("org.sonatype.nexus")); final Query artifactIdQ = indexer.constructQuery(MAVEN.ARTIFACT_ID, new SourcedSearchExpression("nexus-api")); final BooleanQuery query = new BooleanQuery(); query.add(groupIdQ, Occur.MUST); query.add(artifactIdQ, Occur.MUST); // we want "jar" artifacts only query.add(indexer.constructQuery(MAVEN.PACKAGING, new SourcedSearchExpression("jar")), Occur.MUST); // we want main artifacts only (no classifier) // Note: this below is unfinished API, needs fixing query.add(indexer.constructQuery(MAVEN.CLASSIFIER, new SourcedSearchExpression(Field.NOT_PRESENT)), Occur.MUST_NOT); // construct the filter to express "V greater than" final ArtifactInfoFilter versionFilter = new ArtifactInfoFilter() { public boolean accepts(final IndexingContext ctx, final ArtifactInfo ai) { try { final Version aiV = versionScheme.parseVersion(ai.getVersion()); // Use ">=" if you are INCLUSIVE return aiV.compareTo(version) > 0; } catch (InvalidVersionSpecificationException e) { // do something here? be safe and include? 
return true; } } }; System.out.println( "Searching for all GAVs with G=org.sonatype.nexus and nexus-api and having V greater than 1.5.0"); final IteratorSearchRequest request = new IteratorSearchRequest(query, Collections.singletonList(centralContext), versionFilter); final IteratorSearchResponse response = indexer.searchIterator(request); for (ArtifactInfo ai : response) { System.out.println(ai.toString()); } // Case: // Use index // Searching for some artifact Query gidQ = indexer.constructQuery(MAVEN.GROUP_ID, new SourcedSearchExpression("org.apache.maven.indexer")); Query aidQ = indexer.constructQuery(MAVEN.ARTIFACT_ID, new SourcedSearchExpression("indexer-artifact")); BooleanQuery bq = new BooleanQuery(); bq.add(gidQ, Occur.MUST); bq.add(aidQ, Occur.MUST); searchAndDump(indexer, "all artifacts under GA org.apache.maven.indexer:indexer-artifact", bq); // Searching for some main artifact bq = new BooleanQuery(); bq.add(gidQ, Occur.MUST); bq.add(aidQ, Occur.MUST); // bq.add( nexusIndexer.constructQuery( MAVEN.CLASSIFIER, new SourcedSearchExpression( "*" ) ), Occur.MUST_NOT // ); searchAndDump(indexer, "main artifacts under GA org.apache.maven.indexer:indexer-artifact", bq); // doing sha1 search searchAndDump(indexer, "SHA1 7ab67e6b20e5332a7fb4fdf2f019aec4275846c2", indexer.constructQuery(MAVEN.SHA1, new SourcedSearchExpression("7ab67e6b20e5332a7fb4fdf2f019aec4275846c2"))); searchAndDump(indexer, "SHA1 7ab67e6b20 (partial hash)", indexer.constructQuery(MAVEN.SHA1, new UserInputSearchExpression("7ab67e6b20"))); // doing classname search (incomplete classname) searchAndDump(indexer, "classname DefaultNexusIndexer (note: Central does not publish classes in the index)", indexer.constructQuery(MAVEN.CLASSNAMES, new UserInputSearchExpression("DefaultNexusIndexer"))); // doing search for all "canonical" maven plugins latest versions bq = new BooleanQuery(); bq.add(indexer.constructQuery(MAVEN.PACKAGING, new SourcedSearchExpression("maven-plugin")), Occur.MUST); 
bq.add(indexer.constructQuery(MAVEN.GROUP_ID, new SourcedSearchExpression("org.apache.maven.plugins")), Occur.MUST); searchGroupedAndDump(indexer, "all \"canonical\" maven plugins", bq, new GAGrouping()); // doing search for all archetypes latest versions searchGroupedAndDump(indexer, "all maven archetypes (latest versions)", indexer.constructQuery(MAVEN.PACKAGING, new SourcedSearchExpression("maven-archetype")), new GAGrouping()); // close cleanly indexer.closeIndexingContext(centralContext, false); }
From source file:org.apache.nifi.provenance.index.lucene.LuceneEventIndex.java
License:Apache License
/**
 * Estimates the largest event ID indexed for the given partition by reading the identifier
 * field of the last document in the newest readable index directory.
 *
 * Directories are visited newest first. A directory whose searcher cannot be borrowed,
 * whose index contains no documents, or whose last document cannot be read is skipped and
 * the next (older) directory is tried.
 *
 * @param partitionName name of the partition whose index directories are examined
 * @return the event ID found, or {@code -1} if no directory yielded one
 */
long getMaxEventId(final String partitionName) {
    final List<File> allDirectories = getDirectoryManager().getDirectories(0L, Long.MAX_VALUE, partitionName);
    if (allDirectories.isEmpty()) {
        return -1L;
    }

    Collections.sort(allDirectories, DirectoryUtils.NEWEST_INDEX_FIRST);

    for (final File directory : allDirectories) {
        final EventIndexSearcher searcher;
        try {
            searcher = indexManager.borrowIndexSearcher(directory);
        } catch (final IOException ioe) {
            logger.warn(
                    "Unable to read from Index Directory {}. Will assume that the index is incomplete and not consider this index when determining max event ID",
                    directory);
            continue;
        }

        try {
            final IndexReader reader = searcher.getIndexSearcher().getIndexReader();

            // An empty index has no "last" document: asking for document -1 would raise an
            // unchecked exception that the IOException handler below does not catch.
            final int docCount = reader.maxDoc();
            if (docCount < 1) {
                logger.info(
                        "Index Directory {} contains no documents. Will not consider this index when determining max event ID",
                        directory);
                continue;
            }

            final int maxDocId = docCount - 1;
            final Document document = reader.document(maxDocId);
            final long eventId = document.getField(SearchableFields.Identifier.getSearchableFieldName())
                    .numericValue().longValue();
            logger.info(
                    "Determined that Max Event ID indexed for Partition {} is approximately {} based on index {}",
                    partitionName, eventId, directory);
            return eventId;
        } catch (final IOException ioe) {
            logger.warn(
                    "Unable to search Index Directory {}. Will assume that the index is incomplete and not consider this index when determining max event ID",
                    directory, ioe);
        } finally {
            // Runs on return, on continue and on failure alike, so the searcher is always returned.
            indexManager.returnIndexSearcher(searcher);
        }
    }

    return -1L;
}
From source file:org.apache.nutch.indexer.IndexSorter.java
License:Apache License
private static int[] oldToNew(IndexReader reader) throws IOException { int readerMax = reader.maxDoc(); DocScore[] newToOld = new DocScore[readerMax]; // use site, an indexed, un-tokenized field to get boost byte[] boosts = reader.norms("site"); for (int oldDoc = 0; oldDoc < readerMax; oldDoc++) { float score; if (reader.isDeleted(oldDoc)) { score = 0.0f;//from w w w . j a v a 2s.com } else { score = Similarity.decodeNorm(boosts[oldDoc]); } DocScore docScore = new DocScore(); docScore.oldDoc = oldDoc; docScore.score = score; newToOld[oldDoc] = docScore; } Arrays.sort(newToOld); int[] oldToNew = new int[readerMax]; for (int newDoc = 0; newDoc < readerMax; newDoc++) { DocScore docScore = newToOld[newDoc]; oldToNew[docScore.oldDoc] = docScore.score > 0.0f ? newDoc : -1; } return oldToNew; }