List of usage examples for org.apache.lucene.index IndexReader document
public final Document document(int docID) throws IOException
Returns the stored fields of the n-th Document in this index.
From source file:org.compass.core.lucene.support.ResourceHelper.java
License:Apache License
/** * Converts terms docs into an array of resources. *///from www .j a v a2 s .co m public static Resource[] hitsToResourceArray(final TermDocs termDocs, IndexReader indexReader, LuceneSearchEngine searchEngine) throws IOException { ArrayList<Resource> list = new ArrayList<Resource>(); while (termDocs.next()) { list.add(new LuceneResource(indexReader.document(termDocs.doc()), termDocs.doc(), searchEngine.getSearchEngineFactory())); } return list.toArray(new Resource[list.size()]); }
From source file:org.cosmo.common.record.SearchResult.java
License:Apache License
public void assertAndCorrectIds(Search search, LongArrayList ids) throws Exception { IndexReader reader = search.reader(); for (int i = 0; i < ids.size(); i++) { long docId = ids.get(i); Document doc = reader.document((int) docId); long docRecordId = Long.valueOf(doc.get("id")); if (docRecordId != docId) { //System.out.println("docId:" + docId + " recordId: " + docRecordId); ids.set(i, docRecordId);//from w w w . j av a 2 s . co m } } }
From source file:org.creativecommons.nutch.CCDeleteUnlicensedTool.java
License:Apache License
/** Delete pages without CC licenes. */ public int deleteUnlicensed() throws IOException { int deleteCount = 0; for (int index = 0; index < readers.length; index++) { IndexReader reader = readers[index]; int readerMax = reader.maxDoc(); for (int doc = 0; doc < readerMax; doc++) { if (!reader.isDeleted(doc)) { Document document = reader.document(doc); if (document.get(CCIndexingFilter.FIELD) == null) { // no CC fields reader.deleteDocument(doc); // delete it deleteCount++;//from www . j av a 2 s . c o m } } } } return deleteCount; }
From source file:org.dspace.search.DSIndexer.java
License:BSD License
/** * Iterates over all documents in the Lucene index and verifies they * are in database, if not, they are removed. * * @param context/*www.j a va2 s . com*/ * @throws IOException * @throws SQLException */ public static void cleanIndex(Context context) throws IOException, SQLException { IndexReader reader = DSQuery.getIndexReader(); Bits liveDocs = MultiFields.getLiveDocs(reader); for (int i = 0; i < reader.numDocs(); i++) { if (!liveDocs.get(i)) { // document is deleted... log.debug("Encountered deleted doc: " + i); } else { Document doc = reader.document(i); String handle = doc.get("handle"); if (!StringUtils.isEmpty(handle)) { DSpaceObject o = HandleManager.resolveToObject(context, handle); if (o == null) { log.info("Deleting: " + handle); /* Use IndexWriter to delete, its easier to manage write.lock */ DSIndexer.unIndexContent(context, handle); } else { context.removeCached(o, o.getID()); log.debug("Keeping: " + handle); } } } } }
From source file:org.dspace.search.DSIndexer.java
License:BSD License
/**
 * Decides whether the item identified by a term has to be (re)indexed.
 *
 * <p>All live documents matching the term are inspected. The item requires indexing when no
 * document matches, or when any matching document either lacks the
 * {@code LAST_INDEXED_FIELD} field or carries a value older than {@code lastModified}.
 *
 * @param t            the term identifying the item's documents
 * @param lastModified the item's last-modified time stamp from the database
 * @return {@code true} if the item is missing from the index or its index entry is stale
 * @throws SQLException declared for callers; not raised by the code in this method directly
 * @throws IOException  if the index cannot be read
 */
private static boolean requiresIndexing(Term t, Date lastModified) throws SQLException, IOException {
    IndexReader indexReader = DSQuery.getIndexReader();
    Bits live = MultiFields.getLiveDocs(indexReader);
    DocsEnum matches = MultiFields.getTermDocsEnum(indexReader, live, t.field(), t.bytes());

    if (matches == null) {
        // No enumeration at all: nothing is indexed for this term.
        return true;
    }

    boolean found = false;
    boolean stale = false;
    for (int docNum = matches.nextDoc(); docNum != DocsEnum.NO_MORE_DOCS; docNum = matches.nextDoc()) {
        found = true;
        IndexableField stamp = indexReader.document(docNum).getField(LAST_INDEXED_FIELD);
        if (stamp == null) {
            stale = true;
        } else if (Long.parseLong(stamp.stringValue()) < lastModified.getTime()) {
            stale = true;
        }
    }

    return stale || !found;
}
From source file:org.dspace.search.LuceneIndex.java
License:BSD License
/**
 * Checks the last-modified time stamp from the database against the index
 * to determine whether the indexed document is stale.
 *
 * <p>The document is considered stale when no document with the given handle is indexed,
 * or when any such document either lacks the {@code LAST_INDEXED_FIELD} field or carries
 * a value older than {@code lastModified}.
 *
 * @param documentKey  the value of the {@code "handle"} field identifying the document
 * @param lastModified the last-modified time stamp from the database
 * @return {@code true} if the document is missing from the index or out of date
 * @throws IOException if the index cannot be read
 */
@Override
public boolean isDocumentStale(String documentKey, Date lastModified) throws IOException {
    IndexReader ir = getSearcher().getIndexReader();
    Term t = new Term("handle", documentKey);

    // NOTE(review): the cast assumes the searcher wraps an atomic (single-segment) reader;
    // confirm against how getSearcher() opens the index.
    AtomicReader ar = (AtomicReader) ir;
    DocsEnum docsE = ar.termDocsEnum(t);

    if (docsE == null) {
        // termDocsEnum yields null when the field or term does not exist, i.e. the
        // document has never been indexed and therefore needs indexing.
        return true;
    }

    boolean reindexItem = false;
    boolean inIndex = false;
    int docId;
    while ((docId = docsE.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
        inIndex = true;
        Document doc = ir.document(docId);

        IndexableField lastIndexed = doc.getField(LAST_INDEXED_FIELD);
        if (lastIndexed == null
                || Long.parseLong(lastIndexed.stringValue()) < lastModified.getTime()) {
            reindexItem = true;
        }
    }

    return reindexItem || !inIndex;
}
From source file:org.eclipse.mylyn.internal.tasks.index.core.TaskListIndex.java
License:Open Source License
/** * Indicates if the given task matches the given pattern string. Uses the backing index to detect a match by looking * for tasks that match the given pattern string. The results of the search are cached such that future calls to * this method using the same pattern string do not require use of the backing index, making this method very * efficient for multiple calls with the same pattern string. Cached results for a given pattern string are * discarded if this method is called with a different pattern string. * //from w w w.j a v a2 s . c o m * @param task * the task to match * @param patternString * the pattern used to detect a match */ public boolean matches(ITask task, String patternString) { if (patternString.equals(COMMAND_RESET_INDEX)) { reindex(); } Lock readLock = indexReaderLock.readLock(); readLock.lock(); try { IndexReader indexReader = getIndexReader(); if (indexReader != null) { Set<String> hits; final boolean needIndexHit; synchronized (this) { needIndexHit = lastResults == null || (lastPatternString == null || !lastPatternString.equals(patternString)); } if (needIndexHit) { this.lastPatternString = patternString; hits = new HashSet<String>(); IndexSearcher indexSearcher = new IndexSearcher(indexReader); try { Query query = computeQuery(patternString); TopDocs results = indexSearcher.search(query, maxMatchSearchHits); for (ScoreDoc scoreDoc : results.scoreDocs) { Document document = indexReader.document(scoreDoc.doc); hits.add(document.get(FIELD_IDENTIFIER.getIndexKey())); } } catch (IOException e) { StatusHandler.log(new Status(IStatus.ERROR, TasksIndexCore.ID_PLUGIN, "Unexpected failure within task list index", e)); //$NON-NLS-1$ } finally { try { indexSearcher.close(); } catch (IOException e) { // ignore } } } else { hits = lastResults; } synchronized (this) { if (this.indexReader == indexReader) { this.lastPatternString = patternString; this.lastResults = hits; } } String taskIdentifier = task.getHandleIdentifier(); return hits != null && 
hits.contains(taskIdentifier); } } finally { readLock.unlock(); } return false; }
From source file:org.eclipse.mylyn.internal.tasks.index.core.TaskListIndex.java
License:Open Source License
/** * finds tasks that match the given pattern string * /*from w w w .j a va 2 s.c o m*/ * @param patternString * the pattern string, used to match tasks * @param collector * the collector that receives tasks * @param resultsLimit * the maximum number of tasks to find. Specifying a limit enables the index to be more efficient since * it can skip over matching tasks that do not score highly enough. Specify {@link Integer#MAX_VALUE} if * there should be no limit. */ public void find(String patternString, TaskCollector collector, int resultsLimit) { Assert.isNotNull(patternString); Assert.isNotNull(collector); Assert.isTrue(resultsLimit > 0); Lock readLock = indexReaderLock.readLock(); readLock.lock(); try { IndexReader indexReader = getIndexReader(); if (indexReader != null) { IndexSearcher indexSearcher = new IndexSearcher(indexReader); try { Query query = computeQuery(patternString); TopDocs results = indexSearcher.search(query, resultsLimit); for (ScoreDoc scoreDoc : results.scoreDocs) { Document document = indexReader.document(scoreDoc.doc); String taskIdentifier = document.get(FIELD_IDENTIFIER.getIndexKey()); AbstractTask task = taskList.getTask(taskIdentifier); if (task != null) { collector.collect(task); } } } catch (IOException e) { StatusHandler.log(new Status(IStatus.ERROR, TasksIndexCore.ID_PLUGIN, "Unexpected failure within task list index", e)); //$NON-NLS-1$ } finally { try { indexSearcher.close(); } catch (IOException e) { // ignore } } } } finally { readLock.unlock(); } }
From source file:org.eclipse.mylyn.versions.tasks.mapper.internal.ChangeSetIndexer.java
License:Open Source License
public int search(ITask task, String scmRepositoryUrl, int resultsLimit, IChangeSetCollector collector) throws CoreException { int count = 0; IndexReader indexReader = getIndexReader(); if (indexReader != null) { IndexSearcher indexSearcher = new IndexSearcher(indexReader); try {//from ww w . j ava 2 s .c om Query query = createQuery(task, scmRepositoryUrl); TopDocs results = indexSearcher.search(query, resultsLimit); for (ScoreDoc scoreDoc : results.scoreDocs) { Document document = indexReader.document(scoreDoc.doc); count++; if (count > resultsLimit) break; String revision = document.getField(IndexedFields.REVISION.getIndexKey()).stringValue(); String repositoryUrl = document.getField(IndexedFields.REPOSITORY.getIndexKey()).stringValue(); collector.collect(revision, repositoryUrl); } } catch (IOException e) { // StatusHandler.log(new Status(IStatus.ERROR, org.eclipse.mylyn.versions.tasks.ui.internal.TaPLUGIN_ID, //"Unexpected failure within task list index", e)); //$NON-NLS-1$ } finally { try { indexSearcher.close(); } catch (IOException e) { // ignore } } } return count; }
From source file:org.elasticsearch.test.unit.common.compress.CompressIndexInputOutputTests.java
License:Apache License
private void verify(IndexWriter writer) throws Exception { CheckIndex checkIndex = new CheckIndex(writer.getDirectory()); CheckIndex.Status status = checkIndex.checkIndex(); assertThat(status.clean, equalTo(true)); IndexReader reader = DirectoryReader.open(writer, true); final Bits liveDocs = MultiFields.getLiveDocs(reader); for (int i = 0; i < reader.maxDoc(); i++) { if (liveDocs != null && !liveDocs.get(i)) { continue; }// w ww. jav a 2 s .com Document document = reader.document(i); checkDoc(document); DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor("id", "field", "count"); reader.document(i, visitor); document = visitor.getDocument(); checkDoc(document); } for (int i = 0; i < 100; i++) { int doc = ThreadLocalRandom.current().nextInt(reader.maxDoc()); if (liveDocs != null && !liveDocs.get(i)) { continue; } Document document = reader.document(doc); checkDoc(document); DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor("id", "field", "count"); reader.document(doc, visitor); document = visitor.getDocument(); checkDoc(document); } }