Example usage for org.apache.lucene.index.IndexReader#document

Introduction

This page collects example usages of the org.apache.lucene.index.IndexReader#document method.

Prototype




public final Document document(int docID) throws IOException

Source Link

Document

Returns the stored fields of the n-th Document in this index.

Usage

From source file:org.compass.core.lucene.support.ResourceHelper.java

License:Apache License

/**
 * Drains the given term docs enumeration, loads the stored document for every entry and
 * wraps each one in a {@link LuceneResource}.
 *
 * @param termDocs     the term docs enumeration to walk through
 * @param indexReader  the reader the stored documents are loaded from
 * @param searchEngine the search engine whose factory is passed to every created resource
 * @return the created resources, in enumeration order (empty if there were no entries)
 * @throws IOException propagated from the enumeration or from loading a document
 */
public static Resource[] hitsToResourceArray(final TermDocs termDocs, IndexReader indexReader,
        LuceneSearchEngine searchEngine) throws IOException {
    final ArrayList<Resource> resources = new ArrayList<Resource>();
    while (termDocs.next()) {
        final int docNum = termDocs.doc();
        resources.add(new LuceneResource(indexReader.document(docNum), docNum,
                searchEngine.getSearchEngineFactory()));
    }
    final Resource[] result = new Resource[resources.size()];
    return resources.toArray(result);
}

From source file:org.cosmo.common.record.SearchResult.java

License:Apache License

/**
 * Walks over {@code ids}, treating each entry as a Lucene document number. The stored
 * {@code "id"} field of that document is parsed as a long and, when it differs from the
 * entry, the entry is overwritten in place with the parsed value.
 *
 * @param search supplies the index reader used to load the documents
 * @param ids    the ids to verify; corrected in place
 * @throws Exception if a document cannot be loaded or its {@code "id"} field is not a valid long
 */
public void assertAndCorrectIds(Search search, LongArrayList ids) throws Exception {
    final IndexReader indexReader = search.reader();
    for (int pos = 0; pos < ids.size(); pos++) {
        final long expectedId = ids.get(pos);
        final Document stored = indexReader.document((int) expectedId);
        final long recordId = Long.parseLong(stored.get("id"));
        if (recordId == expectedId) {
            continue;
        }
        ids.set(pos, recordId);
    }
}

From source file:org.creativecommons.nutch.CCDeleteUnlicensedTool.java

License:Apache License

/**
 * Deletes pages without CC licenses.
 * <p>
 * Every non-deleted document of every reader in {@code readers} is loaded; a document whose
 * {@code CCIndexingFilter.FIELD} value is absent is deleted through its reader.
 *
 * @return the number of documents deleted by this call
 * @throws IOException propagated from reading or deleting a document
 */
public int deleteUnlicensed() throws IOException {
    int removed = 0;
    for (IndexReader reader : readers) {
        final int limit = reader.maxDoc();
        for (int docNum = 0; docNum < limit; docNum++) {
            if (reader.isDeleted(docNum)) {
                continue; // already gone
            }
            final Document stored = reader.document(docNum);
            if (stored.get(CCIndexingFilter.FIELD) != null) {
                continue; // carries CC fields, keep it
            }
            reader.deleteDocument(docNum);
            removed++;
        }
    }
    return removed;
}

From source file:org.dspace.search.DSIndexer.java

License:BSD License

/**
 * Iterates over all documents in the Lucene index and verifies they
 * are in database, if not, they are removed.
 *
 * @param context/*www.j a va2  s  .  com*/
 * @throws IOException
 * @throws SQLException
 */
public static void cleanIndex(Context context) throws IOException, SQLException {

    IndexReader reader = DSQuery.getIndexReader();

    Bits liveDocs = MultiFields.getLiveDocs(reader);

    for (int i = 0; i < reader.numDocs(); i++) {
        if (!liveDocs.get(i)) {
            // document is deleted...
            log.debug("Encountered deleted doc: " + i);
        } else {
            Document doc = reader.document(i);
            String handle = doc.get("handle");
            if (!StringUtils.isEmpty(handle)) {
                DSpaceObject o = HandleManager.resolveToObject(context, handle);

                if (o == null) {
                    log.info("Deleting: " + handle);
                    /* Use IndexWriter to delete, its easier to manage write.lock */
                    DSIndexer.unIndexContent(context, handle);
                } else {
                    context.removeCached(o, o.getID());
                    log.debug("Keeping: " + handle);
                }
            }
        }
    }
}

From source file:org.dspace.search.DSIndexer.java

License:BSD License

/**
 * Decides whether the item identified by the given term has to be (re)indexed by comparing
 * the {@code LAST_INDEXED_FIELD} value stored in the index with the item's last-modified time.
 *
 * @param t            the term identifying the item's document(s) in the index
 * @param lastModified the item's last-modified time stamp
 * @return {@code true} if no document matches the term, or if any matching document has no
 *         last-indexed value or one older than {@code lastModified}
 * @throws SQLException declared by the signature
 * @throws IOException  propagated from reading the index
 */
private static boolean requiresIndexing(Term t, Date lastModified) throws SQLException, IOException {

    final IndexReader ir = DSQuery.getIndexReader();
    final Bits liveDocs = MultiFields.getLiveDocs(ir);
    final DocsEnum docs = MultiFields.getTermDocsEnum(ir, liveDocs, t.field(), t.bytes());

    if (docs == null) {
        // no enumeration for the term: nothing is in the index yet
        return true;
    }

    boolean found = false;
    boolean outdated = false;
    for (int id = docs.nextDoc(); id != DocsEnum.NO_MORE_DOCS; id = docs.nextDoc()) {
        found = true;

        final IndexableField lastIndexed = ir.document(id).getField(LAST_INDEXED_FIELD);
        if (lastIndexed == null) {
            outdated = true;
            continue;
        }
        if (Long.parseLong(lastIndexed.stringValue()) < lastModified.getTime()) {
            outdated = true;
        }
    }
    return outdated || !found;
}

From source file:org.dspace.search.LuceneIndex.java

License:BSD License

/**
 * Checks the lastModified time stamp against the value stored in the index to determine
 * whether the indexed document is stale.
 *
 * @param documentKey  the value of the {@code "handle"} term identifying the document
 * @param lastModified the last-modified time stamp to compare against
 * @return {@code true} if no document matches the key, or if any matching document has no
 *         {@code LAST_INDEXED_FIELD} value or one older than {@code lastModified}
 * @throws IOException propagated from reading the index
 */
@Override
public boolean isDocumentStale(String documentKey, Date lastModified) throws IOException {

    boolean reindexItem = false;
    boolean inIndex = false;

    IndexReader ir = getSearcher().getIndexReader();
    Term t = new Term("handle", documentKey);
    // NOTE(review): this cast assumes the searcher's reader is an AtomicReader — confirm.
    AtomicReader ar = (AtomicReader) ir;
    DocsEnum docsE = ar.termDocsEnum(t);

    // termDocsEnum returns null when the field or term does not exist in the index;
    // that simply means the document has never been indexed.
    if (docsE != null) {
        int docId;
        while ((docId = docsE.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
            inIndex = true;
            Document doc = ir.document(docId);

            IndexableField lastIndexed = doc.getField(LAST_INDEXED_FIELD);

            if (lastIndexed == null || Long.parseLong(lastIndexed.stringValue()) < lastModified.getTime()) {
                reindexItem = true;
            }
        }
    }

    return reindexItem || !inIndex;
}

From source file:org.eclipse.mylyn.internal.tasks.index.core.TaskListIndex.java

License:Open Source License

/**
 * Indicates if the given task matches the given pattern string. Uses the backing index to detect a match by looking
 * for tasks that match the given pattern string. The results of the search are cached such that future calls to
 * this method using the same pattern string do not require use of the backing index, making this method very
 * efficient for multiple calls with the same pattern string. Cached results for a given pattern string are
 * discarded if this method is called with a different pattern string.
 * <p>
 * Passing {@code COMMAND_RESET_INDEX} as the pattern string additionally calls {@code reindex()} before matching.
 *
 * @param task
 *            the task to match
 * @param patternString
 *            the pattern used to detect a match
 * @return {@code true} if the task's handle identifier is among the hits for the pattern string; {@code false}
 *         otherwise, including when no index reader is available
 */
public boolean matches(ITask task, String patternString) {
    if (patternString.equals(COMMAND_RESET_INDEX)) {
        reindex();
    }
    // The read lock is held for the whole lookup and released in the outer finally.
    Lock readLock = indexReaderLock.readLock();
    readLock.lock();
    try {

        IndexReader indexReader = getIndexReader();
        if (indexReader != null) {
            Set<String> hits;

            // The index is only queried when there is no cached result or the pattern changed.
            final boolean needIndexHit;
            synchronized (this) {
                needIndexHit = lastResults == null
                        || (lastPatternString == null || !lastPatternString.equals(patternString));
            }
            if (needIndexHit) {
                // NOTE(review): this write happens outside the synchronized block above — confirm that is intended.
                this.lastPatternString = patternString;

                hits = new HashSet<String>();

                IndexSearcher indexSearcher = new IndexSearcher(indexReader);
                try {
                    Query query = computeQuery(patternString);
                    TopDocs results = indexSearcher.search(query, maxMatchSearchHits);
                    // Collect the identifier field of every hit; matching is done against this set.
                    for (ScoreDoc scoreDoc : results.scoreDocs) {
                        Document document = indexReader.document(scoreDoc.doc);
                        hits.add(document.get(FIELD_IDENTIFIER.getIndexKey()));
                    }
                } catch (IOException e) {
                    // The failure is logged; whatever was collected so far is still used (and cached) below.
                    StatusHandler.log(new Status(IStatus.ERROR, TasksIndexCore.ID_PLUGIN,
                            "Unexpected failure within task list index", e)); //$NON-NLS-1$
                } finally {
                    try {
                        indexSearcher.close();
                    } catch (IOException e) {
                        // ignore
                    }
                }

            } else {
                // NOTE(review): lastResults is read here without holding the monitor — confirm that is safe.
                hits = lastResults;
            }
            // Publish the cache only if the reader used above is still the current one.
            synchronized (this) {
                if (this.indexReader == indexReader) {
                    this.lastPatternString = patternString;
                    this.lastResults = hits;
                }
            }
            String taskIdentifier = task.getHandleIdentifier();
            return hits != null && hits.contains(taskIdentifier);
        }

    } finally {
        readLock.unlock();
    }
    // No index reader available: nothing can match.
    return false;
}

From source file:org.eclipse.mylyn.internal.tasks.index.core.TaskListIndex.java

License:Open Source License

/**
 * finds tasks that match the given pattern string
 * /*from  w w w .j a va 2 s.c  o m*/
 * @param patternString
 *            the pattern string, used to match tasks
 * @param collector
 *            the collector that receives tasks
 * @param resultsLimit
 *            the maximum number of tasks to find. Specifying a limit enables the index to be more efficient since
 *            it can skip over matching tasks that do not score highly enough. Specify {@link Integer#MAX_VALUE} if
 *            there should be no limit.
 */
public void find(String patternString, TaskCollector collector, int resultsLimit) {
    Assert.isNotNull(patternString);
    Assert.isNotNull(collector);
    Assert.isTrue(resultsLimit > 0);

    Lock readLock = indexReaderLock.readLock();
    readLock.lock();
    try {
        IndexReader indexReader = getIndexReader();
        if (indexReader != null) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            try {
                Query query = computeQuery(patternString);
                TopDocs results = indexSearcher.search(query, resultsLimit);
                for (ScoreDoc scoreDoc : results.scoreDocs) {
                    Document document = indexReader.document(scoreDoc.doc);
                    String taskIdentifier = document.get(FIELD_IDENTIFIER.getIndexKey());
                    AbstractTask task = taskList.getTask(taskIdentifier);
                    if (task != null) {
                        collector.collect(task);
                    }
                }
            } catch (IOException e) {
                StatusHandler.log(new Status(IStatus.ERROR, TasksIndexCore.ID_PLUGIN,
                        "Unexpected failure within task list index", e)); //$NON-NLS-1$
            } finally {
                try {
                    indexSearcher.close();
                } catch (IOException e) {
                    // ignore
                }
            }
        }
    } finally {
        readLock.unlock();
    }
}

From source file:org.eclipse.mylyn.versions.tasks.mapper.internal.ChangeSetIndexer.java

License:Open Source License

/**
 * Searches the index for change sets related to the given task and passes the revision and
 * repository URL of every hit to the collector.
 *
 * @param task             the task to search change sets for
 * @param scmRepositoryUrl the repository URL used to build the query
 * @param resultsLimit     the maximum number of hits requested from the index
 * @param collector        receives the revision and repository URL of each hit
 * @return the value of the hit counter when the search ends; {@code 0} when no index reader is available
 * @throws CoreException declared by the signature
 */
public int search(ITask task, String scmRepositoryUrl, int resultsLimit, IChangeSetCollector collector)
        throws CoreException {
    final IndexReader reader = getIndexReader();
    if (reader == null) {
        return 0;
    }

    int found = 0;
    final IndexSearcher searcher = new IndexSearcher(reader);
    try {
        final TopDocs topDocs = searcher.search(createQuery(task, scmRepositoryUrl), resultsLimit);
        for (ScoreDoc hit : topDocs.scoreDocs) {
            final Document stored = reader.document(hit.doc);
            found++;
            if (found > resultsLimit) {
                break;
            }

            final String revision = stored.getField(IndexedFields.REVISION.getIndexKey()).stringValue();
            final String repositoryUrl = stored.getField(IndexedFields.REPOSITORY.getIndexKey()).stringValue();

            collector.collect(revision, repositoryUrl);
        }
    } catch (IOException e) {
        // I/O failures are silently dropped here; the counter accumulated so far is returned.
    } finally {
        try {
            searcher.close();
        } catch (IOException e) {
            // ignore
        }
    }
    return found;
}

From source file:org.elasticsearch.test.unit.common.compress.CompressIndexInputOutputTests.java

License:Apache License

/**
 * Verifies the index behind the given writer: runs {@code CheckIndex} and asserts the index
 * is clean, then checks the stored fields of every live document sequentially, followed by
 * 100 randomly sampled documents.
 *
 * @param writer the writer whose index is verified
 * @throws Exception propagated from the index check or from reading documents
 */
private void verify(IndexWriter writer) throws Exception {
    CheckIndex checkIndex = new CheckIndex(writer.getDirectory());
    CheckIndex.Status status = checkIndex.checkIndex();
    assertThat(status.clean, equalTo(true));
    IndexReader reader = DirectoryReader.open(writer, true);
    try {
        // null when the index has no deletions, i.e. every document is live
        final Bits liveDocs = MultiFields.getLiveDocs(reader);
        for (int i = 0; i < reader.maxDoc(); i++) {
            if (liveDocs != null && !liveDocs.get(i)) {
                continue;
            }
            checkStoredFields(reader, i);
        }
        for (int i = 0; i < 100; i++) {
            int doc = ThreadLocalRandom.current().nextInt(reader.maxDoc());
            // the liveness test must look at the sampled document, not at the loop counter
            if (liveDocs != null && !liveDocs.get(doc)) {
                continue;
            }
            checkStoredFields(reader, doc);
        }
    } finally {
        // release the reader opened above
        reader.close();
    }
}

/** Checks one document twice: loaded in full, and loaded through a field-restricted visitor. */
private void checkStoredFields(IndexReader reader, int doc) throws Exception {
    Document document = reader.document(doc);
    checkDoc(document);
    DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor("id", "field", "count");
    reader.document(doc, visitor);
    document = visitor.getDocument();
    checkDoc(document);
}