Example usage for org.apache.lucene.document Document iterator

List of usage examples for org.apache.lucene.document Document iterator

Introduction

In this page you can find the example usage for org.apache.lucene.document Document iterator.

Prototype

@Override
    public Iterator<IndexableField> iterator() 

Source Link

Usage

From source file:dk.dbc.opensearch.fedora.search.WriteAheadLogTest.java

License:Open Source License

@Test
public void testInitializeRecoversUncomittedFiles() throws Exception {
    // Artifacts left behind by a crashed writer: a half-committed file and the active log.
    File committingFile = new File(folder.getRoot(), "writeaheadlog.committing");
    File logFile = new File(folder.getRoot(), "writeaheadlog.log");
    RandomAccessFile committingRaf = new RandomAccessFile(committingFile, "rwd");
    RandomAccessFile logRaf = new RandomAccessFile(logFile, "rwd");

    String pid1 = "obj:1";
    Document doc1 = makeLuceneDocument(pid1);

    String pid2 = "obj:2";
    Document doc2a = makeLuceneDocument(pid2, new Pair<String, String>("field", "value1"));
    Document doc2b = makeLuceneDocument(pid2, new Pair<String, String>("field", "value2"));

    String pid3 = "obj:3";
    Document doc3 = makeLuceneDocument(pid3);

    // Given a writer with one committed document

    writer.updateDocument(WriteAheadLog.getPidTerm(pid1), doc1);
    writer.commit();

    // And a committing file recording that document's deletion plus a new document

    WriteAheadLog.writeDocumentData(committingRaf, pid1, null);
    WriteAheadLog.writeDocumentData(committingRaf, pid2, doc2a);

    // And a log file with one updated document and one new document

    WriteAheadLog.writeDocumentData(logRaf, pid2, doc2b);
    WriteAheadLog.writeDocumentData(logRaf, pid3, doc3);

    committingRaf.close();
    logRaf.close();

    // Initialize the WAL to replay and recover the lost updates

    WriteAheadLog wal = new WriteAheadLog(writer, folder.getRoot(), 1000, true);
    int recovered = wal.initialize();
    assertEquals(4, recovered);

    // Verify the recovered index state

    IndexReader reader = DirectoryReader.open(writer, false);
    IndexSearcher searcher = new IndexSearcher(reader);

    // pid1 was deleted during the interrupted commit
    TopDocs result = searcher.search(new TermQuery(WriteAheadLog.getPidTerm(pid1)), 100);
    assertEquals(0, result.scoreDocs.length);

    // pid2 must exist exactly once, with the content of the LATEST update (doc2b)
    result = searcher.search(new TermQuery(WriteAheadLog.getPidTerm(pid2)), 100);
    assertEquals(1, result.scoreDocs.length);
    Document doc2 = reader.document(result.scoreDocs[0].doc);

    // Compare the recovered document with the expected one field by field.
    // Check hasNext() BEFORE calling next() and verify afterwards that both
    // iterators are exhausted together, so a field-count mismatch fails the test.
    Iterator<IndexableField> expectedIt = doc2b.iterator();
    Iterator<IndexableField> actualIt = doc2.iterator();
    while (expectedIt.hasNext() && actualIt.hasNext()) {
        IndexableField expected = expectedIt.next();
        IndexableField actual = actualIt.next();
        assertEquals(expected.fieldType().stored(), actual.fieldType().stored());
        // NOTE(review): LongField index properties appear to differ after the WAL
        // round-trip, so they are excluded from the detailed comparison — confirm.
        if (!(expected instanceof LongField)) {
            assertEquals(expected.fieldType().indexed(), actual.fieldType().indexed());
            assertEquals(expected.fieldType().omitNorms(), actual.fieldType().omitNorms());
            assertEquals(expected.fieldType().indexOptions(), actual.fieldType().indexOptions());
        }
        assertEquals(expected.name(), actual.name());
        assertEquals(expected.stringValue(), actual.stringValue());
        assertEquals(expected.numericValue(), actual.numericValue());
    }
    // Both documents must have the same number of fields.
    assertFalse(expectedIt.hasNext());
    assertFalse(actualIt.hasNext());

    // pid3 was added by the log replay
    result = searcher.search(new TermQuery(WriteAheadLog.getPidTerm(pid3)), 100);
    assertEquals(1, result.scoreDocs.length);

}

From source file:lucene.security.index.SecureAtomicReaderTestBase.java

License:Apache License

@Test
public void testDocumentFetch() throws IOException {
    SecureAtomicReader secureReader = getSecureReader();
    // Close the reader even when an assertion fails, so the test does not leak it.
    try {
        // Docs 0 and 2 are fully readable: data fields plus the access-control fields.
        assertDocumentHasOnlyFields(secureReader.document(0), fullyVisibleFieldNames());

        // Doc 1 is discover-only: just the "info" field is exposed.
        Set<String> infoOnly = new HashSet<String>();
        infoOnly.add("info");
        assertDocumentHasOnlyFields(secureReader.document(1), infoOnly);

        assertDocumentHasOnlyFields(secureReader.document(2), fullyVisibleFieldNames());

        // Doc 3 is completely hidden: the document must contain no fields at all.
        assertFalse(secureReader.document(3).iterator().hasNext());
    } finally {
        secureReader.close();
    }
}

/** Returns the field names visible on a fully readable document. */
private Set<String> fullyVisibleFieldNames() {
    Set<String> allowed = new HashSet<String>();
    allowed.add("test");
    allowed.add("info");
    allowed.add(getAccessControlFactory().getDiscoverFieldName());
    allowed.add(getAccessControlFactory().getReadFieldName());
    return allowed;
}

/** Asserts that every field on {@code document} is in {@code allowedFieldNames}. */
private void assertDocumentHasOnlyFields(Document document, Set<String> allowedFieldNames) {
    for (IndexableField field : document) {
        assertTrue(allowedFieldNames.contains(field.name()));
    }
}

From source file:sh.isaac.provider.query.lucene.indexers.SemanticIndexer.java

License:Apache License

/**
 * Adds the fields./*from w ww .  j av  a 2  s  .c  o  m*/
 *
 * @param chronicle the chronicle
 * @param doc the doc
 */
@Override
protected void addFields(Chronology chronicle, Document doc) {
    final SemanticChronology semanticChronology = (SemanticChronology) chronicle;

    doc.add(new TextField(FIELD_SEMANTIC_ASSEMBLAGE_SEQUENCE, semanticChronology.getAssemblageNid() + "",
            Field.Store.NO));

    for (final Object sv : semanticChronology.getVersionList()) {
        if (sv instanceof DynamicVersion) {
            final DynamicVersion dsv = (DynamicVersion) sv;
            final Integer[] columns = this.lric.whatColumnsToIndex(dsv.getAssemblageNid());

            if (columns != null) {
                final int dataColCount = dsv.getData().length;

                for (final int col : columns) {
                    final DynamicData dataCol = (col >= dataColCount) ? null : dsv.getData(col);

                    // Only pass in a column number if we were asked to index more than one column for this sememe
                    handleType(doc, dataCol, (columns.length > 1) ? col : -1);
                }
            }
        }

        // TODO enhance the index configuration to allow us to configure Static sememes as indexed, or not indexed
        // static sememe types are never more than 1 column, always pass -1
        else if (sv instanceof StringVersion) {
            final StringVersion ssv = (StringVersion) sv;

            handleType(doc, new DynamicStringImpl(ssv.getString()), -1);
            incrementIndexedItemCount("Sememe String");
        } else if (sv instanceof LongVersion) {
            final LongVersion lsv = (LongVersion) sv;

            handleType(doc, new DynamicLongImpl(lsv.getLongValue()), -1);
            incrementIndexedItemCount("Sememe Long");
        } else if (sv instanceof ComponentNidVersion) {
            final ComponentNidVersion csv = (ComponentNidVersion) sv;

            handleType(doc, new DynamicNidImpl(csv.getComponentNid()), -1);
            incrementIndexedItemCount("Sememe Component Nid");
        } else if (sv instanceof LogicGraphVersion) {
            final LogicGraphVersion lgsv = (LogicGraphVersion) sv;
            final OpenIntHashSet css = new OpenIntHashSet();

            lgsv.getLogicalExpression().processDepthFirst((LogicNode logicNode, TreeNodeVisitData data) -> {
                logicNode.addConceptsReferencedByNode(css);
            });
            css.forEachKey(sequence -> {
                handleType(doc, new DynamicNidImpl(sequence), -1);
                return true;
            });
        } else {
            LOG.error(
                    "Unexpected type handed to addFields in Sememe Indexer: " + semanticChronology.toString());
        }
    }

    // Due to indexing all of the versions, we may have added duplicate field name/value combinations to the document.
    // Remove the dupes.
    final Iterator<IndexableField> it = doc.iterator();
    final HashSet<String> uniqueFields = new HashSet<>();

    while (it.hasNext()) {
        final IndexableField field = it.next();
        final String temp = field.name() + "::" + field.stringValue();

        if (uniqueFields.contains(temp)) {
            it.remove();
        } else {
            uniqueFields.add(temp);
        }
    }
}