Example usage for org.apache.lucene.index IndexWriter addDocument

List of usage examples for org.apache.lucene.index IndexWriter addDocument

Introduction

On this page you can find example usages of org.apache.lucene.index.IndexWriter.addDocument.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException 

Source Link

Document

Adds a document to this index.

Usage

From source file:com.spike.text.lucene.util.LuceneTestBookIndexingUtil.java

License:Apache License

public static void main(String[] args) throws IOException {
    String dataDir = LuceneAppConstants.BOOK_DATA_DIR;
    String indexDir = LuceneAppConstants.BOOK_INDEX_DIR;

    List<File> results = new ArrayList<File>();
    findFiles(results, new File(dataDir));
    System.out.println(results.size() + " books to index");
    Directory directory = FSDirectory.open(Paths.get(indexDir));

    IndexWriterConfig config = new IndexWriterConfig(new MyStandardAnalyzer());
    config.setCommitOnClose(true);/*from w  w  w  .j  ava  2 s  .c  o m*/
    IndexWriter indexWriter = new IndexWriter(directory, config);

    for (File file : results) {
        Document document = getDocument(dataDir, file);
        indexWriter.addDocument(document);
    }

    indexWriter.close();
    directory.close();
}

From source file:com.sun.javaee.blueprints.carstore.search.UpdateIndex.java

License:Berkeley License

public void updateDocTag(String indexFile, String sxTagField, String tagString, String sxDocId, String type)
        throws IOException {
    if (bDebug)// www  .ja v  a 2 s .com
        System.out.println("Tagging document:" + sxDocId + " with \"" + sxTagField + " - " + tagString + "\"");
    Document doc = deleteIndex(indexFile, sxDocId);

    /*
    // get document to update, so data can be added
    SearchIndex si=new SearchIndex();
    si.query(indexFile, sxDocId, "uid");
            
    Hits hits=si.getHitsNative();
    // should only have one return
    if(hits.length() > 1) {
    // exception, should only be one
       throw new IllegalStateException("Should only have one document in index with uid=" + sxDocId);
    }
            
    Document doc=(Document)hits.doc(0);
    if(bDebug) System.out.println("HAVE DOC " + doc);
            
    // Read index and delete targeted doc through a term
    IndexReader reader=IndexReader.open(indexFile);
    // delete document by term
    int del=reader.deleteDocuments(new Term("uid", sxDocId));
    if(bDebug) {
    System.out.println("return Number of items deleted:"  + del);
    int deleted=0;
    for(int ii=0; ii < reader.numDocs(); ii++) {
        if(reader.isDeleted(ii)) {
            deleted++;
        }
    }
    if(bDebug) System.out.println("Number of deleted items in the whole index:" + deleted);
    }
    reader.close();
    */

    // update document with tag information or add to tag that exists
    // NOTE: The tag information should be persisted in another place, 
    // incase indexes need to be rebuilt
    Field field = doc.getField(sxTagField);
    if (field == null) {
        // create new tag field
        field = new Field(sxTagField, tagString, Field.Store.YES, Field.Index.TOKENIZED);
    } else {
        if (type.equals(APPEND_FIELD)) {
            // get existing field and append new tag with space
            tagString = field.stringValue() + " " + tagString;
        }
        doc.removeField(sxTagField);
        field = new Field(sxTagField, tagString, Field.Store.YES, Field.Index.TOKENIZED);
    }

    doc.add(field);
    if (bDebug)
        System.out.println("Added field \n" + field + " doc to index = \n" + doc);
    // open writer to re-add document (no update in Lucene)
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriter writer = new IndexWriter(indexFile, analyzer, false);
    if (bDebug)
        System.out.println("Before optimize = " + writer.docCount());
    writer.optimize();
    if (bDebug)
        System.out.println("Before add = " + writer.docCount());
    writer.addDocument(doc);
    if (bDebug)
        System.out.println("after add = " + writer.docCount());
    writer.close();
}

From source file:com.sun.socialsite.business.impl.LuceneSearchManagerImpl.java

License:Open Source License

/**
 * @return false if the index entry was not updated because it
 * was already current; true otherwise./* w  w  w. j a va2 s.c  om*/
 */
public boolean addToIndex(final App app) throws IOException {

    boolean needNewEntry = true;

    String key = getKey(app);
    String url = app.getURL().toExternalForm();
    String title = app.getTitle();
    String description = app.getDescription();

    IndexReader reader = IndexReader.open(indexDir);
    TermDocs termDocs = reader.termDocs(new Term("key", key));
    while (termDocs.next()) {
        Document existingDoc = reader.document(termDocs.doc());
        if (areEqual("app", existingDoc.get("class")) && areEqual(url, existingDoc.get("url"))
                && areEqual(title, existingDoc.get("title"))
                && areEqual(description, existingDoc.get("description"))) {
            needNewEntry = false;
        }
    }
    termDocs.close();
    reader.close();

    if (needNewEntry) {
        Document newDoc = new Document();
        newDoc.add(new Field("key", key, Field.Store.YES, Field.Index.UN_TOKENIZED));
        newDoc.add(new Field("class", "app", Field.Store.YES, Field.Index.UN_TOKENIZED));
        newDoc.add(new Field("url", url, Field.Store.YES, Field.Index.TOKENIZED));
        if (title != null)
            newDoc.add(new Field("title", title, Field.Store.YES, Field.Index.TOKENIZED));
        if (description != null)
            newDoc.add(new Field("description", description, Field.Store.YES, Field.Index.TOKENIZED));

        IndexWriter writer = null;
        try {
            writer = new IndexWriter(indexDir, analyzer, false);
            writer.deleteDocuments(new Term("key", key)); // Delete old entry, if present
            writer.addDocument(newDoc);
        } finally {
            if (writer != null)
                try {
                    writer.close();
                } catch (Exception e) {
                }
            ;
        }

        log.trace(String.format("Indexed app[url=%s,title=%s,description=%s]", url, title, description));
    }

    return needNewEntry;
}

From source file:com.sun.socialsite.business.impl.LuceneSearchManagerImpl.java

License:Open Source License

/**
 * @return false if the index entry was not updated because it
 * was already current; true otherwise.//from   w  w  w.ja v a2  s  .c om
 */
public boolean addToIndex(final Group group) throws IOException {

    boolean needNewEntry = true;

    String key = getKey(group);
    String handle = group.getHandle();
    String name = group.getName();
    String description = group.getDescription();

    IndexReader reader = IndexReader.open(indexDir);
    TermDocs termDocs = reader.termDocs(new Term("key", key));
    while (termDocs.next()) {
        Document existingDoc = reader.document(termDocs.doc());
        if (areEqual("group", existingDoc.get("class")) && areEqual(handle, existingDoc.get("handle"))
                && areEqual(name, existingDoc.get("name"))
                && areEqual(description, existingDoc.get("description"))) {
            needNewEntry = false;
        }
    }
    termDocs.close();
    reader.close();

    if (needNewEntry) {
        Document newDoc = new Document();
        newDoc.add(new Field("key", key, Field.Store.YES, Field.Index.UN_TOKENIZED));
        newDoc.add(new Field("class", "group", Field.Store.YES, Field.Index.UN_TOKENIZED));
        newDoc.add(new Field("handle", handle, Field.Store.YES, Field.Index.TOKENIZED));
        newDoc.add(new Field("name", name, Field.Store.YES, Field.Index.TOKENIZED));
        if (description != null)
            newDoc.add(new Field("description", description, Field.Store.YES, Field.Index.TOKENIZED));

        IndexWriter writer = null;
        try {
            writer = new IndexWriter(indexDir, analyzer, false);
            writer.deleteDocuments(new Term("key", key)); // Delete old entry, if present
            writer.addDocument(newDoc);
        } finally {
            if (writer != null)
                try {
                    writer.close();
                } catch (Exception e) {
                }
            ;
        }

        log.trace(String.format("Indexed group[handle=%s,name=%s,description=%s]", name, handle, description));
    }

    return needNewEntry;
}

From source file:com.sun.socialsite.business.impl.LuceneSearchManagerImpl.java

License:Open Source License

/**
 * @return false if the index entry was not updated because it
 * was already current; true otherwise./*from   w  ww  . j av  a2s . com*/
 */
public boolean addToIndex(final Profile profile) throws IOException {

    boolean needNewEntry = true;

    String key = getKey(profile);
    String userId = profile.getUserId();
    String firstName = profile.getFirstName();
    String middleName = profile.getMiddleName();
    String lastName = profile.getLastName();
    String nickName = profile.getNickName();
    String primaryEmail = profile.getPrimaryEmail();
    String displayName = profile.getDisplayName();

    IndexReader reader = IndexReader.open(indexDir);
    TermDocs termDocs = reader.termDocs(new Term("key", key));
    while (termDocs.next()) {
        Document existingDoc = reader.document(termDocs.doc());
        if (areEqual("profile", existingDoc.get("class")) && areEqual(userId, existingDoc.get("userId"))
                && areEqual(firstName, existingDoc.get("firstName"))
                && areEqual(middleName, existingDoc.get("middleName"))
                && areEqual(lastName, existingDoc.get("lastName"))
                && areEqual(nickName, existingDoc.get("nickName"))
                && areEqual(primaryEmail, existingDoc.get("primaryEmail"))
                && areEqual(displayName, existingDoc.get("displayName"))) {
            needNewEntry = false;
        }
    }
    termDocs.close();
    reader.close();

    if (needNewEntry) {
        Document newDoc = new Document();
        newDoc.add(new Field("key", key, Field.Store.YES, Field.Index.UN_TOKENIZED));
        newDoc.add(new Field("class", "profile", Field.Store.YES, Field.Index.UN_TOKENIZED));
        newDoc.add(new Field("userId", userId, Field.Store.YES, Field.Index.UN_TOKENIZED));
        if (firstName != null)
            newDoc.add(new Field("firstName", firstName, Field.Store.YES, Field.Index.TOKENIZED));
        if (middleName != null)
            newDoc.add(new Field("middleName", middleName, Field.Store.YES, Field.Index.TOKENIZED));
        if (lastName != null)
            newDoc.add(new Field("lastName", lastName, Field.Store.YES, Field.Index.TOKENIZED));
        if (nickName != null)
            newDoc.add(new Field("nickName", nickName, Field.Store.YES, Field.Index.TOKENIZED));
        if (primaryEmail != null)
            newDoc.add(new Field("primaryEmail", primaryEmail, Field.Store.YES, Field.Index.UN_TOKENIZED));
        if (displayName != null)
            newDoc.add(new Field("displayName", displayName, Field.Store.YES, Field.Index.TOKENIZED));

        IndexWriter writer = null;
        try {
            writer = new IndexWriter(indexDir, analyzer, false);
            writer.deleteDocuments(new Term("key", key)); // Delete old entry, if present
            writer.addDocument(newDoc);
        } finally {
            if (writer != null)
                try {
                    writer.close();
                } catch (Exception e) {
                }
            ;
        }

        log.trace(String.format(
                "Indexed profile[userId=%s,firstName=%s,lastName=%s,nickName=%s,primaryEmail=%s,displayName=%s]",
                userId, firstName, lastName, nickName, primaryEmail, displayName));
    }

    return needNewEntry;
}

From source file:com.svenjacobs.lugaene.GaeDirectoryTest.java

License:Apache License

@Test
public void wholeCycle() throws Exception {

    // Index/*ww  w . ja  v  a2 s .  c o m*/

    final Directory directory = new GaeDirectory("Test");
    final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);

    final IndexWriterConfig config = GaeIndexWriterConfigHelper.create(Version.LUCENE_44, analyzer);
    final IndexWriter indexWriter = new IndexWriter(directory, config);

    final Document doc1 = new Document();

    doc1.add(new StringField(FIELD_TITLE, "Title1", Field.Store.YES));
    doc1.add(new TextField(FIELD_CONTENTS, "keyword1 keyword2 lorem ipsum", Field.Store.NO));

    indexWriter.addDocument(doc1);

    final Document doc2 = new Document();

    doc2.add(new StringField(FIELD_TITLE, "Title2", Field.Store.YES));
    doc2.add(new TextField(FIELD_CONTENTS, "keyword3 keyword4 lorem ipsum", Field.Store.NO));

    indexWriter.addDocument(doc2);

    indexWriter.close();

    // Search

    final DirectoryReader reader = DirectoryReader.open(directory);
    final IndexSearcher searcher = new IndexSearcher(reader);

    final BooleanQuery query = new BooleanQuery();

    query.add(new TermQuery(new Term(FIELD_TITLE, "Title1")), BooleanClause.Occur.MUST);
    query.add(new TermQuery(new Term(FIELD_CONTENTS, "lorem")), BooleanClause.Occur.MUST);

    ScoreDoc[] hits = searcher.search(query, 100).scoreDocs;

    assertThat(hits.length, is(1));
    assertThat(searcher.doc(hits[0].doc).get(FIELD_TITLE), is("Title1"));

    hits = searcher.search(new TermQuery(new Term(FIELD_CONTENTS, "lorem")), 100).scoreDocs;

    assertThat(hits.length, is(2));
    assertThat(searcher.doc(hits[0].doc).get(FIELD_TITLE), is("Title1"));
    assertThat(searcher.doc(hits[1].doc).get(FIELD_TITLE), is("Title2"));

    hits = searcher.search(new TermQuery(new Term(FIELD_CONTENTS, "keyword3")), 100).scoreDocs;

    assertThat(hits.length, is(1));
    assertThat(searcher.doc(hits[0].doc).get(FIELD_TITLE), is("Title2"));
}

From source file:com.sxc.lucene.analysis.codec.MetaphoneAnalyzerTest.java

License:Apache License

public void testKoolKat() throws Exception {
    RAMDirectory directory = new RAMDirectory();
    Analyzer analyzer = new MetaphoneReplacementAnalyzer();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    IndexWriter writer = new IndexWriter(directory, indexWriterConfig);
    Document doc = new Document();
    doc.add(new TextField("contents", "cool cat", Field.Store.YES));
    writer.addDocument(doc);
    writer.close();/* w w w .j a  va  2  s.c o m*/
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory));
    Query query = new QueryParser(Version.LUCENE_47, "contents", analyzer).parse("kool kat");
    TopDocs hits = searcher.search(query, 1);
    assertEquals(1, hits.totalHits);
    int docID = hits.scoreDocs[0].doc;
    doc = searcher.doc(docID);
    assertEquals("cool cat", doc.get("contents"));
    searcher.getIndexReader().close();
}

From source file:com.sxc.lucene.analysis.synonym.SynonymAnalyzerTest.java

License:Apache License

public void setUp() throws Exception {
    RAMDirectory directory = new RAMDirectory();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_47, synonymAnalyzer);
    IndexWriter writer = new IndexWriter(directory, indexWriterConfig);
    Document doc = new Document();
    doc.add(new TextField("content", "The quick brown fox jumps over the lazy dog", Field.Store.YES)); //#2
    writer.addDocument(doc);

    writer.close();//  w w  w . j  a  v  a 2  s  .co m

    searcher = new IndexSearcher(DirectoryReader.open(directory));
}

From source file:com.sxc.lucene.index.IndexingTest.java

License:Apache License

protected void setUp() throws Exception { // 1
    directory = FSDirectory.open(new File("D:/programming/lucene/indexingTest"));

    IndexWriter writer = getWriter(); // 2
    writer.deleteAll();//w ww  .  ja  v  a 2s.c  o m

    for (int i = 0; i < ids.length; i++) { // 3
        Document doc = new Document();
        doc.add(new StringField("id", ids[i], Field.Store.YES));
        doc.add(new StringField("country", unindexed[i], Field.Store.YES));
        doc.add(new TextField("contents", unstored[i], Field.Store.NO));
        doc.add(new TextField("city", text[i], Field.Store.YES));

        writer.addDocument(doc);
    }
    writer.close();
}

From source file:com.sxc.lucene.searching.PhraseQueryTest.java

License:Apache License

protected void setUp() throws IOException {
    dir = FSDirectory.open(new File("D:/programming/lucene/PhraseQueryTest"));
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47,
            new SmartChineseAnalyzer(Version.LUCENE_47));
    config.setOpenMode(OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, config);

    Document doc = new Document();
    doc.add(new TextField("field", // 1
            "the quick brown fox jumped over the lazy dog", // 1
            Field.Store.YES)); // 1
    writer.addDocument(doc);
    writer.close();//w  ww  . ja va2s  . c om

    searcher = new IndexSearcher(DirectoryReader.open(dir));
}