Example usage for org.apache.lucene.index IndexWriter addDocument

List of usage examples for org.apache.lucene.index IndexWriter addDocument

Introduction

This page collects example usages of org.apache.lucene.index IndexWriter addDocument.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException 

Source Link

Document

Adds a document to this index.

Usage

From source file:com.foundationdb.lucene.SimpleTest.java

License:Open Source License

@Test
public void indexBasic() throws Exception {
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_44, analyzer);
    // Recreate the index on each execution so the test never depends on prior runs.
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    config.setCodec(new FDBCodec());
    FDBDirectory dir = createDirectoryForMethod();
    // try-with-resources guarantees the writer is closed (and its lock released)
    // even if addDocument throws, replacing the manual try/finally.
    try (IndexWriter writer = new IndexWriter(dir, config)) {
        writer.addDocument(Arrays.asList(new TextField("title", "The title of my first document", Store.YES),
                new TextField("content", "The content of the first document", Store.NO)));

        writer.addDocument(Arrays.asList(new TextField("title", "The title of the second document", Store.YES),
                new TextField("content", "And this is the content", Store.NO)));
    }
    assertDocumentsAreThere(dir, 2);
}

From source file:com.fuerve.villageelder.actions.results.SearchResultItemTest.java

License:Apache License

/**
 * Populates the given directories with a two-document index plus matching
 * "Author" facet categories for use by the search-result tests.
 *
 * @param indexDirectory    destination for the main Lucene index
 * @param taxonomyDirectory destination for the facet taxonomy
 * @throws IOException if either writer fails
 */
private void buildDummyIndex(final Directory indexDirectory, final Directory taxonomyDirectory)
        throws IOException {
    IndexWriterConfig iwc = new IndexWriterConfig(Lucene.LUCENE_VERSION, Lucene.getPerFieldAnalyzer());
    iwc.setOpenMode(OpenMode.CREATE);
    IndexWriter iw = new IndexWriter(indexDirectory, iwc);
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(taxonomyDirectory, OpenMode.CREATE);
    try {
        List<CategoryPath> categories = new ArrayList<CategoryPath>();
        // One FacetFields instance is sufficient for both documents.
        FacetFields facetFields = new FacetFields(tw);

        Document doc = new Document();
        doc.add(new StringField("Author", "foo", Store.YES));
        categories.add(new CategoryPath("Author", "foo"));
        doc.add(new LongField("RevisionNumber", 50L, Store.YES));
        doc.add(new StringField("Revision", "50", Store.YES));
        doc.add(new TextField("Message", "stuff", Store.YES));
        // BUG FIX: the facet fields must be added to the document *before* it is
        // passed to addDocument — the writer snapshots the document, so fields
        // added afterwards were silently never indexed.
        facetFields.addFields(doc, categories);
        iw.addDocument(doc);

        doc = new Document();
        categories.clear();
        doc.add(new StringField("Author", "bar", Store.YES));
        categories.add(new CategoryPath("Author", "bar"));
        doc.add(new LongField("RevisionNumber", 5000L, Store.YES));
        doc.add(new StringField("Revision", "5000", Store.YES));
        doc.add(new TextField("Message", "stuff", Store.YES));
        facetFields.addFields(doc, categories);
        iw.addDocument(doc);

        tw.commit();
        iw.commit();
    } finally {
        // Close both writers even if indexing fails, releasing directory locks.
        tw.close();
        iw.close();
    }
}

From source file:com.fuerve.villageelder.indexing.IndexerTest.java

License:Apache License

/**
 * Test method for {@link com.fuerve.villageelder.indexing.Indexer#Indexer(org.apache.lucene.store.Directory, org.apache.lucene.store.Directory, org.apache.lucene.index.IndexWriterConfig.OpenMode)}.
 */
@Test
public final void testIndexerDirectoryDirectoryOpenMode() throws Exception {
    RAMDirectory indexDirectory = new RAMDirectory();
    RAMDirectory taxonomyDirectory = new RAMDirectory();

    // Reflect only the private fields this test actually reads; the unused
    // handles (indexDirectory, taxonomyDirectory, stringDirectories,
    // initialized) have been removed as dead code.
    Field iwField = IndexManager.class.getDeclaredField("indexWriter");
    Field twField = IndexManager.class.getDeclaredField("taxonomyWriter");
    Field imField = Indexer.class.getDeclaredField("indexManager");

    iwField.setAccessible(true);
    twField.setAccessible(true);
    imField.setAccessible(true);

    Indexer target = new Indexer(indexDirectory, taxonomyDirectory, OpenMode.CREATE);
    target.initializeIndex();
    IndexManager testManager = (IndexManager) imField.get(target);

    // Seed the index with one category and one document.
    TaxonomyWriter tw = (TaxonomyWriter) twField.get(testManager);
    IndexWriter iw = (IndexWriter) iwField.get(testManager);
    tw.addCategory(new CategoryPath("test/stuff", '/'));
    Document doc = new Document();
    doc.add(new LongField("testfield", 1000L, Store.YES));
    iw.addDocument(doc);
    target.dispose();

    // TEST: Initializing an index, disposing it and initializing another
    // index instance on the same Directories results in loading the same
    // index.
    Indexer target2 = new Indexer(indexDirectory, taxonomyDirectory, OpenMode.APPEND);
    target2.initializeIndex();
    testManager = (IndexManager) imField.get(target2);
    iw = (IndexWriter) iwField.get(testManager);
    tw = (TaxonomyWriter) twField.get(testManager);
    assertEquals(1, iw.numDocs());
    assertEquals(3, tw.getSize());
    target2.dispose();
}

From source file:com.fuerve.villageelder.indexing.IndexManagerTest.java

License:Apache License

/**
 * Test method for {@link com.fuerve.villageelder.indexing.IndexManager#IndexManager(org.apache.lucene.store.Directory, org.apache.lucene.store.Directory, org.apache.lucene.index.IndexWriterConfig.OpenMode)}.
 */
@Test
public final void testIndexManagerDirectoryDirectoryOpenMode() throws Exception {
    RAMDirectory indexDirectory = new RAMDirectory();
    RAMDirectory taxonomyDirectory = new RAMDirectory();

    // Reflect only the private fields this test actually reads; the unused
    // handles (indexDirectory, taxonomyDirectory, stringDirectories,
    // initialized) have been removed as dead code.
    Field iwField = IndexManager.class.getDeclaredField("indexWriter");
    Field twField = IndexManager.class.getDeclaredField("taxonomyWriter");

    iwField.setAccessible(true);
    twField.setAccessible(true);

    IndexManager target = new IndexManager(indexDirectory, taxonomyDirectory, OpenMode.CREATE);
    target.initializeIndex();
    // Seed the index with one category and one document.
    TaxonomyWriter tw = (TaxonomyWriter) twField.get(target);
    IndexWriter iw = (IndexWriter) iwField.get(target);
    tw.addCategory(new CategoryPath("test/stuff", '/'));
    Document doc = new Document();
    doc.add(new LongField("testfield", 1000L, Store.YES));
    iw.addDocument(doc);
    target.dispose();

    // TEST: Initializing an index, disposing it and initializing another
    // index instance on the same Directories results in loading the same
    // index.
    IndexManager target2 = new IndexManager(indexDirectory, taxonomyDirectory, OpenMode.APPEND);
    target2.initializeIndex();
    iw = (IndexWriter) iwField.get(target2);
    tw = (TaxonomyWriter) twField.get(target2);
    assertEquals(1, iw.numDocs());
    assertEquals(3, tw.getSize());
    target2.dispose();
}

From source file:com.fuerve.villageelder.search.SearchQueryParserTest.java

License:Apache License

/**
 * Builds a two-document in-memory index and returns a reader over it.
 *
 * @return an open DirectoryReader for the freshly built index
 * @throws IOException if indexing or opening the reader fails
 */
private IndexReader buildDummyIndex() throws IOException {
    RAMDirectory indexDirectory = new RAMDirectory();

    IndexWriterConfig iwc = new IndexWriterConfig(Lucene.LUCENE_VERSION, Lucene.getPerFieldAnalyzer());
    iwc.setOpenMode(OpenMode.CREATE);
    IndexWriter iw = new IndexWriter(indexDirectory, iwc);
    try {
        Document doc = new Document();
        doc.add(new StringField("Author", "foo", Field.Store.YES));
        doc.add(new LongField("RevisionNumber", 50L, Field.Store.YES));
        doc.add(new StringField("Revision", "50", Field.Store.YES));
        doc.add(new TextField("Message", "stuff", Field.Store.YES));
        iw.addDocument(doc);

        doc = new Document();
        doc.add(new StringField("Author", "bar", Field.Store.YES));
        doc.add(new LongField("RevisionNumber", 5000L, Field.Store.YES));
        doc.add(new StringField("Revision", "5000", Field.Store.YES));
        doc.add(new TextField("Message", "stuff", Field.Store.YES));
        iw.addDocument(doc);
        iw.commit();
    } finally {
        // Close the writer even if indexing fails so the directory lock is released.
        iw.close();
    }

    return DirectoryReader.open(indexDirectory);
}

From source file:com.fun.sb.demo.lucene.IndexFiles.java

License:Apache License

/** Indexes the contents and path of a single file as one Lucene document. */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        Document doc = new Document();

        // "path": indexed (searchable) but not tokenized, and without term
        // frequency or positional information.
        doc.add(new StringField("path", file.toString(), Field.Store.YES));

        // "modified" could be indexed as a LongPoint for efficient range
        // queries at millisecond resolution (often finer than needed — a
        // value like 2011021714 = Feb 17, 2011 2-3 PM is a coarser option):
        //      doc.add(new LongPoint("modified", lastModified));

        // "contents": tokenized and indexed from a reader, not stored.
        // The file is assumed to be UTF-8; searching for special characters
        // will fail otherwise.
        BufferedReader contentReader = new BufferedReader(
                new InputStreamReader(stream, StandardCharsets.UTF_8));
        doc.add(new TextField("contents", contentReader));

        boolean freshIndex = writer.getConfig().getOpenMode() == OpenMode.CREATE;
        if (freshIndex) {
            // New index: no earlier copy of this document can exist.
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index: replace any previously indexed copy keyed by
            // the exact path, if present.
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}

From source file:com.gauronit.tagmata.core.Indexer.java

License:Open Source License

/**
 * Creates a new, empty index directory and registers it (display name plus
 * generated name) as a document in the main index.
 *
 * @param indexDisplayName human-readable name to store for the new index
 * @return the generated 8-character index name, or {@code null} on I/O failure
 */
public String createIndex(String indexDisplayName) {
    try {
        // Use the last 8 characters of a random UUID as the directory name.
        String uuidString = UUID.randomUUID().toString();
        String indexName = uuidString.substring(uuidString.length() - 8);
        IndexWriter iw = new IndexWriter(FSDirectory.open(new File(indexDir + File.separator + indexName)),
                new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
        try {
            // Committing the empty writer materializes the index files on disk.
            iw.prepareCommit();
            iw.commit();
        } finally {
            // Close even on failure so the directory write lock is released.
            iw.close();
        }

        Document doc = new Document();
        doc.add(new Field("displayName", indexDisplayName, Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field("indexName", indexName, Store.YES, Index.NOT_ANALYZED));

        IndexWriter mainIndexWriter = new IndexWriter(
                FSDirectory.open(new File(indexDir + File.separator + MAIN_INDEX)),
                new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
        try {
            // Register the new index in the main index so it can be enumerated later.
            mainIndexWriter.addDocument(doc);
            mainIndexWriter.commit();
        } finally {
            mainIndexWriter.close();
        }
        return indexName;
    } catch (IOException ex) {
        Logger.getLogger(Indexer.class.getName()).log(Level.SEVERE, null, ex);
        return null;
    }
}

From source file:com.gitblit.LuceneExecutor.java

License:Apache License

/**
 * This completely indexes the repository and will destroy any existing
 * index./*from   w  w  w.ja v  a 2  s.co m*/
 * 
 * @param repositoryName
 * @param repository
 * @return IndexResult
 */
public IndexResult reindex(RepositoryModel model, Repository repository) {
    IndexResult result = new IndexResult();
    if (!deleteIndex(model.name)) {
        return result;
    }
    try {
        String[] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
        FileBasedConfig config = getConfig(repository);
        Set<String> indexedCommits = new TreeSet<String>();
        IndexWriter writer = getIndexWriter(model.name);
        // build a quick lookup of tags
        Map<String, List<String>> tags = new HashMap<String, List<String>>();
        for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
            if (!tag.isAnnotatedTag()) {
                // skip non-annotated tags
                continue;
            }
            if (!tags.containsKey(tag.getObjectId())) {
                tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
            }
            tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
        }

        ObjectReader reader = repository.newObjectReader();

        // get the local branches
        List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);

        // sort them by most recently updated
        Collections.sort(branches, new Comparator<RefModel>() {
            @Override
            public int compare(RefModel ref1, RefModel ref2) {
                return ref2.getDate().compareTo(ref1.getDate());
            }
        });

        // reorder default branch to first position
        RefModel defaultBranch = null;
        ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
        for (RefModel branch : branches) {
            if (branch.getObjectId().equals(defaultBranchId)) {
                defaultBranch = branch;
                break;
            }
        }
        branches.remove(defaultBranch);
        branches.add(0, defaultBranch);

        // walk through each branch
        for (RefModel branch : branches) {

            boolean indexBranch = false;
            if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
                    && branch.equals(defaultBranch)) {
                // indexing "default" branch
                indexBranch = true;
            } else if (IssueUtils.GB_ISSUES.equals(branch)) {
                // skip the GB_ISSUES branch because it is indexed later
                // note: this is different than updateIndex
                indexBranch = false;
            } else {
                // normal explicit branch check
                indexBranch = model.indexedBranches.contains(branch.getName());
            }

            // if this branch is not specifically indexed then skip
            if (!indexBranch) {
                continue;
            }

            String branchName = branch.getName();
            RevWalk revWalk = new RevWalk(reader);
            RevCommit tip = revWalk.parseCommit(branch.getObjectId());
            String tipId = tip.getId().getName();

            String keyName = getBranchKey(branchName);
            config.setString(CONF_ALIAS, null, keyName, branchName);
            config.setString(CONF_BRANCH, null, keyName, tipId);

            // index the blob contents of the tree
            TreeWalk treeWalk = new TreeWalk(repository);
            treeWalk.addTree(tip.getTree());
            treeWalk.setRecursive(true);

            Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();
            while (treeWalk.next()) {
                // ensure path is not in a submodule
                if (treeWalk.getFileMode(0) != FileMode.GITLINK) {
                    paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
                }
            }

            ByteArrayOutputStream os = new ByteArrayOutputStream();
            byte[] tmp = new byte[32767];

            RevWalk commitWalk = new RevWalk(reader);
            commitWalk.markStart(tip);

            RevCommit commit;
            while ((paths.size() > 0) && (commit = commitWalk.next()) != null) {
                TreeWalk diffWalk = new TreeWalk(reader);
                int parentCount = commit.getParentCount();
                switch (parentCount) {
                case 0:
                    diffWalk.addTree(new EmptyTreeIterator());
                    break;
                case 1:
                    diffWalk.addTree(getTree(commitWalk, commit.getParent(0)));
                    break;
                default:
                    // skip merge commits
                    continue;
                }
                diffWalk.addTree(getTree(commitWalk, commit));
                diffWalk.setFilter(ANY_DIFF);
                diffWalk.setRecursive(true);
                while ((paths.size() > 0) && diffWalk.next()) {
                    String path = diffWalk.getPathString();
                    if (!paths.containsKey(path)) {
                        continue;
                    }

                    // remove path from set
                    ObjectId blobId = paths.remove(path);
                    result.blobCount++;

                    // index the blob metadata
                    String blobAuthor = getAuthor(commit);
                    String blobCommitter = getCommitter(commit);
                    String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE);

                    Document doc = new Document();
                    doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES,
                            Index.NOT_ANALYZED_NO_NORMS));
                    doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
                    doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
                    doc.add(new Field(FIELD_PATH, path, Store.YES, Index.ANALYZED));
                    doc.add(new Field(FIELD_DATE, blobDate, Store.YES, Index.NO));
                    doc.add(new Field(FIELD_AUTHOR, blobAuthor, Store.YES, Index.ANALYZED));
                    doc.add(new Field(FIELD_COMMITTER, blobCommitter, Store.YES, Index.ANALYZED));

                    // determine extension to compare to the extension
                    // blacklist
                    String ext = null;
                    String name = path.toLowerCase();
                    if (name.indexOf('.') > -1) {
                        ext = name.substring(name.lastIndexOf('.') + 1);
                    }

                    // index the blob content
                    if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
                        ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
                        InputStream in = ldr.openStream();
                        int n;
                        while ((n = in.read(tmp)) > 0) {
                            os.write(tmp, 0, n);
                        }
                        in.close();
                        byte[] content = os.toByteArray();
                        String str = StringUtils.decodeString(content, encodings);
                        doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
                        os.reset();
                    }

                    // add the blob to the index
                    writer.addDocument(doc);
                }
            }

            os.close();

            // index the tip commit object
            if (indexedCommits.add(tipId)) {
                Document doc = createDocument(tip, tags.get(tipId));
                doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
                writer.addDocument(doc);
                result.commitCount += 1;
                result.branchCount += 1;
            }

            // traverse the log and index the previous commit objects
            RevWalk historyWalk = new RevWalk(reader);
            historyWalk.markStart(historyWalk.parseCommit(tip.getId()));
            RevCommit rev;
            while ((rev = historyWalk.next()) != null) {
                String hash = rev.getId().getName();
                if (indexedCommits.add(hash)) {
                    Document doc = createDocument(rev, tags.get(hash));
                    doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
                    writer.addDocument(doc);
                    result.commitCount += 1;
                }
            }
        }

        // finished
        reader.release();

        // this repository has a gb-issues branch, index all issues
        if (IssueUtils.getIssuesBranch(repository) != null) {
            List<IssueModel> issues = IssueUtils.getIssues(repository, null);
            if (issues.size() > 0) {
                result.branchCount += 1;
            }
            for (IssueModel issue : issues) {
                result.issueCount++;
                Document doc = createDocument(issue);
                writer.addDocument(doc);
            }
        }

        // commit all changes and reset the searcher
        config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
        config.save();
        writer.commit();
        resetIndexSearcher(model.name);
        result.success();
    } catch (Exception e) {
        logger.error("Exception while reindexing " + model.name, e);
    }
    return result;
}

From source file:com.gitblit.LuceneExecutor.java

License:Apache License

/**
 * Incrementally updates the Lucene index with the blobs changed by the
 * specified commit, then creates and indexes a document for the commit
 * itself (via the single-document index overload).
 *
 * @param repositoryName name of the repository whose index is updated
 * @param repository the repository to read blob content from
 * @param branch
 *            the fully qualified branch name (e.g. refs/heads/master)
 * @param commit the commit whose changed paths are (re)indexed
 * @return an IndexResult with updated blob/commit counts; {@code success}
 *         reflects whether the final commit document was indexed
 */
private IndexResult index(String repositoryName, Repository repository, String branch, RevCommit commit) {
    IndexResult result = new IndexResult();
    try {
        String[] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
        List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
        String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE);
        IndexWriter writer = getIndexWriter(repositoryName);
        for (PathChangeModel path : changedPaths) {
            // submodule links have no blob content to index
            if (path.isSubmodule()) {
                continue;
            }
            // delete the indexed blob
            deleteBlob(repositoryName, branch, path.name);

            // re-index the blob (deletions stay deleted)
            if (!ChangeType.DELETE.equals(path.changeType)) {
                result.blobCount++;
                Document doc = new Document();
                doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES,
                        Index.NOT_ANALYZED));
                doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
                doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
                doc.add(new Field(FIELD_PATH, path.path, Store.YES, Index.ANALYZED));
                doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));
                doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));
                doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));

                // determine extension to compare to the extension
                // blacklist
                String ext = null;
                String name = path.name.toLowerCase();
                if (name.indexOf('.') > -1) {
                    ext = name.substring(name.lastIndexOf('.') + 1);
                }

                if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
                    // read the blob content
                    String str = JGitUtils.getStringContent(repository, commit.getTree(), path.path, encodings);
                    if (str != null) {
                        doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
                        writer.addDocument(doc);
                    }
                }
            }
        }
        writer.commit();

        // get any annotated commit tags
        List<String> commitTags = new ArrayList<String>();
        for (RefModel ref : JGitUtils.getTags(repository, false, -1)) {
            if (ref.isAnnotatedTag() && ref.getReferencedObjectId().equals(commit.getId())) {
                commitTags.add(ref.displayName);
            }
        }

        // create and write the Lucene document
        Document doc = createDocument(commit, commitTags);
        doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
        result.commitCount++;
        result.success = index(repositoryName, doc);
    } catch (Exception e) {
        logger.error(MessageFormat.format("Exception while indexing commit {0} in {1}",
                commit.getId().getName(), repositoryName), e);
    }
    return result;
}

From source file:com.gitblit.LuceneExecutor.java

License:Apache License

/**
 * Incrementally index an object for the repository.
 * /*from   w  w  w.j  a v  a 2  s . c  o  m*/
 * @param repositoryName
 * @param doc
 * @return true, if successful
 */
private boolean index(String repositoryName, Document doc) {
    try {
        IndexWriter writer = getIndexWriter(repositoryName);
        writer.addDocument(doc);
        writer.commit();
        resetIndexSearcher(repositoryName);
        return true;
    } catch (Exception e) {
        logger.error(
                MessageFormat.format("Exception while incrementally updating {0} Lucene index", repositoryName),
                e);
    }
    return false;
}