Example usage for org.apache.lucene.index IndexWriter addDocument

List of usage examples for org.apache.lucene.index IndexWriter addDocument

Introduction

This page collects example usages of org.apache.lucene.index IndexWriter addDocument.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException 

Source Link

Document

Adds a document to this index.

Usage

From source file:com.foundationdb.lucene.SimpleTest.java

License:Open Source License

@Test
public void indexBasic() throws Exception {
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_44, analyzer);
    // Recreate the index on each execution so the test never depends on prior runs.
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    config.setCodec(new FDBCodec());
    FDBDirectory dir = createDirectoryForMethod();
    // try-with-resources guarantees the writer is closed (and its lock released)
    // even if addDocument throws, replacing the manual try/finally.
    try (IndexWriter writer = new IndexWriter(dir, config)) {
        writer.addDocument(Arrays.asList(new TextField("title", "The title of my first document", Store.YES),
                new TextField("content", "The content of the first document", Store.NO)));

        writer.addDocument(Arrays.asList(new TextField("title", "The title of the second document", Store.YES),
                new TextField("content", "And this is the content", Store.NO)));
    }
    assertDocumentsAreThere(dir, 2);
}

From source file:com.fuerve.villageelder.actions.results.SearchResultItemTest.java

License:Apache License

/**
 * Populates the given directories with a two-document index plus matching
 * "Author" facet categories for use by the search-result tests.
 *
 * @param indexDirectory    destination for the main Lucene index
 * @param taxonomyDirectory destination for the facet taxonomy
 * @throws IOException if either writer fails
 */
private void buildDummyIndex(final Directory indexDirectory, final Directory taxonomyDirectory)
        throws IOException {
    IndexWriterConfig iwc = new IndexWriterConfig(Lucene.LUCENE_VERSION, Lucene.getPerFieldAnalyzer());
    iwc.setOpenMode(OpenMode.CREATE);
    IndexWriter iw = new IndexWriter(indexDirectory, iwc);
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(taxonomyDirectory, OpenMode.CREATE);
    try {
        List<CategoryPath> categories = new ArrayList<CategoryPath>();
        // One FacetFields instance is sufficient for both documents.
        FacetFields facetFields = new FacetFields(tw);

        Document doc = new Document();
        doc.add(new StringField("Author", "foo", Store.YES));
        categories.add(new CategoryPath("Author", "foo"));
        doc.add(new LongField("RevisionNumber", 50L, Store.YES));
        doc.add(new StringField("Revision", "50", Store.YES));
        doc.add(new TextField("Message", "stuff", Store.YES));
        // BUG FIX: the facet fields must be added to the document *before* it is
        // passed to addDocument — the writer snapshots the document, so fields
        // added afterwards were silently never indexed.
        facetFields.addFields(doc, categories);
        iw.addDocument(doc);

        doc = new Document();
        categories.clear();
        doc.add(new StringField("Author", "bar", Store.YES));
        categories.add(new CategoryPath("Author", "bar"));
        doc.add(new LongField("RevisionNumber", 5000L, Store.YES));
        doc.add(new StringField("Revision", "5000", Store.YES));
        doc.add(new TextField("Message", "stuff", Store.YES));
        facetFields.addFields(doc, categories);
        iw.addDocument(doc);

        tw.commit();
        iw.commit();
    } finally {
        // Close both writers even if indexing fails, releasing directory locks.
        tw.close();
        iw.close();
    }
}

From source file:com.fuerve.villageelder.indexing.IndexerTest.java

License:Apache License

/**
 * Test method for {@link com.fuerve.villageelder.indexing.Indexer#Indexer(org.apache.lucene.store.Directory, org.apache.lucene.store.Directory, org.apache.lucene.index.IndexWriterConfig.OpenMode)}.
 */
@Test
public final void testIndexerDirectoryDirectoryOpenMode() throws Exception {
    RAMDirectory indexDirectory = new RAMDirectory();
    RAMDirectory taxonomyDirectory = new RAMDirectory();

    // Reflect only the private fields this test actually reads; the unused
    // handles (indexDirectory, taxonomyDirectory, stringDirectories,
    // initialized) have been removed as dead code.
    Field iwField = IndexManager.class.getDeclaredField("indexWriter");
    Field twField = IndexManager.class.getDeclaredField("taxonomyWriter");
    Field imField = Indexer.class.getDeclaredField("indexManager");

    iwField.setAccessible(true);
    twField.setAccessible(true);
    imField.setAccessible(true);

    Indexer target = new Indexer(indexDirectory, taxonomyDirectory, OpenMode.CREATE);
    target.initializeIndex();
    IndexManager testManager = (IndexManager) imField.get(target);

    // Seed the index with one category and one document.
    TaxonomyWriter tw = (TaxonomyWriter) twField.get(testManager);
    IndexWriter iw = (IndexWriter) iwField.get(testManager);
    tw.addCategory(new CategoryPath("test/stuff", '/'));
    Document doc = new Document();
    doc.add(new LongField("testfield", 1000L, Store.YES));
    iw.addDocument(doc);
    target.dispose();

    // TEST: Initializing an index, disposing it and initializing another
    // index instance on the same Directories results in loading the same
    // index.
    Indexer target2 = new Indexer(indexDirectory, taxonomyDirectory, OpenMode.APPEND);
    target2.initializeIndex();
    testManager = (IndexManager) imField.get(target2);
    iw = (IndexWriter) iwField.get(testManager);
    tw = (TaxonomyWriter) twField.get(testManager);
    assertEquals(1, iw.numDocs());
    assertEquals(3, tw.getSize());
    target2.dispose();
}

From source file:com.fuerve.villageelder.indexing.IndexManagerTest.java

License:Apache License

/**
 * Test method for {@link com.fuerve.villageelder.indexing.IndexManager#IndexManager(org.apache.lucene.store.Directory, org.apache.lucene.store.Directory, org.apache.lucene.index.IndexWriterConfig.OpenMode)}.
 */
@Test
public final void testIndexManagerDirectoryDirectoryOpenMode() throws Exception {
    RAMDirectory indexDirectory = new RAMDirectory();
    RAMDirectory taxonomyDirectory = new RAMDirectory();

    // Reflect only the private fields this test actually reads; the unused
    // handles (indexDirectory, taxonomyDirectory, stringDirectories,
    // initialized) have been removed as dead code.
    Field iwField = IndexManager.class.getDeclaredField("indexWriter");
    Field twField = IndexManager.class.getDeclaredField("taxonomyWriter");

    iwField.setAccessible(true);
    twField.setAccessible(true);

    IndexManager target = new IndexManager(indexDirectory, taxonomyDirectory, OpenMode.CREATE);
    target.initializeIndex();
    // Seed the index with one category and one document.
    TaxonomyWriter tw = (TaxonomyWriter) twField.get(target);
    IndexWriter iw = (IndexWriter) iwField.get(target);
    tw.addCategory(new CategoryPath("test/stuff", '/'));
    Document doc = new Document();
    doc.add(new LongField("testfield", 1000L, Store.YES));
    iw.addDocument(doc);
    target.dispose();

    // TEST: Initializing an index, disposing it and initializing another
    // index instance on the same Directories results in loading the same
    // index.
    IndexManager target2 = new IndexManager(indexDirectory, taxonomyDirectory, OpenMode.APPEND);
    target2.initializeIndex();
    iw = (IndexWriter) iwField.get(target2);
    tw = (TaxonomyWriter) twField.get(target2);
    assertEquals(1, iw.numDocs());
    assertEquals(3, tw.getSize());
    target2.dispose();
}

From source file:com.fuerve.villageelder.search.SearchQueryParserTest.java

License:Apache License

/**
 * Builds a two-document in-memory index and returns a reader over it.
 *
 * @return an open DirectoryReader for the freshly built index
 * @throws IOException if indexing or opening the reader fails
 */
private IndexReader buildDummyIndex() throws IOException {
    RAMDirectory indexDirectory = new RAMDirectory();

    IndexWriterConfig iwc = new IndexWriterConfig(Lucene.LUCENE_VERSION, Lucene.getPerFieldAnalyzer());
    iwc.setOpenMode(OpenMode.CREATE);
    IndexWriter iw = new IndexWriter(indexDirectory, iwc);
    try {
        Document doc = new Document();
        doc.add(new StringField("Author", "foo", Field.Store.YES));
        doc.add(new LongField("RevisionNumber", 50L, Field.Store.YES));
        doc.add(new StringField("Revision", "50", Field.Store.YES));
        doc.add(new TextField("Message", "stuff", Field.Store.YES));
        iw.addDocument(doc);

        doc = new Document();
        doc.add(new StringField("Author", "bar", Field.Store.YES));
        doc.add(new LongField("RevisionNumber", 5000L, Field.Store.YES));
        doc.add(new StringField("Revision", "5000", Field.Store.YES));
        doc.add(new TextField("Message", "stuff", Field.Store.YES));
        iw.addDocument(doc);
        iw.commit();
    } finally {
        // Close the writer even if indexing fails so the directory lock is released.
        iw.close();
    }

    return DirectoryReader.open(indexDirectory);
}

From source file:com.fun.sb.demo.lucene.IndexFiles.java

License:Apache License

/** Indexes the contents and path of a single file as one Lucene document. */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        Document doc = new Document();

        // "path": indexed (searchable) but not tokenized, and without term
        // frequency or positional information.
        doc.add(new StringField("path", file.toString(), Field.Store.YES));

        // "modified" could be indexed as a LongPoint for efficient range
        // queries at millisecond resolution (often finer than needed — a
        // value like 2011021714 = Feb 17, 2011 2-3 PM is a coarser option):
        //      doc.add(new LongPoint("modified", lastModified));

        // "contents": tokenized and indexed from a reader, not stored.
        // The file is assumed to be UTF-8; searching for special characters
        // will fail otherwise.
        BufferedReader contentReader = new BufferedReader(
                new InputStreamReader(stream, StandardCharsets.UTF_8));
        doc.add(new TextField("contents", contentReader));

        boolean freshIndex = writer.getConfig().getOpenMode() == OpenMode.CREATE;
        if (freshIndex) {
            // New index: no earlier copy of this document can exist.
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index: replace any previously indexed copy keyed by
            // the exact path, if present.
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}

From source file:com.gauronit.tagmata.core.Indexer.java

License:Open Source License

/**
 * Creates a new, empty index directory and registers it (display name plus
 * generated name) as a document in the main index.
 *
 * @param indexDisplayName human-readable name to store for the new index
 * @return the generated 8-character index name, or {@code null} on I/O failure
 */
public String createIndex(String indexDisplayName) {
    try {
        // Use the last 8 characters of a random UUID as the directory name.
        String uuidString = UUID.randomUUID().toString();
        String indexName = uuidString.substring(uuidString.length() - 8);
        IndexWriter iw = new IndexWriter(FSDirectory.open(new File(indexDir + File.separator + indexName)),
                new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
        try {
            // Committing the empty writer materializes the index files on disk.
            iw.prepareCommit();
            iw.commit();
        } finally {
            // Close even on failure so the directory write lock is released.
            iw.close();
        }

        Document doc = new Document();
        doc.add(new Field("displayName", indexDisplayName, Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field("indexName", indexName, Store.YES, Index.NOT_ANALYZED));

        IndexWriter mainIndexWriter = new IndexWriter(
                FSDirectory.open(new File(indexDir + File.separator + MAIN_INDEX)),
                new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
        try {
            // Register the new index in the main index so it can be enumerated later.
            mainIndexWriter.addDocument(doc);
            mainIndexWriter.commit();
        } finally {
            mainIndexWriter.close();
        }
        return indexName;
    } catch (IOException ex) {
        Logger.getLogger(Indexer.class.getName()).log(Level.SEVERE, null, ex);
        return null;
    }
}

From source file:com.gitblit.LuceneExecutor.java

License:Apache License

/**
 * This completely indexes the repository and will destroy any existing
 * index./*from   w  w  w.ja v  a 2  s.co m*/
 * 
 * @param repositoryName
 * @param repository
 * @return IndexResult
 */
public IndexResult reindex(RepositoryModel model, Repository repository) {
    IndexResult result = new IndexResult();
    if (!deleteIndex(model.name)) {
        return result;
    }
    try {
        String[] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
        FileBasedConfig config = getConfig(repository);
        Set<String> indexedCommits = new TreeSet<String>();
        IndexWriter writer = getIndexWriter(model.name);
        // build a quick lookup of tags
        Map<String, List<String>> tags = new HashMap<String, List<String>>();
        for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
            if (!tag.isAnnotatedTag()) {
                // skip non-annotated tags
                continue;
            }
            if (!tags.containsKey(tag.getObjectId())) {
                tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
            }
            tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
        }

        ObjectReader reader = repository.newObjectReader();

        // get the local branches
        List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);

        // sort them by most recently updated
        Collections.sort(branches, new Comparator<RefModel>() {
            @Override
            public int compare(RefModel ref1, RefModel ref2) {
                return ref2.getDate().compareTo(ref1.getDate());
            }
        });

        // reorder default branch to first position
        RefModel defaultBranch = null;
        ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
        for (RefModel branch : branches) {
            if (branch.getObjectId().equals(defaultBranchId)) {
                defaultBranch = branch;
                break;
            }
        }
        branches.remove(defaultBranch);
        branches.add(0, defaultBranch);

        // walk through each branch
        for (RefModel branch : branches) {

            boolean indexBranch = false;
            if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
                    && branch.equals(defaultBranch)) {
                // indexing "default" branch
                indexBranch = true;
            } else if (IssueUtils.GB_ISSUES.equals(branch)) {
                // skip the GB_ISSUES branch because it is indexed later
                // note: this is different than updateIndex
                indexBranch = false;
            } else {
                // normal explicit branch check
                indexBranch = model.indexedBranches.contains(branch.getName());
            }

            // if this branch is not specifically indexed then skip
            if (!indexBranch) {
                continue;
            }

            String branchName = branch.getName();
            RevWalk revWalk = new RevWalk(reader);
            RevCommit tip = revWalk.parseCommit(branch.getObjectId());
            String tipId = tip.getId().getName();

            String keyName = getBranchKey(branchName);
            config.setString(CONF_ALIAS, null, keyName, branchName);
            config.setString(CONF_BRANCH, null, keyName, tipId);

            // index the blob contents of the tree
            TreeWalk treeWalk = new TreeWalk(repository);
            treeWalk.addTree(tip.getTree());
            treeWalk.setRecursive(true);

            Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();
            while (treeWalk.next()) {
                // ensure path is not in a submodule
                if (treeWalk.getFileMode(0) != FileMode.GITLINK) {
                    paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
                }
            }

            ByteArrayOutputStream os = new ByteArrayOutputStream();
            byte[] tmp = new byte[32767];

            RevWalk commitWalk = new RevWalk(reader);
            commitWalk.markStart(tip);

            RevCommit commit;
            while ((paths.size() > 0) && (commit = commitWalk.next()) != null) {
                TreeWalk diffWalk = new TreeWalk(reader);
                int parentCount = commit.getParentCount();
                switch (parentCount) {
                case 0:
                    diffWalk.addTree(new EmptyTreeIterator());
                    break;
                case 1:
                    diffWalk.addTree(getTree(commitWalk, commit.getParent(0)));
                    break;
                default:
                    // skip merge commits
                    continue;
                }
                diffWalk.addTree(getTree(commitWalk, commit));
                diffWalk.setFilter(ANY_DIFF);
                diffWalk.setRecursive(true);
                while ((paths.size() > 0) && diffWalk.next()) {
                    String path = diffWalk.getPathString();
                    if (!paths.containsKey(path)) {
                        continue;
                    }

                    // remove path from set
                    ObjectId blobId = paths.remove(path);
                    result.blobCount++;

                    // index the blob metadata
                    String blobAuthor = getAuthor(commit);
                    String blobCommitter = getCommitter(commit);
                    String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE);

                    Document doc = new Document();
                    doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES,
                            Index.NOT_ANALYZED_NO_NORMS));
                    doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
                    doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
                    doc.add(new Field(FIELD_PATH, path, Store.YES, Index.ANALYZED));
                    doc.add(new Field(FIELD_DATE, blobDate, Store.YES, Index.NO));
                    doc.add(new Field(FIELD_AUTHOR, blobAuthor, Store.YES, Index.ANALYZED));
                    doc.add(new Field(FIELD_COMMITTER, blobCommitter, Store.YES, Index.ANALYZED));

                    // determine extension to compare to the extension
                    // blacklist
                    String ext = null;
                    String name = path.toLowerCase();
                    if (name.indexOf('.') > -1) {
                        ext = name.substring(name.lastIndexOf('.') + 1);
                    }

                    // index the blob content
                    if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
                        ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
                        InputStream in = ldr.openStream();
                        int n;
                        while ((n = in.read(tmp)) > 0) {
                            os.write(tmp, 0, n);
                        }
                        in.close();
                        byte[] content = os.toByteArray();
                        String str = StringUtils.decodeString(content, encodings);
                        doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
                        os.reset();
                    }

                    // add the blob to the index
                    writer.addDocument(doc);
                }
            }

            os.close();

            // index the tip commit object
            if (indexedCommits.add(tipId)) {
                Document doc = createDocument(tip, tags.get(tipId));
                doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
                writer.addDocument(doc);
                result.commitCount += 1;
                result.branchCount += 1;
            }

            // traverse the log and index the previous commit objects
            RevWalk historyWalk = new RevWalk(reader);
            historyWalk.markStart(historyWalk.parseCommit(tip.getId()));
            RevCommit rev;
            while ((rev = historyWalk.next()) != null) {
                String hash = rev.getId().getName();
                if (indexedCommits.add(hash)) {
                    Document doc = createDocument(rev, tags.get(hash));
                    doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
                    writer.addDocument(doc);
                    result.commitCount += 1;
                }
            }
        }

        // finished
        reader.release();

        // this repository has a gb-issues branch, index all issues
        if (IssueUtils.getIssuesBranch(repository) != null) {
            List<IssueModel> issues = IssueUtils.getIssues(repository, null);
            if (issues.size() > 0) {
                result.branchCount += 1;
            }
            for (IssueModel issue : issues) {
                result.issueCount++;
                Document doc = createDocument(issue);
                writer.addDocument(doc);
            }
        }

        // commit all changes and reset the searcher
        config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
        config.save();
        writer.commit();
        resetIndexSearcher(model.name);
        result.success();
    } catch (Exception e) {
        logger.error("Exception while reindexing " + model.name, e);
    }
    return result;
}

From source file:com.gitblit.LuceneExecutor.java

License:Apache License

/**
 * Incrementally updates the Lucene index with the blobs changed by the
 * specified commit, then creates and indexes a document for the commit
 * itself (via the single-document index overload).
 *
 * @param repositoryName name of the repository whose index is updated
 * @param repository the repository to read blob content from
 * @param branch
 *            the fully qualified branch name (e.g. refs/heads/master)
 * @param commit the commit whose changed paths are (re)indexed
 * @return an IndexResult with updated blob/commit counts; {@code success}
 *         reflects whether the final commit document was indexed
 */
private IndexResult index(String repositoryName, Repository repository, String branch, RevCommit commit) {
    IndexResult result = new IndexResult();
    try {
        String[] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
        List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
        String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE);
        IndexWriter writer = getIndexWriter(repositoryName);
        for (PathChangeModel path : changedPaths) {
            // submodule links have no blob content to index
            if (path.isSubmodule()) {
                continue;
            }
            // delete the indexed blob
            deleteBlob(repositoryName, branch, path.name);

            // re-index the blob (deletions stay deleted)
            if (!ChangeType.DELETE.equals(path.changeType)) {
                result.blobCount++;
                Document doc = new Document();
                doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES,
                        Index.NOT_ANALYZED));
                doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
                doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
                doc.add(new Field(FIELD_PATH, path.path, Store.YES, Index.ANALYZED));
                doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));
                doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));
                doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));

                // determine extension to compare to the extension
                // blacklist
                String ext = null;
                String name = path.name.toLowerCase();
                if (name.indexOf('.') > -1) {
                    ext = name.substring(name.lastIndexOf('.') + 1);
                }

                if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
                    // read the blob content
                    String str = JGitUtils.getStringContent(repository, commit.getTree(), path.path, encodings);
                    if (str != null) {
                        doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
                        writer.addDocument(doc);
                    }
                }
            }
        }
        writer.commit();

        // get any annotated commit tags
        List<String> commitTags = new ArrayList<String>();
        for (RefModel ref : JGitUtils.getTags(repository, false, -1)) {
            if (ref.isAnnotatedTag() && ref.getReferencedObjectId().equals(commit.getId())) {
                commitTags.add(ref.displayName);
            }
        }

        // create and write the Lucene document
        Document doc = createDocument(commit, commitTags);
        doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
        result.commitCount++;
        result.success = index(repositoryName, doc);
    } catch (Exception e) {
        logger.error(MessageFormat.format("Exception while indexing commit {0} in {1}",
                commit.getId().getName(), repositoryName), e);
    }
    return result;
}

From source file:com.gitblit.LuceneExecutor.java

License:Apache License

/**
 * Incrementally index an object for the repository.
 * /*from   w  w  w.j  a v  a 2  s . c  o  m*/
 * @param repositoryName
 * @param doc
 * @return true, if successful
 */
private boolean index(String repositoryName, Document doc) {
    try {
        IndexWriter writer = getIndexWriter(repositoryName);
        writer.addDocument(doc);
        writer.commit();
        resetIndexSearcher(repositoryName);
        return true;
    } catch (Exception e) {
        logger.error(
                MessageFormat.format("Exception while incrementally updating {0} Lucene index", repositoryName),
                e);
    }
    return false;
}