Example usage for org.apache.lucene.index IndexWriter addDocument

List of usage examples for org.apache.lucene.index IndexWriter addDocument

Introduction

On this page you can find example usages of the addDocument method of org.apache.lucene.index.IndexWriter.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException 

Source Link

Document

Adds a document to this index.

Usage

From source file:com.gitblit.service.LuceneService.java

License:Apache License

/**
 * This completely indexes the repository and will destroy any existing
 * index./* w w w.jav  a  2  s  . c o m*/
 *
 * @param repositoryName
 * @param repository
 * @return IndexResult
 */
public IndexResult reindex(RepositoryModel model, Repository repository) {
    IndexResult result = new IndexResult();
    if (!deleteIndex(model.name)) {
        return result;
    }
    try {
        String[] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
        FileBasedConfig config = getConfig(repository);
        Set<String> indexedCommits = new TreeSet<String>();
        IndexWriter writer = getIndexWriter(model.name);
        // build a quick lookup of tags
        Map<String, List<String>> tags = new HashMap<String, List<String>>();
        for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
            if (!tag.isAnnotatedTag()) {
                // skip non-annotated tags
                continue;
            }
            if (!tags.containsKey(tag.getReferencedObjectId().getName())) {
                tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
            }
            tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
        }

        ObjectReader reader = repository.newObjectReader();

        // get the local branches
        List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);

        // sort them by most recently updated
        Collections.sort(branches, new Comparator<RefModel>() {
            @Override
            public int compare(RefModel ref1, RefModel ref2) {
                return ref2.getDate().compareTo(ref1.getDate());
            }
        });

        // reorder default branch to first position
        RefModel defaultBranch = null;
        ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
        for (RefModel branch : branches) {
            if (branch.getObjectId().equals(defaultBranchId)) {
                defaultBranch = branch;
                break;
            }
        }
        branches.remove(defaultBranch);
        branches.add(0, defaultBranch);

        // walk through each branch
        for (RefModel branch : branches) {

            boolean indexBranch = false;
            if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
                    && branch.equals(defaultBranch)) {
                // indexing "default" branch
                indexBranch = true;
            } else if (branch.getName().startsWith(com.gitblit.Constants.R_META)) {
                // skip internal meta branches
                indexBranch = false;
            } else {
                // normal explicit branch check
                indexBranch = model.indexedBranches.contains(branch.getName());
            }

            // if this branch is not specifically indexed then skip
            if (!indexBranch) {
                continue;
            }

            String branchName = branch.getName();
            RevWalk revWalk = new RevWalk(reader);
            RevCommit tip = revWalk.parseCommit(branch.getObjectId());
            String tipId = tip.getId().getName();

            String keyName = getBranchKey(branchName);
            config.setString(CONF_ALIAS, null, keyName, branchName);
            config.setString(CONF_BRANCH, null, keyName, tipId);

            // index the blob contents of the tree
            TreeWalk treeWalk = new TreeWalk(repository);
            treeWalk.addTree(tip.getTree());
            treeWalk.setRecursive(true);

            Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();
            while (treeWalk.next()) {
                // ensure path is not in a submodule
                if (treeWalk.getFileMode(0) != FileMode.GITLINK) {
                    paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
                }
            }

            ByteArrayOutputStream os = new ByteArrayOutputStream();
            byte[] tmp = new byte[32767];

            RevWalk commitWalk = new RevWalk(reader);
            commitWalk.markStart(tip);

            RevCommit commit;
            while ((paths.size() > 0) && (commit = commitWalk.next()) != null) {
                TreeWalk diffWalk = new TreeWalk(reader);
                int parentCount = commit.getParentCount();
                switch (parentCount) {
                case 0:
                    diffWalk.addTree(new EmptyTreeIterator());
                    break;
                case 1:
                    diffWalk.addTree(getTree(commitWalk, commit.getParent(0)));
                    break;
                default:
                    // skip merge commits
                    continue;
                }
                diffWalk.addTree(getTree(commitWalk, commit));
                diffWalk.setFilter(ANY_DIFF);
                diffWalk.setRecursive(true);
                while ((paths.size() > 0) && diffWalk.next()) {
                    String path = diffWalk.getPathString();
                    if (!paths.containsKey(path)) {
                        continue;
                    }

                    // remove path from set
                    ObjectId blobId = paths.remove(path);
                    result.blobCount++;

                    // index the blob metadata
                    String blobAuthor = getAuthor(commit);
                    String blobCommitter = getCommitter(commit);
                    String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE);

                    Document doc = new Document();
                    doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(),
                            StringField.TYPE_STORED));
                    doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
                    doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
                    doc.add(new Field(FIELD_PATH, path, TextField.TYPE_STORED));
                    doc.add(new Field(FIELD_DATE, blobDate, StringField.TYPE_STORED));
                    doc.add(new Field(FIELD_AUTHOR, blobAuthor, TextField.TYPE_STORED));
                    doc.add(new Field(FIELD_COMMITTER, blobCommitter, TextField.TYPE_STORED));

                    // determine extension to compare to the extension
                    // blacklist
                    String ext = null;
                    String name = path.toLowerCase();
                    if (name.indexOf('.') > -1) {
                        ext = name.substring(name.lastIndexOf('.') + 1);
                    }

                    // index the blob content
                    if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
                        ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
                        InputStream in = ldr.openStream();
                        int n;
                        while ((n = in.read(tmp)) > 0) {
                            os.write(tmp, 0, n);
                        }
                        in.close();
                        byte[] content = os.toByteArray();
                        String str = StringUtils.decodeString(content, encodings);
                        doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
                        os.reset();
                    }

                    // add the blob to the index
                    writer.addDocument(doc);
                }
            }

            os.close();

            // index the tip commit object
            if (indexedCommits.add(tipId)) {
                Document doc = createDocument(tip, tags.get(tipId));
                doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
                writer.addDocument(doc);
                result.commitCount += 1;
                result.branchCount += 1;
            }

            // traverse the log and index the previous commit objects
            RevWalk historyWalk = new RevWalk(reader);
            historyWalk.markStart(historyWalk.parseCommit(tip.getId()));
            RevCommit rev;
            while ((rev = historyWalk.next()) != null) {
                String hash = rev.getId().getName();
                if (indexedCommits.add(hash)) {
                    Document doc = createDocument(rev, tags.get(hash));
                    doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
                    writer.addDocument(doc);
                    result.commitCount += 1;
                }
            }
        }

        // finished
        reader.close();

        // commit all changes and reset the searcher
        config.save();
        writer.commit();
        resetIndexSearcher(model.name);
        result.success();
    } catch (Exception e) {
        logger.error("Exception while reindexing " + model.name, e);
    }
    return result;
}

From source file:com.gitblit.service.LuceneService.java

License:Apache License

/**
 * Incrementally update the index with the specified commit for the
 * repository.
 *
 * Each changed, non-submodule path first has its indexed blob deleted; paths
 * that were not deleted by the commit are then re-added, but only when their
 * extension is not excluded and their content could be read. Finally the
 * commit itself is indexed together with any annotated tags referencing it.
 *
 * @param repositoryName
 *            name of the repository whose index is updated
 * @param repository
 *            the repository to read the commit contents from
 * @param branch
 *            the fully qualified branch name (e.g. refs/heads/master)
 * @param commit
 *            the commit to index
 * @return the IndexResult with blob and commit counts; its success flag is
 *         the outcome of indexing the commit document
 */
private IndexResult index(String repositoryName, Repository repository, String branch, RevCommit commit) {
    IndexResult result = new IndexResult();
    try {
        String[] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
        List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
        String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE);
        IndexWriter writer = getIndexWriter(repositoryName);

        for (PathChangeModel changed : changedPaths) {
            if (changed.isSubmodule()) {
                continue;
            }

            // drop whatever is currently indexed for this path
            deleteBlob(repositoryName, branch, changed.name);

            // a deleted path has nothing to re-index
            if (ChangeType.DELETE.equals(changed.changeType)) {
                continue;
            }

            result.blobCount++;
            Document blobDoc = new Document();
            blobDoc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED));
            blobDoc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED));
            blobDoc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
            blobDoc.add(new Field(FIELD_PATH, changed.path, TextField.TYPE_STORED));
            blobDoc.add(new Field(FIELD_DATE, revDate, StringField.TYPE_STORED));
            blobDoc.add(new Field(FIELD_AUTHOR, getAuthor(commit), TextField.TYPE_STORED));
            blobDoc.add(new Field(FIELD_COMMITTER, getCommitter(commit), TextField.TYPE_STORED));

            // lower-cased text after the last dot, compared against the
            // extension blacklist
            String lowerName = changed.name.toLowerCase();
            int lastDot = lowerName.lastIndexOf('.');
            String extension = (lastDot > -1) ? lowerName.substring(lastDot + 1) : null;

            boolean excluded = !StringUtils.isEmpty(extension) && excludedExtensions.contains(extension);
            if (excluded) {
                continue;
            }

            // read the blob content; the document is only written when
            // content is available
            String content = JGitUtils.getStringContent(repository, commit.getTree(), changed.path, encodings);
            if (content == null) {
                continue;
            }
            blobDoc.add(new Field(FIELD_CONTENT, content, TextField.TYPE_STORED));
            writer.addDocument(blobDoc);
        }
        writer.commit();

        // get any annotated commit tags
        List<String> commitTags = new ArrayList<String>();
        for (RefModel ref : JGitUtils.getTags(repository, false, -1)) {
            if (!ref.isAnnotatedTag()) {
                continue;
            }
            if (ref.getReferencedObjectId().equals(commit.getId())) {
                commitTags.add(ref.displayName);
            }
        }

        // create and write the Lucene document
        Document commitDoc = createDocument(commit, commitTags);
        commitDoc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED));
        result.commitCount++;
        result.success = index(repositoryName, commitDoc);
    } catch (Exception e) {
        String message = MessageFormat.format("Exception while indexing commit {0} in {1}",
                commit.getId().getName(), repositoryName);
        logger.error(message, e);
    }
    return result;
}

From source file:com.gitblit.tickets.TicketIndexer.java

License:Apache License

/**
 * Bulk Add/Update tickets in the Lucene index
 *
 * @param tickets/*  www . j ava  2s . co  m*/
 */
public void index(List<TicketModel> tickets) {
    try {
        IndexWriter writer = getWriter();
        for (TicketModel ticket : tickets) {
            Document doc = ticketToDoc(ticket);
            writer.addDocument(doc);
        }
        writer.commit();
        closeSearcher();
    } catch (Exception e) {
        log.error("error", e);
    }
}

From source file:com.gitblit.tickets.TicketIndexer.java

License:Apache License

/**
 * Add/Update a ticket in the Lucene index
 *
 * @param ticket/*w  ww .j  a v a2 s.  c o  m*/
 */
public void index(TicketModel ticket) {
    try {
        IndexWriter writer = getWriter();
        delete(ticket.repository, ticket.number, writer);
        Document doc = ticketToDoc(ticket);
        writer.addDocument(doc);
        writer.commit();
        closeSearcher();
    } catch (Exception e) {
        log.error("error", e);
    }
}

From source file:com.github.buzztaiki.lucene.lastuni.CJKSingleCharQueryTest.java

License:Apache License

/**
 * Adds a new document to the writer whose only field is the stored
 * "content" field built by newTextField from the given text.
 *
 * @param writer the index writer receiving the document
 * @param content the value of the "content" field
 * @throws IOException if the writer fails to add the document
 */
private void addDoc(IndexWriter writer, String content) throws IOException {
    final Document singleFieldDoc = new Document();
    singleFieldDoc.add(newTextField("content", content, Field.Store.YES));

    writer.addDocument(singleFieldDoc);
}

From source file:com.github.flaxsearch.testutil.GutenbergIndex.java

License:Apache License

/**
 * Adds one document per entry of the {@code source} directory to the writer.
 *
 * The writer is committed after the first entry and after every seventh
 * entry from then on; entries added after the last such commit are left
 * uncommitted by this method.
 *
 * @param writer the index writer receiving the documents
 * @param source the directory whose entries are read and indexed
 * @throws IOException if listing, reading or indexing fails
 */
public static void writeDocuments(IndexWriter writer, Path source) throws IOException {
    int processed = 0;
    try (DirectoryStream<Path> entries = Files.newDirectoryStream(source)) {
        for (Path entry : entries) {
            writer.addDocument(buildDocument(entry, Files.readAllBytes(entry)));

            // decide on the count before this entry, then advance it
            boolean commitNow = (processed % 7 == 0);
            processed++;
            if (commitNow) {
                writer.commit();
            }
        }
    }
}

From source file:com.github.lucene.store.CreateJavaTestIndex.java

License:Apache License

/**
 * Populates the directory with one document per file found under the
 * {@code src} folder.
 *
 * The writer is opened in a try-with-resources block so that it is closed
 * even when building or adding a document fails.
 *
 * @param directory the directory the index is written to
 * @param analyzer the analyzer used to build the writer configuration
 * @throws IOException if the index cannot be written
 * @throws ParseException declared by the original signature; presumably raised by getDocument
 */
public static void populate(final Directory directory, final Analyzer analyzer)
        throws IOException, ParseException {
    final String dataDir = new File("src").getAbsolutePath();
    final List<File> results = new ArrayList<File>();
    findFiles(results, new File(dataDir));

    final IndexWriterConfig config = TestUtils.getIndexWriterConfig(analyzer, openMode, useCompoundFile);
    try (IndexWriter writer = new IndexWriter(directory, config)) {
        for (final File file : results) {
            final Document doc = getDocument(dataDir, file);
            writer.addDocument(doc);
        }
    }
}

From source file:com.github.lucene.store.CreateTestIndex.java

License:Apache License

/**
 * Populates the directory with one document per file found under
 * {@code target/test-classes/data}.
 *
 * The writer is opened in a try-with-resources block so that it is closed
 * even when building or adding a document fails.
 *
 * @param directory the directory the index is written to
 * @param analyzer the analyzer used to build the writer configuration
 * @throws IOException if the index cannot be written
 * @throws ParseException declared by the original signature; presumably raised by getDocument
 */
public static void populate(final Directory directory, final Analyzer analyzer)
        throws IOException, ParseException {
    final String dataDir = new File("target/test-classes/data").getAbsolutePath();
    final List<File> results = new ArrayList<File>();
    findFiles(results, new File(dataDir));

    final IndexWriterConfig config = TestUtils.getIndexWriterConfig(analyzer, openMode, useCompoundFile);
    try (IndexWriter writer = new IndexWriter(directory, config)) {
        for (final File file : results) {
            final Document doc = getDocument(dataDir, file);
            writer.addDocument(doc);
        }
    }
}

From source file:com.github.lucene.store.jdbc.AbstractJdbcDirectoryITest.java

License:Apache License

/**
 * Writes one document per entry of {@code docs} into the directory, running
 * inside a {@code DirectoryTemplate} callback.
 *
 * Each document carries the same word in four fields: "keyword",
 * "unindexed", "unstored" and "text".
 *
 * @param directory the directory to write the index to
 * @param openMode currently unused, see the review note in the body
 * @param useCompoundFile passed to the writer configuration
 * @param docs the words to index, one document each
 * @throws IOException if writing the index fails
 */
protected void addDocuments(final Directory directory, final OpenMode openMode, final boolean useCompoundFile,
        final Collection<String> docs) throws IOException {
    final IndexWriterConfig config = new IndexWriterConfig(analyzer);
    // NOTE(review): the openMode parameter is ignored and CREATE is always
    // used - confirm whether callers expect their mode to be honoured
    config.setOpenMode(OpenMode.CREATE);
    config.setUseCompoundFile(useCompoundFile);

    final DirectoryTemplate template = new DirectoryTemplate(directory);
    template.execute(new DirectoryTemplate.DirectoryCallbackWithoutResult() {
        @Override
        public void doInDirectoryWithoutResult(final Directory dir) throws IOException {
            // try-with-resources closes the writer even when adding a
            // document fails
            try (IndexWriter writer = new IndexWriter(dir, config)) {
                for (final String word : docs) {
                    final Document doc = new Document();
                    // FIXME: review
                    // doc.add(new Field("keyword", word, Field.Store.YES,
                    // Field.Index.UN_TOKENIZED));
                    // doc.add(new Field("unindexed", word, Field.Store.YES,
                    // Field.Index.NO));
                    // doc.add(new Field("unstored", word, Field.Store.NO,
                    // Field.Index.TOKENIZED));
                    // doc.add(new Field("text", word, Field.Store.YES,
                    // Field.Index.TOKENIZED));
                    doc.add(new StringField("keyword", word, Field.Store.YES));
                    doc.add(new StringField("unindexed", word, Field.Store.YES));
                    doc.add(new StringField("unstored", word, Field.Store.NO));
                    doc.add(new StringField("text", word, Field.Store.YES));
                    writer.addDocument(doc);
                }

                // FIXME: review
                // writer.optimize();
            }
        }
    });
}

From source file:com.github.mosuka.apache.lucene.example.cmd.AddCommand.java

License:Apache License

/**
 * Adds a single document to the index named by the "index" attribute and
 * prints a JSON response with "status" and "message" to standard output.
 *
 * The document is built from the "unique_id" and "text" attributes. Status
 * 0 / "OK" is reported only after the document was added, committed and
 * both the writer and the directory were closed without an IOException;
 * otherwise status 1 and the exception message are reported.
 *
 * @param attrs command attributes; "index", "unique_id" and "text" are read
 *            and cast to String
 */
@Override
public void execute(Map<String, Object> attrs) {
    Map<String, Object> responseMap = new LinkedHashMap<String, Object>();

    String responseJSON = null;

    try {
        String index = (String) attrs.get("index");
        String uniqueId = (String) attrs.get("unique_id");
        String text = (String) attrs.get("text");

        Document document = LuceneExampleUtil.createDocument(uniqueId, text);

        IndexWriterConfig config = new IndexWriterConfig(LuceneExampleUtil.createAnalyzerWrapper());
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);

        // resources are closed in reverse order (writer, then directory),
        // also when adding or committing fails
        try (Directory indexDir = FSDirectory.open(new File(index).toPath());
                IndexWriter writer = new IndexWriter(indexDir, config)) {
            writer.addDocument(document);
            writer.commit();
        }

        responseMap.put("status", 0);
        responseMap.put("message", "OK");
    } catch (IOException e) {
        responseMap.put("status", 1);
        responseMap.put("message", e.getMessage());
    }

    try {
        ObjectMapper mapper = new ObjectMapper();
        responseJSON = mapper.writeValueAsString(responseMap);
    } catch (IOException e) {
        // escape backslashes and quotes so the hand-built fallback stays
        // valid JSON whatever the exception message contains
        String escaped = String.valueOf(e.getMessage()).replace("\\", "\\\\").replace("\"", "\\\"");
        responseJSON = String.format("{\"status\":1, \"message\":\"%s\"}", escaped);
    }
    System.out.println(responseJSON);
}