List of usage examples for org.apache.lucene.index.IndexWriter#addDocument
public long addDocument(Iterable<? extends IndexableField> doc) throws IOException
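The method accepts any Iterable of IndexableField, so a plain Document is passed directly, and (in Lucene 6 and later, matching the long return type above) it returns a sequence number for the operation. Before the source-file examples below, here is a minimal self-contained sketch of the basic pattern; the index path, field names, and analyzer choice are illustrative assumptions rather than values taken from the examples.

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class AddDocumentSketch {
    public static void main(String[] args) throws IOException {
        // hypothetical index location; replace with your own path
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
             IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            // StringField is indexed as a single token (useful for ids);
            // TextField is analyzed full text
            doc.add(new StringField("id", "doc-1", Field.Store.YES));
            doc.add(new TextField("content", "hello lucene", Field.Store.YES));
            // addDocument returns a sequence number identifying this operation
            long seqNo = writer.addDocument(doc);
            System.out.println("added document, seqNo=" + seqNo);
            writer.commit();
        }
    }
}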
From source file: com.gitblit.service.LuceneService.java
License: Apache License
/**
 * This completely indexes the repository and will destroy any existing index.
 *
 * @param model
 * @param repository
 * @return IndexResult
 */
public IndexResult reindex(RepositoryModel model, Repository repository) {
    IndexResult result = new IndexResult();
    if (!deleteIndex(model.name)) {
        return result;
    }
    try {
        String[] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
        FileBasedConfig config = getConfig(repository);
        Set<String> indexedCommits = new TreeSet<String>();
        IndexWriter writer = getIndexWriter(model.name);

        // build a quick lookup of tags
        Map<String, List<String>> tags = new HashMap<String, List<String>>();
        for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
            if (!tag.isAnnotatedTag()) {
                // skip non-annotated tags
                continue;
            }
            if (!tags.containsKey(tag.getReferencedObjectId().getName())) {
                tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
            }
            tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
        }

        ObjectReader reader = repository.newObjectReader();

        // get the local branches
        List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);

        // sort them by most recently updated
        Collections.sort(branches, new Comparator<RefModel>() {
            @Override
            public int compare(RefModel ref1, RefModel ref2) {
                return ref2.getDate().compareTo(ref1.getDate());
            }
        });

        // reorder default branch to first position
        RefModel defaultBranch = null;
        ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
        for (RefModel branch : branches) {
            if (branch.getObjectId().equals(defaultBranchId)) {
                defaultBranch = branch;
                break;
            }
        }
        branches.remove(defaultBranch);
        branches.add(0, defaultBranch);

        // walk through each branch
        for (RefModel branch : branches) {
            boolean indexBranch = false;
            if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
                    && branch.equals(defaultBranch)) {
                // indexing "default" branch
                indexBranch = true;
            } else if (branch.getName().startsWith(com.gitblit.Constants.R_META)) {
                // skip internal meta branches
                indexBranch = false;
            } else {
                // normal explicit branch check
                indexBranch = model.indexedBranches.contains(branch.getName());
            }

            // if this branch is not specifically indexed then skip
            if (!indexBranch) {
                continue;
            }

            String branchName = branch.getName();
            RevWalk revWalk = new RevWalk(reader);
            RevCommit tip = revWalk.parseCommit(branch.getObjectId());
            String tipId = tip.getId().getName();

            String keyName = getBranchKey(branchName);
            config.setString(CONF_ALIAS, null, keyName, branchName);
            config.setString(CONF_BRANCH, null, keyName, tipId);

            // index the blob contents of the tree
            TreeWalk treeWalk = new TreeWalk(repository);
            treeWalk.addTree(tip.getTree());
            treeWalk.setRecursive(true);

            Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();
            while (treeWalk.next()) {
                // ensure path is not in a submodule
                if (treeWalk.getFileMode(0) != FileMode.GITLINK) {
                    paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
                }
            }

            ByteArrayOutputStream os = new ByteArrayOutputStream();
            byte[] tmp = new byte[32767];

            RevWalk commitWalk = new RevWalk(reader);
            commitWalk.markStart(tip);

            RevCommit commit;
            while ((paths.size() > 0) && (commit = commitWalk.next()) != null) {
                TreeWalk diffWalk = new TreeWalk(reader);
                int parentCount = commit.getParentCount();
                switch (parentCount) {
                case 0:
                    diffWalk.addTree(new EmptyTreeIterator());
                    break;
                case 1:
                    diffWalk.addTree(getTree(commitWalk, commit.getParent(0)));
                    break;
                default:
                    // skip merge commits
                    continue;
                }
                diffWalk.addTree(getTree(commitWalk, commit));
                diffWalk.setFilter(ANY_DIFF);
                diffWalk.setRecursive(true);
                while ((paths.size() > 0) && diffWalk.next()) {
                    String path = diffWalk.getPathString();
                    if (!paths.containsKey(path)) {
                        continue;
                    }

                    // remove path from set
                    ObjectId blobId = paths.remove(path);
                    result.blobCount++;

                    // index the blob metadata
                    String blobAuthor = getAuthor(commit);
                    String blobCommitter = getCommitter(commit);
                    String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE);

                    Document doc = new Document();
                    doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED));
                    doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
                    doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
                    doc.add(new Field(FIELD_PATH, path, TextField.TYPE_STORED));
                    doc.add(new Field(FIELD_DATE, blobDate, StringField.TYPE_STORED));
                    doc.add(new Field(FIELD_AUTHOR, blobAuthor, TextField.TYPE_STORED));
                    doc.add(new Field(FIELD_COMMITTER, blobCommitter, TextField.TYPE_STORED));

                    // determine extension to compare to the extension blacklist
                    String ext = null;
                    String name = path.toLowerCase();
                    if (name.indexOf('.') > -1) {
                        ext = name.substring(name.lastIndexOf('.') + 1);
                    }

                    // index the blob content
                    if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
                        ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
                        InputStream in = ldr.openStream();
                        int n;
                        while ((n = in.read(tmp)) > 0) {
                            os.write(tmp, 0, n);
                        }
                        in.close();
                        byte[] content = os.toByteArray();
                        String str = StringUtils.decodeString(content, encodings);
                        doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
                        os.reset();
                    }

                    // add the blob to the index
                    writer.addDocument(doc);
                }
            }

            os.close();

            // index the tip commit object
            if (indexedCommits.add(tipId)) {
                Document doc = createDocument(tip, tags.get(tipId));
                doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
                writer.addDocument(doc);
                result.commitCount += 1;
                result.branchCount += 1;
            }

            // traverse the log and index the previous commit objects
            RevWalk historyWalk = new RevWalk(reader);
            historyWalk.markStart(historyWalk.parseCommit(tip.getId()));
            RevCommit rev;
            while ((rev = historyWalk.next()) != null) {
                String hash = rev.getId().getName();
                if (indexedCommits.add(hash)) {
                    Document doc = createDocument(rev, tags.get(hash));
                    doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
                    writer.addDocument(doc);
                    result.commitCount += 1;
                }
            }
        }

        // finished
        reader.close();

        // commit all changes and reset the searcher
        config.save();
        writer.commit();
        resetIndexSearcher(model.name);
        result.success();
    } catch (Exception e) {
        logger.error("Exception while reindexing " + model.name, e);
    }
    return result;
}
From source file: com.gitblit.service.LuceneService.java
License: Apache License
/**
 * Incrementally update the index with the specified commit for the repository.
 *
 * @param repositoryName
 * @param repository
 * @param branch
 *            the fully qualified branch name (e.g. refs/heads/master)
 * @param commit
 * @return the IndexResult
 */
private IndexResult index(String repositoryName, Repository repository, String branch, RevCommit commit) {
    IndexResult result = new IndexResult();
    try {
        String[] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
        List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
        String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE);
        IndexWriter writer = getIndexWriter(repositoryName);
        for (PathChangeModel path : changedPaths) {
            if (path.isSubmodule()) {
                continue;
            }
            // delete the indexed blob
            deleteBlob(repositoryName, branch, path.name);

            // re-index the blob
            if (!ChangeType.DELETE.equals(path.changeType)) {
                result.blobCount++;
                Document doc = new Document();
                doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED));
                doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED));
                doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
                doc.add(new Field(FIELD_PATH, path.path, TextField.TYPE_STORED));
                doc.add(new Field(FIELD_DATE, revDate, StringField.TYPE_STORED));
                doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), TextField.TYPE_STORED));
                doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), TextField.TYPE_STORED));

                // determine extension to compare to the extension blacklist
                String ext = null;
                String name = path.name.toLowerCase();
                if (name.indexOf('.') > -1) {
                    ext = name.substring(name.lastIndexOf('.') + 1);
                }

                if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
                    // read the blob content
                    String str = JGitUtils.getStringContent(repository, commit.getTree(), path.path, encodings);
                    if (str != null) {
                        doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
                        writer.addDocument(doc);
                    }
                }
            }
        }
        writer.commit();

        // get any annotated commit tags
        List<String> commitTags = new ArrayList<String>();
        for (RefModel ref : JGitUtils.getTags(repository, false, -1)) {
            if (ref.isAnnotatedTag() && ref.getReferencedObjectId().equals(commit.getId())) {
                commitTags.add(ref.displayName);
            }
        }

        // create and write the Lucene document
        Document doc = createDocument(commit, commitTags);
        doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED));
        result.commitCount++;
        result.success = index(repositoryName, doc);
    } catch (Exception e) {
        logger.error(MessageFormat.format("Exception while indexing commit {0} in {1}",
                commit.getId().getName(), repositoryName), e);
    }
    return result;
}
From source file: com.gitblit.tickets.TicketIndexer.java
License: Apache License
/**
 * Bulk add/update tickets in the Lucene index.
 *
 * @param tickets
 */
public void index(List<TicketModel> tickets) {
    try {
        IndexWriter writer = getWriter();
        for (TicketModel ticket : tickets) {
            Document doc = ticketToDoc(ticket);
            writer.addDocument(doc);
        }
        writer.commit();
        closeSearcher();
    } catch (Exception e) {
        log.error("error", e);
    }
}
From source file: com.gitblit.tickets.TicketIndexer.java
License: Apache License
/**
 * Add/update a ticket in the Lucene index.
 *
 * @param ticket
 */
public void index(TicketModel ticket) {
    try {
        IndexWriter writer = getWriter();
        delete(ticket.repository, ticket.number, writer);
        Document doc = ticketToDoc(ticket);
        writer.addDocument(doc);
        writer.commit();
        closeSearcher();
    } catch (Exception e) {
        log.error("error", e);
    }
}
From source file: com.github.buzztaiki.lucene.lastuni.CJKSingleCharQueryTest.java
License: Apache License
private void addDoc(IndexWriter writer, String content) throws IOException {
    Document doc = new Document();
    doc.add(newTextField("content", content, Field.Store.YES));
    writer.addDocument(doc);
}
From source file: com.github.flaxsearch.testutil.GutenbergIndex.java
License: Apache License
public static void writeDocuments(IndexWriter writer, Path source) throws IOException {
    int count = 0;
    try (DirectoryStream<Path> directory = Files.newDirectoryStream(source)) {
        for (Path file : directory) {
            byte[] data = Files.readAllBytes(file);
            writer.addDocument(buildDocument(file, data));
            if (count++ % 7 == 0) {
                writer.commit();
            }
        }
    }
}
From source file: com.github.lucene.store.CreateJavaTestIndex.java
License: Apache License
public static void populate(final Directory directory, final Analyzer analyzer)
        throws IOException, ParseException {
    final String dataDir = new File("src").getAbsolutePath();
    final List<File> results = new ArrayList<File>();
    findFiles(results, new File(dataDir));
    final IndexWriterConfig config = TestUtils.getIndexWriterConfig(analyzer, openMode, useCompoundFile);
    final IndexWriter writer = new IndexWriter(directory, config);
    for (final File file : results) {
        final Document doc = getDocument(dataDir, file);
        writer.addDocument(doc);
    }
    writer.close();
}
From source file: com.github.lucene.store.CreateTestIndex.java
License: Apache License
public static void populate(final Directory directory, final Analyzer analyzer)
        throws IOException, ParseException {
    final String dataDir = new File("target/test-classes/data").getAbsolutePath();
    final List<File> results = new ArrayList<File>();
    findFiles(results, new File(dataDir));
    final IndexWriterConfig config = TestUtils.getIndexWriterConfig(analyzer, openMode, useCompoundFile);
    final IndexWriter writer = new IndexWriter(directory, config);
    for (final File file : results) {
        final Document doc = getDocument(dataDir, file);
        writer.addDocument(doc);
    }
    writer.close();
}
From source file: com.github.lucene.store.jdbc.AbstractJdbcDirectoryITest.java
License: Apache License
protected void addDocuments(final Directory directory, final OpenMode openMode, final boolean useCompoundFile,
        final Collection<String> docs) throws IOException {
    final IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setOpenMode(OpenMode.CREATE);
    config.setUseCompoundFile(useCompoundFile);
    final DirectoryTemplate template = new DirectoryTemplate(directory);
    template.execute(new DirectoryTemplate.DirectoryCallbackWithoutResult() {
        @Override
        public void doInDirectoryWithoutResult(final Directory dir) throws IOException {
            final IndexWriter writer = new IndexWriter(dir, config);
            for (final Object element : docs) {
                final Document doc = new Document();
                final String word = (String) element;
                // FIXME: review
                // doc.add(new Field("keyword", word, Field.Store.YES, Field.Index.UN_TOKENIZED));
                // doc.add(new Field("unindexed", word, Field.Store.YES, Field.Index.NO));
                // doc.add(new Field("unstored", word, Field.Store.NO, Field.Index.TOKENIZED));
                // doc.add(new Field("text", word, Field.Store.YES, Field.Index.TOKENIZED));
                doc.add(new StringField("keyword", word, Field.Store.YES));
                doc.add(new StringField("unindexed", word, Field.Store.YES));
                doc.add(new StringField("unstored", word, Field.Store.NO));
                doc.add(new StringField("text", word, Field.Store.YES));
                writer.addDocument(doc);
            }
            // FIXME: review
            // writer.optimize();
            writer.close();
        }
    });
}
From source file: com.github.mosuka.apache.lucene.example.cmd.AddCommand.java
License: Apache License
@Override
public void execute(Map<String, Object> attrs) {
    Map<String, Object> responseMap = new LinkedHashMap<String, Object>();
    String responseJSON = null;
    Directory indexDir = null;
    IndexWriter writer = null;
    try {
        String index = (String) attrs.get("index");
        String uniqueId = (String) attrs.get("unique_id");
        String text = (String) attrs.get("text");

        indexDir = FSDirectory.open(new File(index).toPath());
        Document document = LuceneExampleUtil.createDocument(uniqueId, text);

        IndexWriterConfig config = new IndexWriterConfig(LuceneExampleUtil.createAnalyzerWrapper());
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);

        writer = new IndexWriter(indexDir, config);
        writer.addDocument(document);
        writer.commit();

        responseMap.put("status", 0);
        responseMap.put("message", "OK");
    } catch (IOException e) {
        responseMap.put("status", 1);
        responseMap.put("message", e.getMessage());
    } finally {
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (IOException e) {
            responseMap.put("status", 1);
            responseMap.put("message", e.getMessage());
        }
        try {
            if (indexDir != null) {
                indexDir.close();
            }
        } catch (IOException e) {
            responseMap.put("status", 1);
            responseMap.put("message", e.getMessage());
        }
    }
    try {
        ObjectMapper mapper = new ObjectMapper();
        responseJSON = mapper.writeValueAsString(responseMap);
    } catch (IOException e) {
        responseJSON = String.format("{\"status\":1, \"message\":\"%s\"}", e.getMessage());
    }
    System.out.println(responseJSON);
}