List of usage examples for org.apache.lucene.index IndexWriter addDocument
public long addDocument(Iterable<? extends IndexableField> doc) throws IOException
From source file: com.foundationdb.lucene.SimpleTest.java
License: Open Source License
@Test public void indexBasic() throws Exception { StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_44); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_44, analyzer); // recreate the index on each execution config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); config.setCodec(new FDBCodec()); FDBDirectory dir = createDirectoryForMethod(); IndexWriter writer = new IndexWriter(dir, config); try {//from w w w .j a v a2 s .c om writer.addDocument(Arrays.asList(new TextField("title", "The title of my first document", Store.YES), new TextField("content", "The content of the first document", Store.NO))); writer.addDocument(Arrays.asList(new TextField("title", "The title of the second document", Store.YES), new TextField("content", "And this is the content", Store.NO))); } finally { writer.close(); } assertDocumentsAreThere(dir, 2); }
From source file: com.fuerve.villageelder.actions.results.SearchResultItemTest.java
License: Apache License
private void buildDummyIndex(final Directory indexDirectory, final Directory taxonomyDirectory) throws IOException { IndexWriterConfig iwc = new IndexWriterConfig(Lucene.LUCENE_VERSION, Lucene.getPerFieldAnalyzer()); iwc.setOpenMode(OpenMode.CREATE);//from w w w.j av a 2s . c om IndexWriter iw = new IndexWriter(indexDirectory, iwc); TaxonomyWriter tw = new DirectoryTaxonomyWriter(taxonomyDirectory, OpenMode.CREATE); List<CategoryPath> categories = new ArrayList<CategoryPath>(); FacetFields facetFields = new FacetFields(tw); Document doc = new Document(); categories.clear(); doc.add(new StringField("Author", "foo", Store.YES)); categories.add(new CategoryPath("Author", "foo")); doc.add(new LongField("RevisionNumber", 50L, Store.YES)); doc.add(new StringField("Revision", "50", Store.YES)); doc.add(new TextField("Message", "stuff", Store.YES)); iw.addDocument(doc); facetFields.addFields(doc, categories); doc = new Document(); facetFields = new FacetFields(tw); categories.clear(); doc.add(new StringField("Author", "bar", Store.YES)); categories.add(new CategoryPath("Author", "bar")); doc.add(new LongField("RevisionNumber", 5000L, Store.YES)); doc.add(new StringField("Revision", "5000", Store.YES)); doc.add(new TextField("Message", "stuff", Store.YES)); iw.addDocument(doc); facetFields.addFields(doc, categories); tw.commit(); tw.close(); iw.commit(); iw.close(); }
From source file: com.fuerve.villageelder.indexing.IndexerTest.java
License: Apache License
/**
 * Test method for
 * {@link com.fuerve.villageelder.indexing.Indexer#Indexer(org.apache.lucene.store.Directory, org.apache.lucene.store.Directory, org.apache.lucene.index.IndexWriterConfig.OpenMode)}.
 *
 * Verifies that initializing an index, disposing it, and initializing another
 * Indexer over the same Directories loads the same underlying index.
 *
 * CLEANUP: the original fetched and set accessible four extra reflection
 * handles (indexDirectory, taxonomyDirectory, stringDirectories, initialized)
 * that it never read; only the three handles actually used are kept.
 */
@Test
public final void testIndexerDirectoryDirectoryOpenMode() throws Exception {
    RAMDirectory indexDirectory = new RAMDirectory();
    RAMDirectory taxonomyDirectory = new RAMDirectory();

    // Reflection handles for the private internals this test inspects.
    Field iwField = IndexManager.class.getDeclaredField("indexWriter");
    Field twField = IndexManager.class.getDeclaredField("taxonomyWriter");
    Field imField = Indexer.class.getDeclaredField("indexManager");
    iwField.setAccessible(true);
    twField.setAccessible(true);
    imField.setAccessible(true);

    // Create a fresh index, add one category and one document, then dispose.
    Indexer target = new Indexer(indexDirectory, taxonomyDirectory, OpenMode.CREATE);
    target.initializeIndex();
    IndexManager testManager = (IndexManager) imField.get(target);
    TaxonomyWriter tw = (TaxonomyWriter) twField.get(testManager);
    IndexWriter iw = (IndexWriter) iwField.get(testManager);
    tw.addCategory(new CategoryPath("test/stuff", '/'));
    Document doc = new Document();
    doc.add(new LongField("testfield", 1000L, Store.YES));
    iw.addDocument(doc);
    target.dispose();

    // TEST: Initializing an index, disposing it and initializing another
    // index instance on the same Directories results in loading the same
    // index.
    Indexer target2 = new Indexer(indexDirectory, taxonomyDirectory, OpenMode.APPEND);
    target2.initializeIndex();
    testManager = (IndexManager) imField.get(target2);
    iw = (IndexWriter) iwField.get(testManager);
    tw = (TaxonomyWriter) twField.get(testManager);
    assertEquals(1, iw.numDocs());
    // 3 = root + "test" + "test/stuff" taxonomy nodes.
    assertEquals(3, tw.getSize());
    target2.dispose();
}
From source file: com.fuerve.villageelder.indexing.IndexManagerTest.java
License: Apache License
/** * Test method for {@link com.fuerve.villageelder.indexing.IndexManager#IndexManager(org.apache.lucene.store.Directory, org.apache.lucene.store.Directory, org.apache.lucene.index.IndexWriterConfig.OpenMode)}. *///from ww w .ja va 2s .c o m @Test public final void testIndexManagerDirectoryDirectoryOpenMode() throws Exception { RAMDirectory indexDirectory = new RAMDirectory(); RAMDirectory taxonomyDirectory = new RAMDirectory(); Field idField = IndexManager.class.getDeclaredField("indexDirectory"); Field tdField = IndexManager.class.getDeclaredField("taxonomyDirectory"); Field iwField = IndexManager.class.getDeclaredField("indexWriter"); Field twField = IndexManager.class.getDeclaredField("taxonomyWriter"); Field stField = IndexManager.class.getDeclaredField("stringDirectories"); Field initField = IndexManager.class.getDeclaredField("initialized"); idField.setAccessible(true); tdField.setAccessible(true); iwField.setAccessible(true); twField.setAccessible(true); stField.setAccessible(true); initField.setAccessible(true); IndexManager target = new IndexManager(indexDirectory, taxonomyDirectory, OpenMode.CREATE); target.initializeIndex(); TaxonomyWriter tw = (TaxonomyWriter) twField.get(target); IndexWriter iw = (IndexWriter) iwField.get(target); tw.addCategory(new CategoryPath("test/stuff", '/')); Document doc = new Document(); doc.add(new LongField("testfield", 1000L, Store.YES)); iw.addDocument(doc); target.dispose(); // TEST: Initializing an index, disposing it and initializing another // index instance on the same Directories results in loading the same // index. IndexManager target2 = new IndexManager(indexDirectory, taxonomyDirectory, OpenMode.APPEND); target2.initializeIndex(); iw = (IndexWriter) iwField.get(target2); tw = (TaxonomyWriter) twField.get(target2); assertEquals(1, iw.numDocs()); assertEquals(3, tw.getSize()); target2.dispose(); }
From source file: com.fuerve.villageelder.search.SearchQueryParserTest.java
License: Apache License
private IndexReader buildDummyIndex() throws IOException { RAMDirectory indexDirectory = new RAMDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(Lucene.LUCENE_VERSION, Lucene.getPerFieldAnalyzer()); iwc.setOpenMode(OpenMode.CREATE);//ww w . jav a 2s.c om IndexWriter iw = new IndexWriter(indexDirectory, iwc); Document doc = new Document(); doc.add(new StringField("Author", "foo", Field.Store.YES)); doc.add(new LongField("RevisionNumber", 50L, Field.Store.YES)); doc.add(new StringField("Revision", "50", Field.Store.YES)); doc.add(new TextField("Message", "stuff", Field.Store.YES)); iw.addDocument(doc); doc = new Document(); doc.add(new StringField("Author", "bar", Field.Store.YES)); doc.add(new LongField("RevisionNumber", 5000L, Field.Store.YES)); doc.add(new StringField("Revision", "5000", Field.Store.YES)); doc.add(new TextField("Message", "stuff", Field.Store.YES)); iw.addDocument(doc); iw.commit(); iw.close(); DirectoryReader result = DirectoryReader.open(indexDirectory); return result; }
From source file: com.fun.sb.demo.lucene.IndexFiles.java
License: Apache License
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField);//from w w w . ja v a2s .c o m // Add the last modified date of the file a field named "modified". // Use a LongPoint that is indexed (i.e. efficiently filterable with // PointRangeQuery). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. // doc.add(new LongPoint("modified", lastModified)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }
From source file: com.gauronit.tagmata.core.Indexer.java
License: Open Source License
/**
 * Creates a new, empty named index and registers it in the main index.
 *
 * A short random name (last 8 characters of a UUID) is generated, an empty
 * Lucene index is materialized on disk under that name, and a registration
 * document mapping the display name to the generated name is added to the
 * MAIN_INDEX.
 *
 * @param indexDisplayName human-readable name to record for the new index
 * @return the generated 8-character index name, or {@code null} if an
 *         {@link IOException} occurred (the error is logged and swallowed
 *         by design; callers must check for null)
 */
public String createIndex(String indexDisplayName) {
    try {
        UUID uuid = UUID.randomUUID();
        // Use the last 8 hex characters of the UUID as a compact directory name.
        String indexName = uuid.toString().substring(uuid.toString().length() - 8, uuid.toString().length());
        // Open-and-close immediately so the empty index structure exists on disk.
        IndexWriter iw = new IndexWriter(FSDirectory.open(new File(indexDir + File.separator + indexName)),
                new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
        iw.prepareCommit();
        iw.commit();
        iw.close();
        iw = null;
        // Register the new index in the main catalog: both fields are exact-match
        // keys, so they are stored but not analyzed.
        Document doc = new Document();
        doc.add(new Field("displayName", indexDisplayName, Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field("indexName", indexName, Store.YES, Index.NOT_ANALYZED));
        IndexWriter mainIndexWriter = new IndexWriter(
                FSDirectory.open(new File(indexDir + File.separator + MAIN_INDEX)),
                new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
        mainIndexWriter.addDocument(doc);
        mainIndexWriter.commit();
        mainIndexWriter.close();
        mainIndexWriter = null;
        return indexName;
    } catch (IOException ex) {
        // NOTE(review): a non-IOException failure between the writer opens and
        // closes would leak the writer; confirm whether that risk is acceptable.
        Logger.getLogger(Indexer.class.getName()).log(Level.SEVERE, null, ex);
        return null;
    }
}
From source file: com.gitblit.LuceneExecutor.java
License: Apache License
/** * This completely indexes the repository and will destroy any existing * index./*from w w w.ja v a 2 s.co m*/ * * @param repositoryName * @param repository * @return IndexResult */ public IndexResult reindex(RepositoryModel model, Repository repository) { IndexResult result = new IndexResult(); if (!deleteIndex(model.name)) { return result; } try { String[] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]); FileBasedConfig config = getConfig(repository); Set<String> indexedCommits = new TreeSet<String>(); IndexWriter writer = getIndexWriter(model.name); // build a quick lookup of tags Map<String, List<String>> tags = new HashMap<String, List<String>>(); for (RefModel tag : JGitUtils.getTags(repository, false, -1)) { if (!tag.isAnnotatedTag()) { // skip non-annotated tags continue; } if (!tags.containsKey(tag.getObjectId())) { tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>()); } tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName); } ObjectReader reader = repository.newObjectReader(); // get the local branches List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1); // sort them by most recently updated Collections.sort(branches, new Comparator<RefModel>() { @Override public int compare(RefModel ref1, RefModel ref2) { return ref2.getDate().compareTo(ref1.getDate()); } }); // reorder default branch to first position RefModel defaultBranch = null; ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository); for (RefModel branch : branches) { if (branch.getObjectId().equals(defaultBranchId)) { defaultBranch = branch; break; } } branches.remove(defaultBranch); branches.add(0, defaultBranch); // walk through each branch for (RefModel branch : branches) { boolean indexBranch = false; if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH) && branch.equals(defaultBranch)) { // indexing "default" branch indexBranch = true; } else if 
(IssueUtils.GB_ISSUES.equals(branch)) { // skip the GB_ISSUES branch because it is indexed later // note: this is different than updateIndex indexBranch = false; } else { // normal explicit branch check indexBranch = model.indexedBranches.contains(branch.getName()); } // if this branch is not specifically indexed then skip if (!indexBranch) { continue; } String branchName = branch.getName(); RevWalk revWalk = new RevWalk(reader); RevCommit tip = revWalk.parseCommit(branch.getObjectId()); String tipId = tip.getId().getName(); String keyName = getBranchKey(branchName); config.setString(CONF_ALIAS, null, keyName, branchName); config.setString(CONF_BRANCH, null, keyName, tipId); // index the blob contents of the tree TreeWalk treeWalk = new TreeWalk(repository); treeWalk.addTree(tip.getTree()); treeWalk.setRecursive(true); Map<String, ObjectId> paths = new TreeMap<String, ObjectId>(); while (treeWalk.next()) { // ensure path is not in a submodule if (treeWalk.getFileMode(0) != FileMode.GITLINK) { paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0)); } } ByteArrayOutputStream os = new ByteArrayOutputStream(); byte[] tmp = new byte[32767]; RevWalk commitWalk = new RevWalk(reader); commitWalk.markStart(tip); RevCommit commit; while ((paths.size() > 0) && (commit = commitWalk.next()) != null) { TreeWalk diffWalk = new TreeWalk(reader); int parentCount = commit.getParentCount(); switch (parentCount) { case 0: diffWalk.addTree(new EmptyTreeIterator()); break; case 1: diffWalk.addTree(getTree(commitWalk, commit.getParent(0))); break; default: // skip merge commits continue; } diffWalk.addTree(getTree(commitWalk, commit)); diffWalk.setFilter(ANY_DIFF); diffWalk.setRecursive(true); while ((paths.size() > 0) && diffWalk.next()) { String path = diffWalk.getPathString(); if (!paths.containsKey(path)) { continue; } // remove path from set ObjectId blobId = paths.remove(path); result.blobCount++; // index the blob metadata String blobAuthor = getAuthor(commit); String 
blobCommitter = getCommitter(commit); String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE); Document doc = new Document(); doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES, Index.NOT_ANALYZED_NO_NORMS)); doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_PATH, path, Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_DATE, blobDate, Store.YES, Index.NO)); doc.add(new Field(FIELD_AUTHOR, blobAuthor, Store.YES, Index.ANALYZED)); doc.add(new Field(FIELD_COMMITTER, blobCommitter, Store.YES, Index.ANALYZED)); // determine extension to compare to the extension // blacklist String ext = null; String name = path.toLowerCase(); if (name.indexOf('.') > -1) { ext = name.substring(name.lastIndexOf('.') + 1); } // index the blob content if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) { ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB); InputStream in = ldr.openStream(); int n; while ((n = in.read(tmp)) > 0) { os.write(tmp, 0, n); } in.close(); byte[] content = os.toByteArray(); String str = StringUtils.decodeString(content, encodings); doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED)); os.reset(); } // add the blob to the index writer.addDocument(doc); } } os.close(); // index the tip commit object if (indexedCommits.add(tipId)) { Document doc = createDocument(tip, tags.get(tipId)); doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED)); writer.addDocument(doc); result.commitCount += 1; result.branchCount += 1; } // traverse the log and index the previous commit objects RevWalk historyWalk = new RevWalk(reader); historyWalk.markStart(historyWalk.parseCommit(tip.getId())); RevCommit rev; while ((rev = historyWalk.next()) != null) { String hash = rev.getId().getName(); if (indexedCommits.add(hash)) { Document doc = 
createDocument(rev, tags.get(hash)); doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED)); writer.addDocument(doc); result.commitCount += 1; } } } // finished reader.release(); // this repository has a gb-issues branch, index all issues if (IssueUtils.getIssuesBranch(repository) != null) { List<IssueModel> issues = IssueUtils.getIssues(repository, null); if (issues.size() > 0) { result.branchCount += 1; } for (IssueModel issue : issues) { result.issueCount++; Document doc = createDocument(issue); writer.addDocument(doc); } } // commit all changes and reset the searcher config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION); config.save(); writer.commit(); resetIndexSearcher(model.name); result.success(); } catch (Exception e) { logger.error("Exception while reindexing " + model.name, e); } return result; }
From source file: com.gitblit.LuceneExecutor.java
License: Apache License
/**
 * Incrementally update the index with the specified commit for the repository.
 *
 * For each non-submodule path changed by the commit, the previously indexed
 * blob is deleted and (unless the change is a DELETE) a fresh blob document is
 * indexed. Afterwards a commit document, tagged with any annotated tags that
 * reference this commit, is written via {@code index(repositoryName, doc)}.
 *
 * @param repositoryName the repository's index name
 * @param repository     the Git repository
 * @param branch         the fully qualified branch name (e.g. refs/heads/master)
 * @param commit         the commit to index
 * @return an IndexResult whose success flag reflects the final commit-document
 *         write; any exception is logged and yields a non-success result
 */
private IndexResult index(String repositoryName, Repository repository, String branch, RevCommit commit) {
    IndexResult result = new IndexResult();
    try {
        String[] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
        List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
        String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE);
        IndexWriter writer = getIndexWriter(repositoryName);
        for (PathChangeModel path : changedPaths) {
            if (path.isSubmodule()) {
                continue;
            }
            // delete the indexed blob
            deleteBlob(repositoryName, branch, path.name);
            // re-index the blob
            if (!ChangeType.DELETE.equals(path.changeType)) {
                result.blobCount++;
                Document doc = new Document();
                doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES,
                        Index.NOT_ANALYZED));
                doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
                doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
                doc.add(new Field(FIELD_PATH, path.path, Store.YES, Index.ANALYZED));
                doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));
                doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));
                doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));

                // determine extension to compare to the extension blacklist
                String ext = null;
                String name = path.name.toLowerCase();
                if (name.indexOf('.') > -1) {
                    ext = name.substring(name.lastIndexOf('.') + 1);
                }

                if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
                    // read the blob content
                    String str = JGitUtils.getStringContent(repository, commit.getTree(), path.path,
                            encodings);
                    if (str != null) {
                        doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
                        writer.addDocument(doc);
                    }
                }
                // NOTE(review): blobs with excluded extensions or unreadable content
                // are never added here (metadata-only doc is discarded), unlike
                // reindex() which always adds the document — confirm intentional.
            }
        }
        writer.commit();

        // get any annotated commit tags
        List<String> commitTags = new ArrayList<String>();
        for (RefModel ref : JGitUtils.getTags(repository, false, -1)) {
            if (ref.isAnnotatedTag() && ref.getReferencedObjectId().equals(commit.getId())) {
                commitTags.add(ref.displayName);
            }
        }

        // create and write the Lucene document
        Document doc = createDocument(commit, commitTags);
        doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
        result.commitCount++;
        result.success = index(repositoryName, doc);
    } catch (Exception e) {
        logger.error(MessageFormat.format("Exception while indexing commit {0} in {1}",
                commit.getId().getName(), repositoryName), e);
    }
    return result;
}
From source file: com.gitblit.LuceneExecutor.java
License: Apache License
/** * Incrementally index an object for the repository. * /*from w w w.j a v a 2 s . c o m*/ * @param repositoryName * @param doc * @return true, if successful */ private boolean index(String repositoryName, Document doc) { try { IndexWriter writer = getIndexWriter(repositoryName); writer.addDocument(doc); writer.commit(); resetIndexSearcher(repositoryName); return true; } catch (Exception e) { logger.error( MessageFormat.format("Exception while incrementally updating {0} Lucene index", repositoryName), e); } return false; }