List of usage examples for org.apache.lucene.document StringField TYPE_STORED
FieldType TYPE_STORED
To view the source code for org.apache.lucene.document StringField TYPE_STORED, click the Source Link.
From source file:at.ac.univie.mminf.luceneSKOS.analysis.engine.jena.SKOSEngineImpl.java
License:Apache License
/** * Creates lucene documents from SKOS concept. In order to allow language * restrictions, one document per language is created. *//*from w ww. j ava 2s . c o m*/ private Document createDocumentsFromConcept(Resource skos_concept) { Document conceptDoc = new Document(); String conceptURI = skos_concept.getURI(); Field uriField = new Field(FIELD_URI, conceptURI, StringField.TYPE_STORED); conceptDoc.add(uriField); // store the preferred lexical labels indexAnnotation(skos_concept, conceptDoc, SKOS.prefLabel, FIELD_PREF_LABEL); // store the alternative lexical labels indexAnnotation(skos_concept, conceptDoc, SKOS.altLabel, FIELD_ALT_LABEL); // store the hidden lexical labels indexAnnotation(skos_concept, conceptDoc, SKOS.hiddenLabel, FIELD_HIDDEN_LABEL); // store the URIs of the broader concepts indexObject(skos_concept, conceptDoc, SKOS.broader, FIELD_BROADER); // store the URIs of the broader transitive concepts indexObject(skos_concept, conceptDoc, SKOS.broaderTransitive, FIELD_BROADER_TRANSITIVE); // store the URIs of the narrower concepts indexObject(skos_concept, conceptDoc, SKOS.narrower, FIELD_NARROWER); // store the URIs of the narrower transitive concepts indexObject(skos_concept, conceptDoc, SKOS.narrowerTransitive, FIELD_NARROWER_TRANSITIVE); // store the URIs of the related concepts indexObject(skos_concept, conceptDoc, SKOS.related, FIELD_RELATED); return conceptDoc; }
From source file:at.ac.univie.mminf.luceneSKOS.analysis.engine.jena.SKOSEngineImpl.java
License:Apache License
private void indexAnnotation(Resource skos_concept, Document conceptDoc, AnnotationProperty property, String field) {//from www. j a v a 2 s . com StmtIterator stmt_iter = skos_concept.listProperties(property); while (stmt_iter.hasNext()) { Literal labelLiteral = stmt_iter.nextStatement().getObject().as(Literal.class); String label = labelLiteral.getLexicalForm(); String labelLang = labelLiteral.getLanguage(); if (this.languages != null && !this.languages.isEmpty() && !this.languages.contains(labelLang)) { continue; } // converting label to lower-case label = label.toLowerCase(Locale.ROOT); Field labelField = new Field(field, label, StringField.TYPE_STORED); conceptDoc.add(labelField); } }
From source file:ch.admin.isb.hermes5.business.search.IndexWriterWrapper.java
License:Apache License
private org.apache.lucene.document.Document buildDocument(String content, String presentationName, String name, String type) {//w ww. ja v a2 s . c om org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document(); Field presentationNameField = new Field("presentationName", presentationName, TextField.TYPE_STORED); doc.add(presentationNameField); doc.add(new Field("content", content, TextField.TYPE_STORED)); doc.add(new Field("name", name, StringField.TYPE_STORED)); doc.add(new Field("type", type, StringField.TYPE_STORED)); return doc; }
From source file:com.basistech.lucene.tools.LuceneQueryToolTest.java
License:Apache License
@BeforeClass public static void oneTimeSetup() throws IOException, ParseException { LuceneQueryToolTest.showOutput = false; // for debugging tests Directory dir = new RAMDirectory(); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter writer = new IndexWriter(dir, config); Document doc = new Document(); doc.add(new Field("longest-mention", "Bill Clinton", StringField.TYPE_STORED)); doc.add(new Field("context", "Hillary Clinton Arkansas", TextField.TYPE_NOT_STORED)); writer.addDocument(doc);//from w w w .j a v a2s. co m doc = new Document(); doc.add(new Field("longest-mention", "George W. Bush", StringField.TYPE_STORED)); doc.add(new Field("context", "Texas Laura Bush", TextField.TYPE_NOT_STORED)); writer.addDocument(doc); doc = new Document(); doc.add(new Field("longest-mention", "George H. W. Bush", StringField.TYPE_STORED)); doc.add(new Field("context", "Barbara Bush Texas", TextField.TYPE_NOT_STORED)); writer.addDocument(doc); doc = new Document(); doc.add(new Field("bbb", "foo", StringField.TYPE_STORED)); doc.add(new Field("bbb", "bar", StringField.TYPE_STORED)); doc.add(new Field("aaa", "foo", StringField.TYPE_STORED)); FieldType typeUnindexed = new FieldType(StringField.TYPE_STORED); typeUnindexed.setIndexOptions(IndexOptions.NONE); doc.add(new Field("zzz", "foo", typeUnindexed)); writer.addDocument(doc); writer.close(); reader = DirectoryReader.open(dir); }
From source file:com.basistech.lucene.tools.LuceneQueryToolTest.java
License:Apache License
@Test public void testBinaryField() throws IOException, ParseException { Directory dir = new RAMDirectory(); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter writer = new IndexWriter(dir, config); Document doc = new Document(); doc.add(new Field("id", "1", StringField.TYPE_STORED)); doc.add(new Field("binary-field", "ABC".getBytes(Charsets.UTF_8), StoredField.TYPE)); writer.addDocument(doc);//from w w w . j a va2s . c om writer.close(); reader = DirectoryReader.open(dir); ByteArrayOutputStream bytes = new ByteArrayOutputStream(); PrintStream out = new PrintStream(bytes); LuceneQueryTool lqt = new LuceneQueryTool(reader, out); lqt.run(new String[] { "id:1" }); String result = Joiner.on('\n').join(getOutput(bytes)); assertTrue(result.contains("0x414243")); // binary rep of "ABC" }
From source file:com.gitblit.service.LuceneService.java
License:Apache License
/** * This completely indexes the repository and will destroy any existing * index.//from ww w .j av a 2s . c o m * * @param repositoryName * @param repository * @return IndexResult */ public IndexResult reindex(RepositoryModel model, Repository repository) { IndexResult result = new IndexResult(); if (!deleteIndex(model.name)) { return result; } try { String[] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]); FileBasedConfig config = getConfig(repository); Set<String> indexedCommits = new TreeSet<String>(); IndexWriter writer = getIndexWriter(model.name); // build a quick lookup of tags Map<String, List<String>> tags = new HashMap<String, List<String>>(); for (RefModel tag : JGitUtils.getTags(repository, false, -1)) { if (!tag.isAnnotatedTag()) { // skip non-annotated tags continue; } if (!tags.containsKey(tag.getReferencedObjectId().getName())) { tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>()); } tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName); } ObjectReader reader = repository.newObjectReader(); // get the local branches List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1); // sort them by most recently updated Collections.sort(branches, new Comparator<RefModel>() { @Override public int compare(RefModel ref1, RefModel ref2) { return ref2.getDate().compareTo(ref1.getDate()); } }); // reorder default branch to first position RefModel defaultBranch = null; ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository); for (RefModel branch : branches) { if (branch.getObjectId().equals(defaultBranchId)) { defaultBranch = branch; break; } } branches.remove(defaultBranch); branches.add(0, defaultBranch); // walk through each branch for (RefModel branch : branches) { boolean indexBranch = false; if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH) && branch.equals(defaultBranch)) { // indexing "default" branch indexBranch = true; } else if 
(branch.getName().startsWith(com.gitblit.Constants.R_META)) { // skip internal meta branches indexBranch = false; } else { // normal explicit branch check indexBranch = model.indexedBranches.contains(branch.getName()); } // if this branch is not specifically indexed then skip if (!indexBranch) { continue; } String branchName = branch.getName(); RevWalk revWalk = new RevWalk(reader); RevCommit tip = revWalk.parseCommit(branch.getObjectId()); String tipId = tip.getId().getName(); String keyName = getBranchKey(branchName); config.setString(CONF_ALIAS, null, keyName, branchName); config.setString(CONF_BRANCH, null, keyName, tipId); // index the blob contents of the tree TreeWalk treeWalk = new TreeWalk(repository); treeWalk.addTree(tip.getTree()); treeWalk.setRecursive(true); Map<String, ObjectId> paths = new TreeMap<String, ObjectId>(); while (treeWalk.next()) { // ensure path is not in a submodule if (treeWalk.getFileMode(0) != FileMode.GITLINK) { paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0)); } } ByteArrayOutputStream os = new ByteArrayOutputStream(); byte[] tmp = new byte[32767]; RevWalk commitWalk = new RevWalk(reader); commitWalk.markStart(tip); RevCommit commit; while ((paths.size() > 0) && (commit = commitWalk.next()) != null) { TreeWalk diffWalk = new TreeWalk(reader); int parentCount = commit.getParentCount(); switch (parentCount) { case 0: diffWalk.addTree(new EmptyTreeIterator()); break; case 1: diffWalk.addTree(getTree(commitWalk, commit.getParent(0))); break; default: // skip merge commits continue; } diffWalk.addTree(getTree(commitWalk, commit)); diffWalk.setFilter(ANY_DIFF); diffWalk.setRecursive(true); while ((paths.size() > 0) && diffWalk.next()) { String path = diffWalk.getPathString(); if (!paths.containsKey(path)) { continue; } // remove path from set ObjectId blobId = paths.remove(path); result.blobCount++; // index the blob metadata String blobAuthor = getAuthor(commit); String blobCommitter = getCommitter(commit); String blobDate 
= DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE); Document doc = new Document(); doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED)); doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED)); doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED)); doc.add(new Field(FIELD_PATH, path, TextField.TYPE_STORED)); doc.add(new Field(FIELD_DATE, blobDate, StringField.TYPE_STORED)); doc.add(new Field(FIELD_AUTHOR, blobAuthor, TextField.TYPE_STORED)); doc.add(new Field(FIELD_COMMITTER, blobCommitter, TextField.TYPE_STORED)); // determine extension to compare to the extension // blacklist String ext = null; String name = path.toLowerCase(); if (name.indexOf('.') > -1) { ext = name.substring(name.lastIndexOf('.') + 1); } // index the blob content if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) { ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB); InputStream in = ldr.openStream(); int n; while ((n = in.read(tmp)) > 0) { os.write(tmp, 0, n); } in.close(); byte[] content = os.toByteArray(); String str = StringUtils.decodeString(content, encodings); doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED)); os.reset(); } // add the blob to the index writer.addDocument(doc); } } os.close(); // index the tip commit object if (indexedCommits.add(tipId)) { Document doc = createDocument(tip, tags.get(tipId)); doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED)); writer.addDocument(doc); result.commitCount += 1; result.branchCount += 1; } // traverse the log and index the previous commit objects RevWalk historyWalk = new RevWalk(reader); historyWalk.markStart(historyWalk.parseCommit(tip.getId())); RevCommit rev; while ((rev = historyWalk.next()) != null) { String hash = rev.getId().getName(); if (indexedCommits.add(hash)) { Document doc = createDocument(rev, tags.get(hash)); doc.add(new Field(FIELD_BRANCH, branchName, 
TextField.TYPE_STORED)); writer.addDocument(doc); result.commitCount += 1; } } } // finished reader.close(); // commit all changes and reset the searcher config.save(); writer.commit(); resetIndexSearcher(model.name); result.success(); } catch (Exception e) { logger.error("Exception while reindexing " + model.name, e); } return result; }
From source file:com.gitblit.service.LuceneService.java
License:Apache License
/** * Incrementally update the index with the specified commit for the * repository.//from w w w. ja v a 2 s . c o m * * @param repositoryName * @param repository * @param branch * the fully qualified branch name (e.g. refs/heads/master) * @param commit * @return true, if successful */ private IndexResult index(String repositoryName, Repository repository, String branch, RevCommit commit) { IndexResult result = new IndexResult(); try { String[] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]); List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit); String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE); IndexWriter writer = getIndexWriter(repositoryName); for (PathChangeModel path : changedPaths) { if (path.isSubmodule()) { continue; } // delete the indexed blob deleteBlob(repositoryName, branch, path.name); // re-index the blob if (!ChangeType.DELETE.equals(path.changeType)) { result.blobCount++; Document doc = new Document(); doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED)); doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED)); doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED)); doc.add(new Field(FIELD_PATH, path.path, TextField.TYPE_STORED)); doc.add(new Field(FIELD_DATE, revDate, StringField.TYPE_STORED)); doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), TextField.TYPE_STORED)); doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), TextField.TYPE_STORED)); // determine extension to compare to the extension // blacklist String ext = null; String name = path.name.toLowerCase(); if (name.indexOf('.') > -1) { ext = name.substring(name.lastIndexOf('.') + 1); } if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) { // read the blob content String str = JGitUtils.getStringContent(repository, commit.getTree(), path.path, encodings); if (str != null) { doc.add(new 
Field(FIELD_CONTENT, str, TextField.TYPE_STORED)); writer.addDocument(doc); } } } } writer.commit(); // get any annotated commit tags List<String> commitTags = new ArrayList<String>(); for (RefModel ref : JGitUtils.getTags(repository, false, -1)) { if (ref.isAnnotatedTag() && ref.getReferencedObjectId().equals(commit.getId())) { commitTags.add(ref.displayName); } } // create and write the Lucene document Document doc = createDocument(commit, commitTags); doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED)); result.commitCount++; result.success = index(repositoryName, doc); } catch (Exception e) { logger.error(MessageFormat.format("Exception while indexing commit {0} in {1}", commit.getId().getName(), repositoryName), e); } return result; }
From source file:com.gitblit.service.LuceneService.java
License:Apache License
/**
 * Builds the Lucene document describing a commit.
 *
 * <p>The document carries the object type, commit name, commit date (minute resolution),
 * author, committer, short message (summary) and full message (content). A tag field
 * holding the flattened tag list is added only when {@code tags} is not empty.
 *
 * @param commit
 *            the commit to describe
 * @param tags
 *            the tags to attach to the commit; may be empty
 * @return a Lucene document
 */
private Document createDocument(RevCommit commit, List<String> tags) {
    // commit time is in seconds; DateTools expects milliseconds
    String commitDate = DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE);

    Document commitDoc = new Document();
    commitDoc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.commit.name(), StringField.TYPE_STORED));
    commitDoc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
    commitDoc.add(new Field(FIELD_DATE, commitDate, StringField.TYPE_STORED));
    commitDoc.add(new Field(FIELD_AUTHOR, getAuthor(commit), TextField.TYPE_STORED));
    commitDoc.add(new Field(FIELD_COMMITTER, getCommitter(commit), TextField.TYPE_STORED));
    commitDoc.add(new Field(FIELD_SUMMARY, commit.getShortMessage(), TextField.TYPE_STORED));
    commitDoc.add(new Field(FIELD_CONTENT, commit.getFullMessage(), TextField.TYPE_STORED));

    boolean hasTags = !ArrayUtils.isEmpty(tags);
    if (hasTags) {
        commitDoc.add(new Field(FIELD_TAG, StringUtils.flattenStrings(tags), TextField.TYPE_STORED));
    }
    return commitDoc;
}
From source file:com.googlecode.lucene.spatial.strategy.prefix.TermQueryGridStrategyTestCase.java
License:Apache License
@Test public void testPrefixGridLosAngeles() throws IOException { Shape point = new PointImpl(-118.243680, 34.052230); Document losAngeles = new Document(); losAngeles.add(new Field("name", "Los Angeles", StringField.TYPE_STORED)); losAngeles.add(strategy.createField(fieldInfo, point, true, true)); addDocumentsAndCommit(Arrays.asList(losAngeles)); // Polygon won't work with SimpleSpatialContext SpatialArgsParser spatialArgsParser = new SpatialArgsParser(); SpatialArgs spatialArgs = spatialArgsParser.parse( "IsWithin(POLYGON((-127.00390625 39.8125,-112.765625 39.98828125,-111.53515625 31.375,-125.94921875 30.14453125,-127.00390625 39.8125)))", ctx);//from w w w. j av a 2 s .c o m Query query = strategy.makeQuery(spatialArgs, fieldInfo); SearchResults searchResults = executeQuery(query, 1); assertEquals(1, searchResults.numFound); }
From source file:com.googlecode.lucene.spatial.strategy.prefix.TestTermQueryGridStrategy.java
License:Apache License
@Test public void testNGramPrefixGridLosAngeles() throws IOException { final JtsSpatialContext ctx = JtsSpatialContext.GEO_KM; final QuadPrefixTree grid = new QuadPrefixTree(ctx); SimpleSpatialFieldInfo fieldInfo = new SimpleSpatialFieldInfo("geo"); TermQueryPrefixTreeStrategy prefixGridStrategy = new TermQueryPrefixTreeStrategy(grid); Shape point = new PointImpl(-118.243680, 34.052230); Document losAngeles = new Document(); losAngeles.add(new Field("name", "Los Angeles", StringField.TYPE_STORED)); losAngeles.add(prefixGridStrategy.createField(fieldInfo, point, true, true)); addDocumentsAndCommit(Arrays.asList(losAngeles)); // This won't work with simple spatial context... SpatialArgsParser spatialArgsParser = new SpatialArgsParser(); SpatialArgs spatialArgs = spatialArgsParser.parse( "IsWithin(POLYGON((-127.00390625 39.8125,-112.765625 39.98828125,-111.53515625 31.375,-125.94921875 30.14453125,-127.00390625 39.8125)))", ctx);/*from w ww . jav a2 s . co m*/ Query query = prefixGridStrategy.makeQuery(spatialArgs, fieldInfo); SearchResults searchResults = executeQuery(query, 1); assertEquals(1, searchResults.numFound); }