Example usage for org.apache.lucene.document StringField TYPE_STORED

List of usage examples for org.apache.lucene.document StringField TYPE_STORED

Introduction

In this page you can find the example usage for org.apache.lucene.document StringField TYPE_STORED.

Prototype

FieldType TYPE_STORED

To view the source code for org.apache.lucene.document StringField TYPE_STORED, click the Source Link below.

Click Source Link

Document

Indexed, not tokenized, omits norms, indexes DOCS_ONLY, stored

Usage

From source file:at.ac.univie.mminf.luceneSKOS.analysis.engine.jena.SKOSEngineImpl.java

License:Apache License

/**
 * Builds a Lucene document for the given SKOS concept. A separate document
 * per language is produced so that language restrictions can be applied.
 */
private Document createDocumentsFromConcept(Resource skos_concept) {
    Document doc = new Document();
    // the concept URI is the exact-match key for the document
    doc.add(new Field(FIELD_URI, skos_concept.getURI(), StringField.TYPE_STORED));
    // preferred, alternative and hidden lexical labels
    indexAnnotation(skos_concept, doc, SKOS.prefLabel, FIELD_PREF_LABEL);
    indexAnnotation(skos_concept, doc, SKOS.altLabel, FIELD_ALT_LABEL);
    indexAnnotation(skos_concept, doc, SKOS.hiddenLabel, FIELD_HIDDEN_LABEL);
    // URIs of the semantically related concepts
    indexObject(skos_concept, doc, SKOS.broader, FIELD_BROADER);
    indexObject(skos_concept, doc, SKOS.broaderTransitive, FIELD_BROADER_TRANSITIVE);
    indexObject(skos_concept, doc, SKOS.narrower, FIELD_NARROWER);
    indexObject(skos_concept, doc, SKOS.narrowerTransitive, FIELD_NARROWER_TRANSITIVE);
    indexObject(skos_concept, doc, SKOS.related, FIELD_RELATED);
    return doc;
}

From source file:at.ac.univie.mminf.luceneSKOS.analysis.engine.jena.SKOSEngineImpl.java

License:Apache License

/**
 * Adds every label of the given annotation property to the document as a
 * stored, lower-cased keyword field. Labels whose language is not in the
 * configured language set are skipped (no filtering when the set is
 * null or empty).
 */
private void indexAnnotation(Resource skos_concept, Document conceptDoc, AnnotationProperty property,
        String field) {
    for (StmtIterator it = skos_concept.listProperties(property); it.hasNext();) {
        Literal literal = it.nextStatement().getObject().as(Literal.class);
        String lang = literal.getLanguage();
        boolean restricted = this.languages != null && !this.languages.isEmpty();
        if (restricted && !this.languages.contains(lang)) {
            continue;
        }
        // labels are indexed in lower case
        String value = literal.getLexicalForm().toLowerCase(Locale.ROOT);
        conceptDoc.add(new Field(field, value, StringField.TYPE_STORED));
    }
}

From source file:ch.admin.isb.hermes5.business.search.IndexWriterWrapper.java

License:Apache License

/**
 * Assembles a Lucene document for one search element. Free-text fields
 * (presentationName, content) are tokenized; name and type are indexed as
 * exact keywords. All fields are stored.
 */
private org.apache.lucene.document.Document buildDocument(String content, String presentationName, String name,
        String type) {
    org.apache.lucene.document.Document document = new org.apache.lucene.document.Document();
    document.add(new Field("presentationName", presentationName, TextField.TYPE_STORED));
    document.add(new Field("content", content, TextField.TYPE_STORED));
    document.add(new Field("name", name, StringField.TYPE_STORED));
    document.add(new Field("type", type, StringField.TYPE_STORED));
    return document;
}

From source file:com.basistech.lucene.tools.LuceneQueryToolTest.java

License:Apache License

/**
 * Builds the small in-memory index shared by all tests: three
 * "president" documents plus one document exercising multi-valued
 * and stored-but-unindexed fields.
 */
@BeforeClass
public static void oneTimeSetup() throws IOException, ParseException {
    LuceneQueryToolTest.showOutput = false; // flip to true when debugging tests
    Directory dir = new RAMDirectory();
    IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
    IndexWriter writer = new IndexWriter(dir, config);

    Document document = new Document();
    document.add(new Field("longest-mention", "Bill Clinton", StringField.TYPE_STORED));
    document.add(new Field("context", "Hillary Clinton Arkansas", TextField.TYPE_NOT_STORED));
    writer.addDocument(document);

    document = new Document();
    document.add(new Field("longest-mention", "George W. Bush", StringField.TYPE_STORED));
    document.add(new Field("context", "Texas Laura Bush", TextField.TYPE_NOT_STORED));
    writer.addDocument(document);

    document = new Document();
    document.add(new Field("longest-mention", "George H. W. Bush", StringField.TYPE_STORED));
    document.add(new Field("context", "Barbara Bush Texas", TextField.TYPE_NOT_STORED));
    writer.addDocument(document);

    document = new Document();
    document.add(new Field("bbb", "foo", StringField.TYPE_STORED));
    document.add(new Field("bbb", "bar", StringField.TYPE_STORED));
    document.add(new Field("aaa", "foo", StringField.TYPE_STORED));
    // stored-only variant: value is kept but excluded from the inverted index
    FieldType unindexed = new FieldType(StringField.TYPE_STORED);
    unindexed.setIndexOptions(IndexOptions.NONE);
    document.add(new Field("zzz", "foo", unindexed));
    writer.addDocument(document);

    writer.close();
    reader = DirectoryReader.open(dir);
}

From source file:com.basistech.lucene.tools.LuceneQueryToolTest.java

License:Apache License

/**
 * Verifies that a stored binary field is rendered as a hex string by the
 * query tool ("ABC" -> 0x414243).
 */
@Test
public void testBinaryField() throws IOException, ParseException {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
    Document document = new Document();
    document.add(new Field("id", "1", StringField.TYPE_STORED));
    document.add(new Field("binary-field", "ABC".getBytes(Charsets.UTF_8), StoredField.TYPE));
    writer.addDocument(document);
    writer.close();
    reader = DirectoryReader.open(dir);

    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    LuceneQueryTool lqt = new LuceneQueryTool(reader, new PrintStream(bytes));
    lqt.run(new String[] { "id:1" });
    String result = Joiner.on('\n').join(getOutput(bytes));
    assertTrue(result.contains("0x414243")); // binary rep of "ABC"
}

From source file:com.gitblit.service.LuceneService.java

License:Apache License

/**
 * This completely indexes the repository and will destroy any existing
 * index.
 *
 * @param model the repository model (name and indexed-branch configuration)
 * @param repository the Git repository to index
 * @return IndexResult with blob/commit/branch counts; success is only set
 *         when the whole repository was indexed without error
 */
public IndexResult reindex(RepositoryModel model, Repository repository) {
    IndexResult result = new IndexResult();
    // if the old index cannot be removed, do not build a new one on top of it
    if (!deleteIndex(model.name)) {
        return result;
    }
    try {
        String[] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
        FileBasedConfig config = getConfig(repository);
        Set<String> indexedCommits = new TreeSet<String>();
        IndexWriter writer = getIndexWriter(model.name);
        // build a quick lookup of tags keyed by the tagged commit's id
        Map<String, List<String>> tags = new HashMap<String, List<String>>();
        for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
            if (!tag.isAnnotatedTag()) {
                // skip non-annotated tags
                continue;
            }
            if (!tags.containsKey(tag.getReferencedObjectId().getName())) {
                tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
            }
            tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
        }

        // shared object reader for all the rev/tree walks below
        ObjectReader reader = repository.newObjectReader();

        // get the local branches
        List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);

        // sort them by most recently updated
        Collections.sort(branches, new Comparator<RefModel>() {
            @Override
            public int compare(RefModel ref1, RefModel ref2) {
                return ref2.getDate().compareTo(ref1.getDate());
            }
        });

        // reorder default branch to first position
        RefModel defaultBranch = null;
        ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
        for (RefModel branch : branches) {
            if (branch.getObjectId().equals(defaultBranchId)) {
                defaultBranch = branch;
                break;
            }
        }
        branches.remove(defaultBranch);
        branches.add(0, defaultBranch);

        // walk through each branch
        for (RefModel branch : branches) {

            boolean indexBranch = false;
            if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
                    && branch.equals(defaultBranch)) {
                // indexing "default" branch
                indexBranch = true;
            } else if (branch.getName().startsWith(com.gitblit.Constants.R_META)) {
                // skip internal meta branches
                indexBranch = false;
            } else {
                // normal explicit branch check
                indexBranch = model.indexedBranches.contains(branch.getName());
            }

            // if this branch is not specifically indexed then skip
            if (!indexBranch) {
                continue;
            }

            String branchName = branch.getName();
            RevWalk revWalk = new RevWalk(reader);
            RevCommit tip = revWalk.parseCommit(branch.getObjectId());
            String tipId = tip.getId().getName();

            // record the branch alias and its tip in the repository's index config
            String keyName = getBranchKey(branchName);
            config.setString(CONF_ALIAS, null, keyName, branchName);
            config.setString(CONF_BRANCH, null, keyName, tipId);

            // index the blob contents of the tree
            TreeWalk treeWalk = new TreeWalk(repository);
            treeWalk.addTree(tip.getTree());
            treeWalk.setRecursive(true);

            // collect every blob path reachable from the tip tree
            Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();
            while (treeWalk.next()) {
                // ensure path is not in a submodule
                if (treeWalk.getFileMode(0) != FileMode.GITLINK) {
                    paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
                }
            }

            ByteArrayOutputStream os = new ByteArrayOutputStream();
            byte[] tmp = new byte[32767]; // reusable copy buffer for blob content

            RevWalk commitWalk = new RevWalk(reader);
            commitWalk.markStart(tip);

            // walk history from the tip; each remaining blob path is
            // attributed to the first (most recent) commit that touched it,
            // and removed from the pending set once indexed
            RevCommit commit;
            while ((paths.size() > 0) && (commit = commitWalk.next()) != null) {
                TreeWalk diffWalk = new TreeWalk(reader);
                int parentCount = commit.getParentCount();
                switch (parentCount) {
                case 0:
                    diffWalk.addTree(new EmptyTreeIterator());
                    break;
                case 1:
                    diffWalk.addTree(getTree(commitWalk, commit.getParent(0)));
                    break;
                default:
                    // skip merge commits
                    continue;
                }
                diffWalk.addTree(getTree(commitWalk, commit));
                diffWalk.setFilter(ANY_DIFF);
                diffWalk.setRecursive(true);
                while ((paths.size() > 0) && diffWalk.next()) {
                    String path = diffWalk.getPathString();
                    if (!paths.containsKey(path)) {
                        continue;
                    }

                    // remove path from set
                    ObjectId blobId = paths.remove(path);
                    result.blobCount++;

                    // index the blob metadata
                    String blobAuthor = getAuthor(commit);
                    String blobCommitter = getCommitter(commit);
                    String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE);

                    Document doc = new Document();
                    doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(),
                            StringField.TYPE_STORED));
                    doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
                    doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
                    doc.add(new Field(FIELD_PATH, path, TextField.TYPE_STORED));
                    doc.add(new Field(FIELD_DATE, blobDate, StringField.TYPE_STORED));
                    doc.add(new Field(FIELD_AUTHOR, blobAuthor, TextField.TYPE_STORED));
                    doc.add(new Field(FIELD_COMMITTER, blobCommitter, TextField.TYPE_STORED));

                    // determine extension to compare to the extension
                    // blacklist
                    String ext = null;
                    String name = path.toLowerCase();
                    if (name.indexOf('.') > -1) {
                        ext = name.substring(name.lastIndexOf('.') + 1);
                    }

                    // index the blob content
                    if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
                        ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
                        InputStream in = ldr.openStream();
                        int n;
                        while ((n = in.read(tmp)) > 0) {
                            os.write(tmp, 0, n);
                        }
                        in.close();
                        byte[] content = os.toByteArray();
                        // decode bytes using the configured blob encodings
                        String str = StringUtils.decodeString(content, encodings);
                        doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
                        os.reset();
                    }

                    // add the blob to the index
                    writer.addDocument(doc);
                }
            }

            os.close();

            // index the tip commit object
            if (indexedCommits.add(tipId)) {
                Document doc = createDocument(tip, tags.get(tipId));
                doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
                writer.addDocument(doc);
                result.commitCount += 1;
                result.branchCount += 1;
            }

            // traverse the log and index the previous commit objects;
            // commits reachable from multiple branches are indexed only once
            RevWalk historyWalk = new RevWalk(reader);
            historyWalk.markStart(historyWalk.parseCommit(tip.getId()));
            RevCommit rev;
            while ((rev = historyWalk.next()) != null) {
                String hash = rev.getId().getName();
                if (indexedCommits.add(hash)) {
                    Document doc = createDocument(rev, tags.get(hash));
                    doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
                    writer.addDocument(doc);
                    result.commitCount += 1;
                }
            }
        }

        // finished
        reader.close();

        // commit all changes and reset the searcher
        config.save();
        writer.commit();
        resetIndexSearcher(model.name);
        result.success();
    } catch (Exception e) {
        logger.error("Exception while reindexing " + model.name, e);
    }
    return result;
}

From source file:com.gitblit.service.LuceneService.java

License:Apache License

/**
 * Incrementally update the index with the specified commit for the
 * repository.
 *
 * @param repositoryName
 *            the name of the repository index to update
 * @param repository
 *            the Git repository
 * @param branch
 *            the fully qualified branch name (e.g. refs/heads/master)
 * @param commit
 *            the commit whose changed paths and metadata are indexed
 * @return the IndexResult; success reflects whether the commit document
 *         was written, counts cover the indexed blobs and commit
 */
private IndexResult index(String repositoryName, Repository repository, String branch, RevCommit commit) {
    IndexResult result = new IndexResult();
    try {
        String[] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
        List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
        String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE);
        IndexWriter writer = getIndexWriter(repositoryName);
        for (PathChangeModel path : changedPaths) {
            // submodule changes have no blob content to index
            if (path.isSubmodule()) {
                continue;
            }
            // delete the indexed blob
            deleteBlob(repositoryName, branch, path.name);

            // re-index the blob
            if (!ChangeType.DELETE.equals(path.changeType)) {
                result.blobCount++;
                Document doc = new Document();
                doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED));
                doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED));
                doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
                doc.add(new Field(FIELD_PATH, path.path, TextField.TYPE_STORED));
                doc.add(new Field(FIELD_DATE, revDate, StringField.TYPE_STORED));
                doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), TextField.TYPE_STORED));
                doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), TextField.TYPE_STORED));

                // determine extension to compare to the extension
                // blacklist
                String ext = null;
                String name = path.name.toLowerCase();
                if (name.indexOf('.') > -1) {
                    ext = name.substring(name.lastIndexOf('.') + 1);
                }

                if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
                    // read the blob content
                    String str = JGitUtils.getStringContent(repository, commit.getTree(), path.path, encodings);
                    // NOTE(review): undecodable blobs (str == null) are counted
                    // in blobCount but never added to the index
                    if (str != null) {
                        doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
                        writer.addDocument(doc);
                    }
                }
            }
        }
        writer.commit();

        // get any annotated commit tags
        List<String> commitTags = new ArrayList<String>();
        for (RefModel ref : JGitUtils.getTags(repository, false, -1)) {
            if (ref.isAnnotatedTag() && ref.getReferencedObjectId().equals(commit.getId())) {
                commitTags.add(ref.displayName);
            }
        }

        // create and write the Lucene document
        Document doc = createDocument(commit, commitTags);
        doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED));
        result.commitCount++;
        result.success = index(repositoryName, doc);
    } catch (Exception e) {
        logger.error(MessageFormat.format("Exception while indexing commit {0} in {1}",
                commit.getId().getName(), repositoryName), e);
    }
    return result;
}

From source file:com.gitblit.service.LuceneService.java

License:Apache License

/**
 * Creates a Lucene document for a commit.
 *
 * @param commit the commit to index
 * @param tags annotated tags that point at the commit; may be null or empty
 * @return the populated Lucene document
 */
private Document createDocument(RevCommit commit, List<String> tags) {
    Document document = new Document();
    document.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.commit.name(), StringField.TYPE_STORED));
    document.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
    // commit time is stored at minute resolution as an exact keyword
    String commitDate = DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE);
    document.add(new Field(FIELD_DATE, commitDate, StringField.TYPE_STORED));
    document.add(new Field(FIELD_AUTHOR, getAuthor(commit), TextField.TYPE_STORED));
    document.add(new Field(FIELD_COMMITTER, getCommitter(commit), TextField.TYPE_STORED));
    document.add(new Field(FIELD_SUMMARY, commit.getShortMessage(), TextField.TYPE_STORED));
    document.add(new Field(FIELD_CONTENT, commit.getFullMessage(), TextField.TYPE_STORED));
    if (!ArrayUtils.isEmpty(tags)) {
        document.add(new Field(FIELD_TAG, StringUtils.flattenStrings(tags), TextField.TYPE_STORED));
    }
    return document;
}

From source file:com.googlecode.lucene.spatial.strategy.prefix.TermQueryGridStrategyTestCase.java

License:Apache License

/**
 * Indexes a point for Los Angeles and checks that an IsWithin polygon
 * query covering the western US matches exactly that document.
 */
@Test
public void testPrefixGridLosAngeles() throws IOException {

    Document doc = new Document();
    doc.add(new Field("name", "Los Angeles", StringField.TYPE_STORED));
    Shape losAngelesPoint = new PointImpl(-118.243680, 34.052230);
    doc.add(strategy.createField(fieldInfo, losAngelesPoint, true, true));

    addDocumentsAndCommit(Arrays.asList(doc));

    // Polygon won't work with SimpleSpatialContext
    SpatialArgs spatialArgs = new SpatialArgsParser().parse(
            "IsWithin(POLYGON((-127.00390625 39.8125,-112.765625 39.98828125,-111.53515625 31.375,-125.94921875 30.14453125,-127.00390625 39.8125)))",
            ctx);

    Query query = strategy.makeQuery(spatialArgs, fieldInfo);
    SearchResults results = executeQuery(query, 1);
    assertEquals(1, results.numFound);
}

From source file:com.googlecode.lucene.spatial.strategy.prefix.TestTermQueryGridStrategy.java

License:Apache License

/**
 * Same Los Angeles polygon-containment check, but built against a locally
 * constructed quad-prefix-tree TermQueryPrefixTreeStrategy.
 */
@Test
public void testNGramPrefixGridLosAngeles() throws IOException {
    final JtsSpatialContext ctx = JtsSpatialContext.GEO_KM;
    TermQueryPrefixTreeStrategy prefixGridStrategy =
            new TermQueryPrefixTreeStrategy(new QuadPrefixTree(ctx));
    SimpleSpatialFieldInfo fieldInfo = new SimpleSpatialFieldInfo("geo");

    Document doc = new Document();
    doc.add(new Field("name", "Los Angeles", StringField.TYPE_STORED));
    Shape losAngelesPoint = new PointImpl(-118.243680, 34.052230);
    doc.add(prefixGridStrategy.createField(fieldInfo, losAngelesPoint, true, true));

    addDocumentsAndCommit(Arrays.asList(doc));

    // This won't work with simple spatial context...
    SpatialArgs spatialArgs = new SpatialArgsParser().parse(
            "IsWithin(POLYGON((-127.00390625 39.8125,-112.765625 39.98828125,-111.53515625 31.375,-125.94921875 30.14453125,-127.00390625 39.8125)))",
            ctx);

    Query query = prefixGridStrategy.makeQuery(spatialArgs, fieldInfo);
    SearchResults results = executeQuery(query, 1);
    assertEquals(1, results.numFound);
}