Example usage for org.apache.lucene.index IndexWriter addDocument

List of usage examples for org.apache.lucene.index IndexWriter addDocument

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexWriter addDocument.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException 

Source Link

Document

Adds a document to this index.

Usage

From source file:com.concursive.connect.web.modules.reviews.dao.ProjectRatingIndexer.java

License:Open Source License

/**
 * Description of the Method//from w ww.  ja  v  a 2 s  .  c om
 *
 * @param writer   Description of the Parameter
 * @param item     Description of the Parameter
 * @param modified Description of the Parameter
 * @throws IOException Description of the Exception
 */
public void add(IndexWriter writer, Object item, boolean modified) throws IOException {
    ProjectRating projectRating = (ProjectRating) item;
    // add the document
    Document document = new Document();
    document.add(new Field("type", "reviews", Field.Store.YES, Field.Index.UN_TOKENIZED));
    document.add(new Field("ratingId", String.valueOf(projectRating.getId()), Field.Store.YES,
            Field.Index.UN_TOKENIZED));
    document.add(new Field("projectId", String.valueOf(projectRating.getProjectId()), Field.Store.YES,
            Field.Index.UN_TOKENIZED));
    if (projectRating.getProjectId() > -1) {
        // use the project's general access to speedup guest projects
        Project project = ProjectUtils.loadProject(projectRating.getProjectId());
        document.add(new Field("projectCategoryId", String.valueOf(project.getCategoryId()), Field.Store.YES,
                Field.Index.UN_TOKENIZED));
        document.add(new Field("guests", (project.getFeatures().getAllowGuests() ? "1" : "0"), Field.Store.YES,
                Field.Index.UN_TOKENIZED));
        document.add(new Field("participants", (project.getFeatures().getAllowParticipants() ? "1" : "0"),
                Field.Store.YES, Field.Index.UN_TOKENIZED));
        // determine if membership is needed for this content based on a guest's access to the data
        int membership = project.getFeatures().getMembershipRequired() ? 1 : 0;
        if (membership == 1 && ProjectUtils.hasAccess(project.getId(), UserUtils.createGuestUser(),
                "project-reviews-view")) {
            membership = 0;
        }
        document.add(
                new Field("membership", String.valueOf(membership), Field.Store.YES, Field.Index.UN_TOKENIZED));
    }
    document.add(new Field("title", projectRating.getTitle(), Field.Store.YES, Field.Index.TOKENIZED));
    document.add(new Field("titleLower", projectRating.getTitle().toLowerCase(), Field.Store.YES,
            Field.Index.UN_TOKENIZED));
    document.add(new Field("contents",
            projectRating.getTitle() + " " + ContentUtils.toText(projectRating.getComment()), Field.Store.YES,
            Field.Index.TOKENIZED));
    if (projectRating.getModified() != null) {
        SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMdd");
        document.add(new Field("lastModified", String.valueOf(formatter.format(projectRating.getModified())),
                Field.Store.YES, Field.Index.UN_TOKENIZED));
    }
    writer.addDocument(document);
    if (System.getProperty("DEBUG") != null && modified) {
        System.out.println("ProjectRatingIndexer-> Added: " + projectRating.getId());
    }
}

From source file:com.concursive.connect.web.modules.wiki.dao.WikiIndexer.java

License:Open Source License

/**
 * Description of the Method// www . ja  v  a2s  . c om
 *
 * @param writer   Description of the Parameter
 * @param item     Description of the Parameter
 * @param modified Description of the Parameter
 * @throws java.io.IOException Description of the Exception
 */
public void add(IndexWriter writer, Object item, boolean modified) throws IOException {
    Wiki wiki = (Wiki) item;
    // use the project's general access to speedup guest projects
    Project project = ProjectUtils.loadProject(wiki.getProjectId());
    // Use the wiki context
    WikiToHTMLContext wikiContext = new WikiToHTMLContext(wiki, new HashMap<String, ImageInfo>(), -1, false,
            "");
    // add the document
    Document document = new Document();
    document.add(new Field("type", "wiki", Field.Store.YES, Field.Index.UN_TOKENIZED));
    document.add(new Field("wikiId", String.valueOf(wiki.getId()), Field.Store.YES, Field.Index.UN_TOKENIZED));
    document.add(new Field("projectId", String.valueOf(wiki.getProjectId()), Field.Store.YES,
            Field.Index.UN_TOKENIZED));
    document.add(new Field("projectCategoryId", String.valueOf(project.getCategoryId()), Field.Store.YES,
            Field.Index.UN_TOKENIZED));
    document.add(new Field("instanceId", String.valueOf(project.getInstanceId()), Field.Store.YES,
            Field.Index.UN_TOKENIZED));
    // use the project to determine when this data can be viewed...
    document.add(new Field("guests", (project.getFeatures().getAllowGuests() ? "1" : "0"), Field.Store.YES,
            Field.Index.UN_TOKENIZED));
    document.add(new Field("participants", (project.getFeatures().getAllowParticipants() ? "1" : "0"),
            Field.Store.YES, Field.Index.UN_TOKENIZED));
    // determine if membership is needed for this content based on a guest's access to the data
    int membership = project.getFeatures().getMembershipRequired() ? 1 : 0;
    if (membership == 1
            && ProjectUtils.hasAccess(project.getId(), UserUtils.createGuestUser(), "project-wiki-view")) {
        membership = 0;
    }
    document.add(
            new Field("membership", String.valueOf(membership), Field.Store.YES, Field.Index.UN_TOKENIZED));
    document.add(new Field("title", wiki.getSubject(), Field.Store.YES, Field.Index.TOKENIZED));
    document.add(new Field("titleLower", wiki.getSubject().toLowerCase(), Field.Store.YES,
            Field.Index.UN_TOKENIZED));
    document.add(new Field("contents",
            wiki.getSubject() + " "
                    + ContentUtils.toText(ContentUtils
                            .stripHTML(StringUtils.fromHtmlValue(WikiToHTMLUtils.getHTML(wikiContext, null)))),
            Field.Store.YES, Field.Index.TOKENIZED));
    document.add(new Field("subjectLink", wiki.getSubjectLink(), Field.Store.YES, Field.Index.TOKENIZED));
    if (modified) {
        document.add(new Field("modified", String.valueOf(System.currentTimeMillis()), Field.Store.YES,
                Field.Index.UN_TOKENIZED));
    } else {
        document.add(new Field("modified", String.valueOf(wiki.getModified().getTime()), Field.Store.YES,
                Field.Index.UN_TOKENIZED));
    }
    writer.addDocument(document);
    if (System.getProperty("DEBUG") != null && modified) {
        System.out.println("WikiIndexer-> Added: " + wiki.getId());
    }
}

From source file:com.czw.search.lucene.example.facet.AssociationsFacetsExample.java

License:Apache License

/** Build the example index. */
private void index() throws IOException {
    IndexWriterConfig iwc = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE);
    IndexWriter indexWriter = new IndexWriter(indexDir, iwc);

    // Writes facet ords to a separate directory from the main index
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    Document doc = new Document();
    // 3 occurrences for tag 'lucene'
    doc.add(new IntAssociationFacetField(3, "tags", "lucene"));
    // 87% confidence level of genre 'computing'
    doc.add(new FloatAssociationFacetField(0.87f, "genre", "computing"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    // 1 occurrence for tag 'lucene'
    doc.add(new IntAssociationFacetField(1, "tags", "lucene"));
    // 2 occurrence for tag 'solr'
    doc.add(new IntAssociationFacetField(2, "tags", "solr"));
    // 75% confidence level of genre 'computing'
    doc.add(new FloatAssociationFacetField(0.75f, "genre", "computing"));
    // 34% confidence level of genre 'software'
    doc.add(new FloatAssociationFacetField(0.34f, "genre", "software"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    indexWriter.close();//from  w w  w.  j ava 2 s.c o m
    taxoWriter.close();
}

From source file:com.czw.search.lucene.example.facet.DistanceFacetsExample.java

License:Apache License

/** Build the example index. */
public void index() throws IOException {
    IndexWriter writer = new IndexWriter(indexDir,
            new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));

    // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter

    // Add documents with latitude/longitude location:
    // we index these both as DoublePoints (for bounding box/ranges) and as NumericDocValuesFields (for scoring)
    Document doc = new Document();
    doc.add(new DoublePoint("latitude", 40.759011));
    doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.759011)));
    doc.add(new DoublePoint("longitude", -73.9844722));
    doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-73.9844722)));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new DoublePoint("latitude", 40.718266));
    doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.718266)));
    doc.add(new DoublePoint("longitude", -74.007819));
    doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.007819)));
    writer.addDocument(doc);//ww w .  ja  v a2  s  . c  om

    doc = new Document();
    doc.add(new DoublePoint("latitude", 40.7051157));
    doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.7051157)));
    doc.add(new DoublePoint("longitude", -74.0088305));
    doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.0088305)));
    writer.addDocument(doc);

    // Open near-real-time searcher
    searcher = new IndexSearcher(DirectoryReader.open(writer));
    writer.close();
}

From source file:com.czw.search.lucene.example.facet.ExpressionAggregationFacetsExample.java

License:Apache License

/** Build the example index. */
private void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir,
            new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));

    // Writes facet ords to a separate directory from the main index
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    Document doc = new Document();
    doc.add(new TextField("c", "foo bar", Store.NO));
    doc.add(new NumericDocValuesField("popularity", 5L));
    doc.add(new FacetField("A", "B"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new TextField("c", "foo foo bar", Store.NO));
    doc.add(new NumericDocValuesField("popularity", 3L));
    doc.add(new FacetField("A", "C"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    indexWriter.close();/*  w w  w. j a va2s.  c  o m*/
    taxoWriter.close();
}

From source file:com.czw.search.lucene.example.facet.MultiCategoryListsFacetsExample.java

License:Apache License

/** Build the example index. */
private void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir,
            new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));

    // Writes facet ords to a separate directory from the main index
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    Document doc = new Document();
    doc.add(new FacetField("Author", "Bob"));
    doc.add(new FacetField("Publish Date", "2010", "10", "15"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Lisa"));
    doc.add(new FacetField("Publish Date", "2010", "10", "20"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Lisa"));
    doc.add(new FacetField("Publish Date", "2012", "1", "1"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Susan"));
    doc.add(new FacetField("Publish Date", "2012", "1", "7"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Frank"));
    doc.add(new FacetField("Publish Date", "1999", "5", "5"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    indexWriter.close();/*from  w  w w  . j  av a  2  s . co  m*/
    taxoWriter.close();
}

From source file:com.czw.search.lucene.example.facet.RangeFacetsExample.java

License:Apache License

/** Build the example index. */
public void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir,
            new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));

    // Add documents with a fake timestamp, 1000 sec before
    // "now", 2000 sec before "now", ...:
    for (int i = 0; i < 100; i++) {
        Document doc = new Document();
        long then = nowSec - i * 1000;
        // Add as doc values field, so we can compute range facets:
        doc.add(new NumericDocValuesField("timestamp", then));
        // Add as numeric field so we can drill-down:
        doc.add(new LongPoint("timestamp", then));
        indexWriter.addDocument(doc);
    }/*from  w  ww. ja va2  s .com*/

    // Open near-real-time searcher
    searcher = new IndexSearcher(DirectoryReader.open(indexWriter));
    indexWriter.close();
}

From source file:com.czw.search.lucene.example.facet.SimpleFacetsExample.java

License:Apache License

/**
 * Build the example index./* www.  ja v a  2  s .  co  m*/
 */
private void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir,
            new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));

    // Writes facet ords to a separate directory from the main index
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    Document doc = new Document();
    doc.add(new FacetField("Author", "Bob"));
    doc.add(new FacetField("Publish Date", "2010", "10", "15"));
    doc.add(new FacetField("Category", "art"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Bob"));
    doc.add(new FacetField("Publish Date", "2010", "10", "15"));
    doc.add(new FacetField("Category", "biography"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Lisa"));
    doc.add(new FacetField("Publish Date", "2010", "10", "20"));
    doc.add(new FacetField("Category", "fiction"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Lisa"));
    doc.add(new FacetField("Publish Date", "2012", "1", "1"));
    doc.add(new FacetField("Category", "food"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Susan"));
    doc.add(new FacetField("Publish Date", "2012", "1", "7"));
    doc.add(new FacetField("Category", "science"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Frank"));
    doc.add(new FacetField("Publish Date", "1999", "5", "5"));
    doc.add(new FacetField("Category", "crime"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Frank"));
    doc.add(new FacetField("Publish Date", "2011", "5", "15"));
    doc.add(new FacetField("Category", "fiction"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Frank"));
    doc.add(new FacetField("Publish Date", "2017", "6", "5"));
    doc.add(new FacetField("Category", "science"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    indexWriter.close();
    taxoWriter.close();
}

From source file:com.czw.search.lucene.example.facet.SimpleSortedSetFacetsExample.java

License:Apache License

/**
 * Build the example index./* w  ww .  j av  a  2 s .  c  om*/
 */
private void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir,
            new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));

    Document doc = new Document();
    doc.add(new SortedSetDocValuesFacetField("Author", "Bob"));
    doc.add(new SortedSetDocValuesFacetField("Publish Year", "2010"));
    indexWriter.addDocument(config.build(doc));

    doc = new Document();
    doc.add(new SortedSetDocValuesFacetField("Author", "Lisa"));
    doc.add(new SortedSetDocValuesFacetField("Publish Year", "2010"));
    indexWriter.addDocument(config.build(doc));

    doc = new Document();
    doc.add(new SortedSetDocValuesFacetField("Author", "Lisa"));
    doc.add(new SortedSetDocValuesFacetField("Publish Year", "2012"));
    indexWriter.addDocument(config.build(doc));

    doc = new Document();
    doc.add(new SortedSetDocValuesFacetField("Author", "Susan"));
    doc.add(new SortedSetDocValuesFacetField("Publish Year", "2012"));
    indexWriter.addDocument(config.build(doc));

    doc = new Document();
    doc.add(new SortedSetDocValuesFacetField("Author", "Frank"));
    doc.add(new SortedSetDocValuesFacetField("Publish Year", "1999"));
    indexWriter.addDocument(config.build(doc));

    indexWriter.close();
}

From source file:com.czw.search.lucene.example.IndexFiles.java

License:Apache License

/**
 * Indexes a single document/*from  www . j  a  v  a 2  s  .  c  o  m*/
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path".  Use a
        // field that is indexed (i.e. searchable), but don't tokenize
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file a field named "modified".
        // Use a LongPoint that is indexed (i.e. efficiently filterable with
        // PointRangeQuery).  This indexes to milli-second resolution, which
        // is often too fine.  You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongPoint("modified", lastModified));

        // Add the contents of the file to a field named "contents".  Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}