Example usage for org.apache.lucene.facet.taxonomy.directory DirectoryTaxonomyWriter DirectoryTaxonomyWriter

List of usage examples for org.apache.lucene.facet.taxonomy.directory DirectoryTaxonomyWriter DirectoryTaxonomyWriter

Introduction

In this page you can find the example usage for org.apache.lucene.facet.taxonomy.directory DirectoryTaxonomyWriter DirectoryTaxonomyWriter.

Prototype

public DirectoryTaxonomyWriter(Directory d) throws IOException 

Source Link

Document

Create this with OpenMode.CREATE_OR_APPEND .

Usage

From source file:com.czw.search.lucene.example.facet.AssociationsFacetsExample.java

License:Apache License

/** Build the example index. */
private void index() throws IOException {
    IndexWriterConfig iwc = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE);
    IndexWriter indexWriter = new IndexWriter(indexDir, iwc);

    // Writes facet ords to a separate directory from the main index
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    Document doc = new Document();
    // 3 occurrences for tag 'lucene'
    doc.add(new IntAssociationFacetField(3, "tags", "lucene"));
    // 87% confidence level of genre 'computing'
    doc.add(new FloatAssociationFacetField(0.87f, "genre", "computing"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    // 1 occurrence for tag 'lucene'
    doc.add(new IntAssociationFacetField(1, "tags", "lucene"));
    // 2 occurrence for tag 'solr'
    doc.add(new IntAssociationFacetField(2, "tags", "solr"));
    // 75% confidence level of genre 'computing'
    doc.add(new FloatAssociationFacetField(0.75f, "genre", "computing"));
    // 34% confidence level of genre 'software'
    doc.add(new FloatAssociationFacetField(0.34f, "genre", "software"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    indexWriter.close();/*w w  w. j ava 2  s  .c  o m*/
    taxoWriter.close();
}

From source file:com.czw.search.lucene.example.facet.ExpressionAggregationFacetsExample.java

License:Apache License

/** Build the example index. */
private void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir,
            new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));

    // Writes facet ords to a separate directory from the main index
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    Document doc = new Document();
    doc.add(new TextField("c", "foo bar", Store.NO));
    doc.add(new NumericDocValuesField("popularity", 5L));
    doc.add(new FacetField("A", "B"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new TextField("c", "foo foo bar", Store.NO));
    doc.add(new NumericDocValuesField("popularity", 3L));
    doc.add(new FacetField("A", "C"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    indexWriter.close();/*w  w  w.  j  av  a 2 s  .com*/
    taxoWriter.close();
}

From source file:com.czw.search.lucene.example.facet.MultiCategoryListsFacetsExample.java

License:Apache License

/** Build the example index. */
private void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir,
            new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));

    // Writes facet ords to a separate directory from the main index
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    Document doc = new Document();
    doc.add(new FacetField("Author", "Bob"));
    doc.add(new FacetField("Publish Date", "2010", "10", "15"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Lisa"));
    doc.add(new FacetField("Publish Date", "2010", "10", "20"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Lisa"));
    doc.add(new FacetField("Publish Date", "2012", "1", "1"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Susan"));
    doc.add(new FacetField("Publish Date", "2012", "1", "7"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Frank"));
    doc.add(new FacetField("Publish Date", "1999", "5", "5"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    indexWriter.close();/* w ww.j av a2  s .c o m*/
    taxoWriter.close();
}

From source file:com.czw.search.lucene.example.facet.SimpleFacetsExample.java

License:Apache License

/**
 * Build the example index.//from w w w  .ja  v a 2s.c  o  m
 */
private void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir,
            new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));

    // Writes facet ords to a separate directory from the main index
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    Document doc = new Document();
    doc.add(new FacetField("Author", "Bob"));
    doc.add(new FacetField("Publish Date", "2010", "10", "15"));
    doc.add(new FacetField("Category", "art"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Bob"));
    doc.add(new FacetField("Publish Date", "2010", "10", "15"));
    doc.add(new FacetField("Category", "biography"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Lisa"));
    doc.add(new FacetField("Publish Date", "2010", "10", "20"));
    doc.add(new FacetField("Category", "fiction"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Lisa"));
    doc.add(new FacetField("Publish Date", "2012", "1", "1"));
    doc.add(new FacetField("Category", "food"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Susan"));
    doc.add(new FacetField("Publish Date", "2012", "1", "7"));
    doc.add(new FacetField("Category", "science"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Frank"));
    doc.add(new FacetField("Publish Date", "1999", "5", "5"));
    doc.add(new FacetField("Category", "crime"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Frank"));
    doc.add(new FacetField("Publish Date", "2011", "5", "15"));
    doc.add(new FacetField("Category", "fiction"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Frank"));
    doc.add(new FacetField("Publish Date", "2017", "6", "5"));
    doc.add(new FacetField("Category", "science"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    indexWriter.close();
    taxoWriter.close();
}

From source file:com.justinleegrant.myluceneplayground.SimpleFacetsExample.java

License:Apache License

/** Build the example index. */
private void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir,
            new IndexWriterConfig(Version.LATEST, new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));

    // Writes facet ords to a separate directory from the main index
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    Document doc = new Document();
    doc.add(new FacetField("Author", "Bob"));
    doc.add(new FacetField("Publish Date", "2010", "10", "15"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Lisa"));
    doc.add(new FacetField("Publish Date", "2010", "10", "20"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Lisa"));
    doc.add(new FacetField("Publish Date", "2012", "1", "1"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Susan"));
    doc.add(new FacetField("Publish Date", "2012", "1", "7"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Frank"));
    doc.add(new FacetField("Publish Date", "1999", "5", "5"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    indexWriter.close();//from   ww w.  j a  va  2 s.c o  m
    taxoWriter.close();
}

From source file:com.m3958.apps.pcms.lucene.facet.MultiCategoryListsFacetsExample.java

License:Apache License

/** Build the example index. */
private void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER,
            new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER)));

    // Writes facet ords to a separate directory from the main index
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    // Reused across documents, to add the necessary facet fields
    FacetFields facetFields = new FacetFields(taxoWriter, indexingParams);

    add(indexWriter, facetFields, "Author/Bob", "Publish Date/2010/10/15");
    add(indexWriter, facetFields, "Author/Lisa", "Publish Date/2010/10/20");
    add(indexWriter, facetFields, "Author/Lisa", "Publish Date/2012/1/1");
    add(indexWriter, facetFields, "Author/Susan", "Publish Date/2012/1/7");
    add(indexWriter, facetFields, "Author/Frank", "Publish Date/1999/5/5");

    indexWriter.close();/*  ww w.jav  a 2s  .  c  o m*/
    taxoWriter.close();
}

From source file:com.m3958.apps.pcms.lucene.facet.SimpleFacetsExample.java

License:Apache License

/** Build the example index. */
private void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER,
            new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER)));

    // Writes facet ords to a separate directory from the main index
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    // Reused across documents, to add the necessary facet fields
    FacetFields facetFields = new FacetFields(taxoWriter);

    add(indexWriter, facetFields, "Author/Bob", "Publish Date/2010/10/15");
    add(indexWriter, facetFields, "Author/Lisa", "Publish Date/2010/10/20");
    add(indexWriter, facetFields, "Author/Lisa", "Publish Date/2012/1/1");
    add(indexWriter, facetFields, "Author/Susan", "Publish Date/2012/1/7");
    add(indexWriter, facetFields, "Author/Frank", "Publish Date/1999/5/5");

    indexWriter.close();//w  ww .j a v a  2 s . c om
    taxoWriter.close();
}

From source file:com.orientechnologies.lucene.test.LuceneNativeFacet.java

License:Apache License

/** Build the example index. */
private void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir,
            new IndexWriterConfig(Version.LUCENE_47, new WhitespaceAnalyzer(Version.LUCENE_47))
                    .setOpenMode(OpenMode.CREATE));

    // Writes facet ords to a separate directory from the main index
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    Document doc = new Document();
    doc.add(new FacetField("Author", "Bob"));
    doc.add(new FacetField("Publish Date", "2010", "10", "15"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Lisa"));
    doc.add(new FacetField("Publish Date", "2010", "10", "20"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Lisa"));
    doc.add(new FacetField("Publish Date", "2012", "1", "1"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Susan"));
    doc.add(new FacetField("Publish Date", "2012", "1", "7"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new FacetField("Author", "Frank"));
    doc.add(new FacetField("Publish Date", "1999", "5", "5"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    indexWriter.close();/*  ww w . j  a va  2  s.  c o m*/
    taxoWriter.close();
}

From source file:com.searchcode.app.service.CodeIndexer.java

License:Open Source License

/**
 * Given a queue of documents to index, index them by popping the queue limited to default of 1000 items.
 * This method must be synchronized as we have not added any logic to deal with multiple threads writing to the
 * index.//from  w  w  w  .jav a  2  s . com
 * TODO investigate how Lucene deals with multiple writes
 */
public synchronized void indexDocuments(Queue<CodeIndexDocument> codeIndexDocumentQueue) throws IOException {
    Directory indexDirectory = FSDirectory.open(this.INDEX_LOCATION);
    Directory facetDirectory = FSDirectory.open(this.FACET_LOCATION);

    Analyzer analyzer = new CodeAnalyzer();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
    FacetsConfig facetsConfig;

    indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    IndexWriter writer = new IndexWriter(indexDirectory, indexWriterConfig);
    TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(facetDirectory);

    try {
        CodeIndexDocument codeIndexDocument = codeIndexDocumentQueue.poll();
        int count = 0;

        while (codeIndexDocument != null) {
            Singleton.getLogger()
                    .info("Indexing file " + codeIndexDocument.getRepoLocationRepoNameLocationFilename());
            this.sharedService.decrementCodeIndexLinesCount(codeIndexDocument.getCodeLines());

            facetsConfig = new FacetsConfig();
            facetsConfig.setIndexFieldName(Values.LANGUAGENAME, Values.LANGUAGENAME);
            facetsConfig.setIndexFieldName(Values.REPONAME, Values.REPONAME);
            facetsConfig.setIndexFieldName(Values.CODEOWNER, Values.CODEOWNER);

            Document doc = this.buildDocument(codeIndexDocument);

            writer.updateDocument(
                    new Term(Values.PATH, codeIndexDocument.getRepoLocationRepoNameLocationFilename()),
                    facetsConfig.build(taxonomyWriter, doc));

            count++;
            if (count >= INDEX_QUEUE_BATCH_SIZE) {
                codeIndexDocument = null;
            } else {
                codeIndexDocument = codeIndexDocumentQueue.poll();
            }
        }
    } finally {
        try {
            writer.close();
        } finally {
            taxonomyWriter.close();
        }
        Singleton.getLogger().info("Closing writers");
    }
}

From source file:com.searchcode.app.service.CodeIndexer.java

License:Open Source License

/**
 * Given a queue of documents to index, index them by popping the queue limited to 1000 items.
 * This method must be synchronized as we have not added any logic to deal with multiple threads writing to the
 * index.//from   www  .java  2 s. c o  m
 * TODO investigate how Lucene deals with multiple writes
 */
public synchronized void indexTimeDocuments(Queue<CodeIndexDocument> codeIndexDocumentQueue)
        throws IOException {
    // Index all documents and commit at the end for performance gains
    Directory dir = FSDirectory.open(Paths.get(
            Properties.getProperties().getProperty(Values.TIMEINDEXLOCATION, Values.DEFAULTTIMEINDEXLOCATION)));
    Directory facetsdir = FSDirectory.open(Paths.get(Properties.getProperties()
            .getProperty(Values.TIMEINDEXFACETLOCATION, Values.DEFAULTTIMEINDEXFACETLOCATION)));

    Analyzer analyzer = new CodeAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    FacetsConfig facetsConfig;

    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    IndexWriter writer = new IndexWriter(dir, iwc);
    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(facetsdir);

    try {
        CodeIndexDocument codeIndexDocument = codeIndexDocumentQueue.poll();
        int count = 0;

        while (codeIndexDocument != null) {
            Singleton.getLogger()
                    .info("Indexing time file " + codeIndexDocument.getRepoLocationRepoNameLocationFilename());
            this.sharedService.decrementCodeIndexLinesCount(codeIndexDocument.getCodeLines());

            Document doc = new Document();
            // Path is the primary key for documents
            // needs to include repo location, project name and then filepath including file and revision
            Field pathField = new StringField("path",
                    codeIndexDocument.getRepoLocationRepoNameLocationFilename() + ":"
                            + codeIndexDocument.getRevision(),
                    Field.Store.YES);
            doc.add(pathField);

            // Add in facets
            facetsConfig = new FacetsConfig();
            facetsConfig.setIndexFieldName(Values.LANGUAGENAME, Values.LANGUAGENAME);
            facetsConfig.setIndexFieldName(Values.REPONAME, Values.REPONAME);
            facetsConfig.setIndexFieldName(Values.CODEOWNER, Values.CODEOWNER);
            facetsConfig.setIndexFieldName(Values.DATEYEARMONTHDAY, Values.DATEYEARMONTHDAY);
            facetsConfig.setIndexFieldName(Values.DATEYEARMONTH, Values.DATEYEARMONTH);
            facetsConfig.setIndexFieldName(Values.DATEYEAR, Values.DATEYEAR);
            facetsConfig.setIndexFieldName(Values.REVISION, Values.REVISION);
            facetsConfig.setIndexFieldName(Values.DELETED, Values.DELETED);

            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getLanguageName()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.LANGUAGENAME,
                        codeIndexDocument.getLanguageName()));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getRepoName()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.REPONAME, codeIndexDocument.getRepoName()));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getCodeOwner()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.CODEOWNER, codeIndexDocument.getCodeOwner()));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.DATEYEARMONTHDAY,
                        codeIndexDocument.getYearMonthDay()));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.DATEYEARMONTH,
                        codeIndexDocument.getYearMonthDay().substring(0, 6)));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.DATEYEAR,
                        codeIndexDocument.getYearMonthDay().substring(0, 4)));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getRevision()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.REVISION, codeIndexDocument.getRevision()));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.isDeleted()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.DELETED, codeIndexDocument.isDeleted()));
            }

            String indexContents = Values.EMPTYSTRING;

            indexContents += this.searchcodeLib.splitKeywords(codeIndexDocument.getContents());
            indexContents += this.searchcodeLib.codeCleanPipeline(codeIndexDocument.getContents());
            this.searchcodeLib.addToSpellingCorrector(codeIndexDocument.getContents()); // Store in spelling corrector

            indexContents = indexContents.toLowerCase();

            doc.add(new TextField(Values.REPONAME, codeIndexDocument.getRepoName(), Field.Store.YES));
            doc.add(new TextField(Values.FILENAME, codeIndexDocument.getFileName(), Field.Store.YES));
            doc.add(new TextField(Values.FILELOCATION, codeIndexDocument.getFileLocation(), Field.Store.YES));
            doc.add(new TextField(Values.FILELOCATIONFILENAME, codeIndexDocument.getFileLocationFilename(),
                    Field.Store.YES));
            doc.add(new TextField(Values.MD5HASH, codeIndexDocument.getMd5hash(), Field.Store.YES));
            doc.add(new TextField(Values.LANGUAGENAME, codeIndexDocument.getLanguageName(), Field.Store.YES));
            doc.add(new IntField(Values.CODELINES, codeIndexDocument.getCodeLines(), Field.Store.YES));
            doc.add(new TextField(Values.CONTENTS, indexContents, Field.Store.NO));
            doc.add(new TextField(Values.REPOLOCATION, codeIndexDocument.getRepoRemoteLocation(),
                    Field.Store.YES));
            doc.add(new TextField(Values.CODEOWNER, codeIndexDocument.getCodeOwner(), Field.Store.YES));
            doc.add(new TextField(Values.REVISION, codeIndexDocument.getRevision(), Field.Store.YES));
            doc.add(new TextField(Values.DATEYEARMONTHDAY, codeIndexDocument.getYearMonthDay(),
                    Field.Store.YES));
            doc.add(new TextField(Values.DATEYEARMONTH, codeIndexDocument.getYearMonth(), Field.Store.YES));
            doc.add(new TextField(Values.DATEYEAR, codeIndexDocument.getYear(), Field.Store.YES));
            doc.add(new TextField(Values.MESSAGE, codeIndexDocument.getMessage(), Field.Store.YES));
            doc.add(new TextField(Values.DELETED, codeIndexDocument.isDeleted(), Field.Store.YES));

            // Extra metadata in this case when it was last indexed
            doc.add(new LongField(Values.MODIFIED, new Date().getTime(), Field.Store.YES));

            writer.updateDocument(
                    new Term(Values.PATH, codeIndexDocument.getRepoLocationRepoNameLocationFilename()),
                    facetsConfig.build(taxoWriter, doc));

            count++;
            if (count >= INDEX_QUEUE_BATCH_SIZE) {
                codeIndexDocument = null;
            } else {
                codeIndexDocument = codeIndexDocumentQueue.poll();
            }
        }
    } finally {
        Singleton.getLogger().info("Closing writers");
        writer.close();
        taxoWriter.close();
    }
}