List of usage examples for the org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter constructor DirectoryTaxonomyWriter(Directory)
public DirectoryTaxonomyWriter(Directory d) throws IOException
From source file:com.czw.search.lucene.example.facet.AssociationsFacetsExample.java
License:Apache License
/** Build the example index. */ private void index() throws IOException { IndexWriterConfig iwc = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE); IndexWriter indexWriter = new IndexWriter(indexDir, iwc); // Writes facet ords to a separate directory from the main index DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); Document doc = new Document(); // 3 occurrences for tag 'lucene' doc.add(new IntAssociationFacetField(3, "tags", "lucene")); // 87% confidence level of genre 'computing' doc.add(new FloatAssociationFacetField(0.87f, "genre", "computing")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); // 1 occurrence for tag 'lucene' doc.add(new IntAssociationFacetField(1, "tags", "lucene")); // 2 occurrence for tag 'solr' doc.add(new IntAssociationFacetField(2, "tags", "solr")); // 75% confidence level of genre 'computing' doc.add(new FloatAssociationFacetField(0.75f, "genre", "computing")); // 34% confidence level of genre 'software' doc.add(new FloatAssociationFacetField(0.34f, "genre", "software")); indexWriter.addDocument(config.build(taxoWriter, doc)); indexWriter.close();/*w w w. j ava 2 s .c o m*/ taxoWriter.close(); }
From source file:com.czw.search.lucene.example.facet.ExpressionAggregationFacetsExample.java
License:Apache License
/** Build the example index. */ private void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); // Writes facet ords to a separate directory from the main index DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); Document doc = new Document(); doc.add(new TextField("c", "foo bar", Store.NO)); doc.add(new NumericDocValuesField("popularity", 5L)); doc.add(new FacetField("A", "B")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new TextField("c", "foo foo bar", Store.NO)); doc.add(new NumericDocValuesField("popularity", 3L)); doc.add(new FacetField("A", "C")); indexWriter.addDocument(config.build(taxoWriter, doc)); indexWriter.close();/*w w w. j av a 2 s .com*/ taxoWriter.close(); }
From source file:com.czw.search.lucene.example.facet.MultiCategoryListsFacetsExample.java
License:Apache License
/** Build the example index. */ private void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); // Writes facet ords to a separate directory from the main index DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); Document doc = new Document(); doc.add(new FacetField("Author", "Bob")); doc.add(new FacetField("Publish Date", "2010", "10", "15")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Lisa")); doc.add(new FacetField("Publish Date", "2010", "10", "20")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Lisa")); doc.add(new FacetField("Publish Date", "2012", "1", "1")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Susan")); doc.add(new FacetField("Publish Date", "2012", "1", "7")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Frank")); doc.add(new FacetField("Publish Date", "1999", "5", "5")); indexWriter.addDocument(config.build(taxoWriter, doc)); indexWriter.close();/* w ww.j av a2 s .c o m*/ taxoWriter.close(); }
From source file:com.czw.search.lucene.example.facet.SimpleFacetsExample.java
License:Apache License
/**
 * Build the example index.
 *
 * <p>Adds eight documents, each with an "Author" facet, a three-level
 * "Publish Date" facet (year, month, day) and a "Category" facet.
 *
 * @throws IOException if creating, writing to, or closing either writer fails
 */
private void index() throws IOException {
    IndexWriterConfig iwc = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE);

    // try-with-resources closes both writers even when indexing fails part-way.
    // Resources are closed in reverse declaration order: taxonomy writer first, then index writer.
    // The taxonomy writer writes facet ords to a separate directory from the main index.
    try (IndexWriter indexWriter = new IndexWriter(indexDir, iwc);
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir)) {

        Document doc = new Document();
        doc.add(new FacetField("Author", "Bob"));
        doc.add(new FacetField("Publish Date", "2010", "10", "15"));
        doc.add(new FacetField("Category", "art"));
        indexWriter.addDocument(config.build(taxoWriter, doc));

        doc = new Document();
        doc.add(new FacetField("Author", "Bob"));
        doc.add(new FacetField("Publish Date", "2010", "10", "15"));
        doc.add(new FacetField("Category", "biography"));
        indexWriter.addDocument(config.build(taxoWriter, doc));

        doc = new Document();
        doc.add(new FacetField("Author", "Lisa"));
        doc.add(new FacetField("Publish Date", "2010", "10", "20"));
        doc.add(new FacetField("Category", "fiction"));
        indexWriter.addDocument(config.build(taxoWriter, doc));

        doc = new Document();
        doc.add(new FacetField("Author", "Lisa"));
        doc.add(new FacetField("Publish Date", "2012", "1", "1"));
        doc.add(new FacetField("Category", "food"));
        indexWriter.addDocument(config.build(taxoWriter, doc));

        doc = new Document();
        doc.add(new FacetField("Author", "Susan"));
        doc.add(new FacetField("Publish Date", "2012", "1", "7"));
        doc.add(new FacetField("Category", "science"));
        indexWriter.addDocument(config.build(taxoWriter, doc));

        doc = new Document();
        doc.add(new FacetField("Author", "Frank"));
        doc.add(new FacetField("Publish Date", "1999", "5", "5"));
        doc.add(new FacetField("Category", "crime"));
        indexWriter.addDocument(config.build(taxoWriter, doc));

        doc = new Document();
        doc.add(new FacetField("Author", "Frank"));
        doc.add(new FacetField("Publish Date", "2011", "5", "15"));
        doc.add(new FacetField("Category", "fiction"));
        indexWriter.addDocument(config.build(taxoWriter, doc));

        doc = new Document();
        doc.add(new FacetField("Author", "Frank"));
        doc.add(new FacetField("Publish Date", "2017", "6", "5"));
        doc.add(new FacetField("Category", "science"));
        indexWriter.addDocument(config.build(taxoWriter, doc));
    }
}
From source file:com.justinleegrant.myluceneplayground.SimpleFacetsExample.java
License:Apache License
/** Build the example index. */ private void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(Version.LATEST, new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); // Writes facet ords to a separate directory from the main index DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); Document doc = new Document(); doc.add(new FacetField("Author", "Bob")); doc.add(new FacetField("Publish Date", "2010", "10", "15")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Lisa")); doc.add(new FacetField("Publish Date", "2010", "10", "20")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Lisa")); doc.add(new FacetField("Publish Date", "2012", "1", "1")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Susan")); doc.add(new FacetField("Publish Date", "2012", "1", "7")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Frank")); doc.add(new FacetField("Publish Date", "1999", "5", "5")); indexWriter.addDocument(config.build(taxoWriter, doc)); indexWriter.close();//from ww w. j a va 2 s.c o m taxoWriter.close(); }
From source file:com.m3958.apps.pcms.lucene.facet.MultiCategoryListsFacetsExample.java
License:Apache License
/** Build the example index. */ private void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER))); // Writes facet ords to a separate directory from the main index DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); // Reused across documents, to add the necessary facet fields FacetFields facetFields = new FacetFields(taxoWriter, indexingParams); add(indexWriter, facetFields, "Author/Bob", "Publish Date/2010/10/15"); add(indexWriter, facetFields, "Author/Lisa", "Publish Date/2010/10/20"); add(indexWriter, facetFields, "Author/Lisa", "Publish Date/2012/1/1"); add(indexWriter, facetFields, "Author/Susan", "Publish Date/2012/1/7"); add(indexWriter, facetFields, "Author/Frank", "Publish Date/1999/5/5"); indexWriter.close();/* ww w.jav a 2s . c o m*/ taxoWriter.close(); }
From source file:com.m3958.apps.pcms.lucene.facet.SimpleFacetsExample.java
License:Apache License
/** Build the example index. */ private void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER))); // Writes facet ords to a separate directory from the main index DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); // Reused across documents, to add the necessary facet fields FacetFields facetFields = new FacetFields(taxoWriter); add(indexWriter, facetFields, "Author/Bob", "Publish Date/2010/10/15"); add(indexWriter, facetFields, "Author/Lisa", "Publish Date/2010/10/20"); add(indexWriter, facetFields, "Author/Lisa", "Publish Date/2012/1/1"); add(indexWriter, facetFields, "Author/Susan", "Publish Date/2012/1/7"); add(indexWriter, facetFields, "Author/Frank", "Publish Date/1999/5/5"); indexWriter.close();//w ww .j a v a 2 s . c om taxoWriter.close(); }
From source file:com.orientechnologies.lucene.test.LuceneNativeFacet.java
License:Apache License
/** Build the example index. */ private void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(Version.LUCENE_47, new WhitespaceAnalyzer(Version.LUCENE_47)) .setOpenMode(OpenMode.CREATE)); // Writes facet ords to a separate directory from the main index DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); Document doc = new Document(); doc.add(new FacetField("Author", "Bob")); doc.add(new FacetField("Publish Date", "2010", "10", "15")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Lisa")); doc.add(new FacetField("Publish Date", "2010", "10", "20")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Lisa")); doc.add(new FacetField("Publish Date", "2012", "1", "1")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Susan")); doc.add(new FacetField("Publish Date", "2012", "1", "7")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Frank")); doc.add(new FacetField("Publish Date", "1999", "5", "5")); indexWriter.addDocument(config.build(taxoWriter, doc)); indexWriter.close();/* ww w . j a va 2 s. c o m*/ taxoWriter.close(); }
From source file:com.searchcode.app.service.CodeIndexer.java
License:Open Source License
/**
 * Given a queue of documents to index, index them by popping the queue, stopping once
 * {@code INDEX_QUEUE_BATCH_SIZE} documents have been indexed or the queue is empty.
 * This method must be synchronized as we have not added any logic to deal with multiple
 * threads writing to the index.
 * TODO investigate how Lucene deals with multiple writes
 *
 * <p>Each document is written with {@code updateDocument}, keyed on its
 * repo-location/repo-name/location/filename path. The directories and both writers are
 * closed before this method returns, whether or not indexing succeeded.
 *
 * @param codeIndexDocumentQueue queue the documents are polled from
 * @throws IOException if opening the directories, indexing, or closing a writer fails
 */
public synchronized void indexDocuments(Queue<CodeIndexDocument> codeIndexDocumentQueue) throws IOException {
    Analyzer analyzer = new CodeAnalyzer();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
    indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    // The facet configuration is the same for every document, so build it once up front.
    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.setIndexFieldName(Values.LANGUAGENAME, Values.LANGUAGENAME);
    facetsConfig.setIndexFieldName(Values.REPONAME, Values.REPONAME);
    facetsConfig.setIndexFieldName(Values.CODEOWNER, Values.CODEOWNER);

    // try-with-resources releases everything opened so far if a later resource fails to
    // open (e.g. the index writer when the taxonomy writer cannot be created), and
    // closes them in reverse order: taxonomy writer, index writer, then the directories.
    try (Directory indexDirectory = FSDirectory.open(this.INDEX_LOCATION);
            Directory facetDirectory = FSDirectory.open(this.FACET_LOCATION);
            IndexWriter writer = new IndexWriter(indexDirectory, indexWriterConfig);
            TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(facetDirectory)) {

        int count = 0;
        CodeIndexDocument codeIndexDocument = codeIndexDocumentQueue.poll();

        while (codeIndexDocument != null) {
            Singleton.getLogger()
                    .info("Indexing file " + codeIndexDocument.getRepoLocationRepoNameLocationFilename());
            this.sharedService.decrementCodeIndexLinesCount(codeIndexDocument.getCodeLines());

            Document doc = this.buildDocument(codeIndexDocument);

            writer.updateDocument(
                    new Term(Values.PATH, codeIndexDocument.getRepoLocationRepoNameLocationFilename()),
                    facetsConfig.build(taxonomyWriter, doc));

            count++;
            // Stop without polling once the batch limit is reached, so that no document
            // is removed from the queue and then dropped.
            codeIndexDocument = count >= INDEX_QUEUE_BATCH_SIZE ? null : codeIndexDocumentQueue.poll();
        }
    } finally {
        Singleton.getLogger().info("Closing writers");
    }
}
From source file:com.searchcode.app.service.CodeIndexer.java
License:Open Source License
/** * Given a queue of documents to index, index them by popping the queue limited to 1000 items. * This method must be synchronized as we have not added any logic to deal with multiple threads writing to the * index.//from www .java 2 s. c o m * TODO investigate how Lucene deals with multiple writes */ public synchronized void indexTimeDocuments(Queue<CodeIndexDocument> codeIndexDocumentQueue) throws IOException { // Index all documents and commit at the end for performance gains Directory dir = FSDirectory.open(Paths.get( Properties.getProperties().getProperty(Values.TIMEINDEXLOCATION, Values.DEFAULTTIMEINDEXLOCATION))); Directory facetsdir = FSDirectory.open(Paths.get(Properties.getProperties() .getProperty(Values.TIMEINDEXFACETLOCATION, Values.DEFAULTTIMEINDEXFACETLOCATION))); Analyzer analyzer = new CodeAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); FacetsConfig facetsConfig; iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(dir, iwc); TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(facetsdir); try { CodeIndexDocument codeIndexDocument = codeIndexDocumentQueue.poll(); int count = 0; while (codeIndexDocument != null) { Singleton.getLogger() .info("Indexing time file " + codeIndexDocument.getRepoLocationRepoNameLocationFilename()); this.sharedService.decrementCodeIndexLinesCount(codeIndexDocument.getCodeLines()); Document doc = new Document(); // Path is the primary key for documents // needs to include repo location, project name and then filepath including file and revision Field pathField = new StringField("path", codeIndexDocument.getRepoLocationRepoNameLocationFilename() + ":" + codeIndexDocument.getRevision(), Field.Store.YES); doc.add(pathField); // Add in facets facetsConfig = new FacetsConfig(); facetsConfig.setIndexFieldName(Values.LANGUAGENAME, Values.LANGUAGENAME); facetsConfig.setIndexFieldName(Values.REPONAME, Values.REPONAME); 
facetsConfig.setIndexFieldName(Values.CODEOWNER, Values.CODEOWNER); facetsConfig.setIndexFieldName(Values.DATEYEARMONTHDAY, Values.DATEYEARMONTHDAY); facetsConfig.setIndexFieldName(Values.DATEYEARMONTH, Values.DATEYEARMONTH); facetsConfig.setIndexFieldName(Values.DATEYEAR, Values.DATEYEAR); facetsConfig.setIndexFieldName(Values.REVISION, Values.REVISION); facetsConfig.setIndexFieldName(Values.DELETED, Values.DELETED); if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getLanguageName()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.LANGUAGENAME, codeIndexDocument.getLanguageName())); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getRepoName()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.REPONAME, codeIndexDocument.getRepoName())); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getCodeOwner()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.CODEOWNER, codeIndexDocument.getCodeOwner())); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.DATEYEARMONTHDAY, codeIndexDocument.getYearMonthDay())); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.DATEYEARMONTH, codeIndexDocument.getYearMonthDay().substring(0, 6))); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.DATEYEAR, codeIndexDocument.getYearMonthDay().substring(0, 4))); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getRevision()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.REVISION, codeIndexDocument.getRevision())); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.isDeleted()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.DELETED, 
codeIndexDocument.isDeleted())); } String indexContents = Values.EMPTYSTRING; indexContents += this.searchcodeLib.splitKeywords(codeIndexDocument.getContents()); indexContents += this.searchcodeLib.codeCleanPipeline(codeIndexDocument.getContents()); this.searchcodeLib.addToSpellingCorrector(codeIndexDocument.getContents()); // Store in spelling corrector indexContents = indexContents.toLowerCase(); doc.add(new TextField(Values.REPONAME, codeIndexDocument.getRepoName(), Field.Store.YES)); doc.add(new TextField(Values.FILENAME, codeIndexDocument.getFileName(), Field.Store.YES)); doc.add(new TextField(Values.FILELOCATION, codeIndexDocument.getFileLocation(), Field.Store.YES)); doc.add(new TextField(Values.FILELOCATIONFILENAME, codeIndexDocument.getFileLocationFilename(), Field.Store.YES)); doc.add(new TextField(Values.MD5HASH, codeIndexDocument.getMd5hash(), Field.Store.YES)); doc.add(new TextField(Values.LANGUAGENAME, codeIndexDocument.getLanguageName(), Field.Store.YES)); doc.add(new IntField(Values.CODELINES, codeIndexDocument.getCodeLines(), Field.Store.YES)); doc.add(new TextField(Values.CONTENTS, indexContents, Field.Store.NO)); doc.add(new TextField(Values.REPOLOCATION, codeIndexDocument.getRepoRemoteLocation(), Field.Store.YES)); doc.add(new TextField(Values.CODEOWNER, codeIndexDocument.getCodeOwner(), Field.Store.YES)); doc.add(new TextField(Values.REVISION, codeIndexDocument.getRevision(), Field.Store.YES)); doc.add(new TextField(Values.DATEYEARMONTHDAY, codeIndexDocument.getYearMonthDay(), Field.Store.YES)); doc.add(new TextField(Values.DATEYEARMONTH, codeIndexDocument.getYearMonth(), Field.Store.YES)); doc.add(new TextField(Values.DATEYEAR, codeIndexDocument.getYear(), Field.Store.YES)); doc.add(new TextField(Values.MESSAGE, codeIndexDocument.getMessage(), Field.Store.YES)); doc.add(new TextField(Values.DELETED, codeIndexDocument.isDeleted(), Field.Store.YES)); // Extra metadata in this case when it was last indexed doc.add(new LongField(Values.MODIFIED, 
new Date().getTime(), Field.Store.YES)); writer.updateDocument( new Term(Values.PATH, codeIndexDocument.getRepoLocationRepoNameLocationFilename()), facetsConfig.build(taxoWriter, doc)); count++; if (count >= INDEX_QUEUE_BATCH_SIZE) { codeIndexDocument = null; } else { codeIndexDocument = codeIndexDocumentQueue.poll(); } } } finally { Singleton.getLogger().info("Closing writers"); writer.close(); taxoWriter.close(); } }