List of usage examples for org.apache.lucene.facet FacetsConfig build
public Document build(TaxonomyWriter taxoWriter, Document doc) throws IOException
From source file:com.qwazr.search.bench.LuceneWithTaxonomyIndex.java
License:Apache License
@Override final public void updateDocument(final FacetsConfig facetsConfig, final LuceneRecord.Indexable record) throws IOException { indexWriter.updateDocument(record.termId, facetsConfig.build(taxonomyWriter, record.document)); }
From source file:com.searchcode.app.service.CodeIndexer.java
License:Open Source License
/** * Given a queue of documents to index, index them by popping the queue limited to default of 1000 items. * This method must be synchronized as we have not added any logic to deal with multiple threads writing to the * index./* ww w . j a v a 2s.co m*/ * TODO investigate how Lucene deals with multiple writes */ public synchronized void indexDocuments(Queue<CodeIndexDocument> codeIndexDocumentQueue) throws IOException { Directory indexDirectory = FSDirectory.open(this.INDEX_LOCATION); Directory facetDirectory = FSDirectory.open(this.FACET_LOCATION); Analyzer analyzer = new CodeAnalyzer(); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); FacetsConfig facetsConfig; indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(indexDirectory, indexWriterConfig); TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(facetDirectory); try { CodeIndexDocument codeIndexDocument = codeIndexDocumentQueue.poll(); int count = 0; while (codeIndexDocument != null) { Singleton.getLogger() .info("Indexing file " + codeIndexDocument.getRepoLocationRepoNameLocationFilename()); this.sharedService.decrementCodeIndexLinesCount(codeIndexDocument.getCodeLines()); facetsConfig = new FacetsConfig(); facetsConfig.setIndexFieldName(Values.LANGUAGENAME, Values.LANGUAGENAME); facetsConfig.setIndexFieldName(Values.REPONAME, Values.REPONAME); facetsConfig.setIndexFieldName(Values.CODEOWNER, Values.CODEOWNER); Document doc = this.buildDocument(codeIndexDocument); writer.updateDocument( new Term(Values.PATH, codeIndexDocument.getRepoLocationRepoNameLocationFilename()), facetsConfig.build(taxonomyWriter, doc)); count++; if (count >= INDEX_QUEUE_BATCH_SIZE) { codeIndexDocument = null; } else { codeIndexDocument = codeIndexDocumentQueue.poll(); } } } finally { try { writer.close(); } finally { taxonomyWriter.close(); } Singleton.getLogger().info("Closing writers"); } }
From source file:com.searchcode.app.service.CodeIndexer.java
License:Open Source License
/** * Given a queue of documents to index, index them by popping the queue limited to 1000 items. * This method must be synchronized as we have not added any logic to deal with multiple threads writing to the * index.//www . j a v a 2s.co m * TODO investigate how Lucene deals with multiple writes */ public synchronized void indexTimeDocuments(Queue<CodeIndexDocument> codeIndexDocumentQueue) throws IOException { // Index all documents and commit at the end for performance gains Directory dir = FSDirectory.open(Paths.get( Properties.getProperties().getProperty(Values.TIMEINDEXLOCATION, Values.DEFAULTTIMEINDEXLOCATION))); Directory facetsdir = FSDirectory.open(Paths.get(Properties.getProperties() .getProperty(Values.TIMEINDEXFACETLOCATION, Values.DEFAULTTIMEINDEXFACETLOCATION))); Analyzer analyzer = new CodeAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); FacetsConfig facetsConfig; iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(dir, iwc); TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(facetsdir); try { CodeIndexDocument codeIndexDocument = codeIndexDocumentQueue.poll(); int count = 0; while (codeIndexDocument != null) { Singleton.getLogger() .info("Indexing time file " + codeIndexDocument.getRepoLocationRepoNameLocationFilename()); this.sharedService.decrementCodeIndexLinesCount(codeIndexDocument.getCodeLines()); Document doc = new Document(); // Path is the primary key for documents // needs to include repo location, project name and then filepath including file and revision Field pathField = new StringField("path", codeIndexDocument.getRepoLocationRepoNameLocationFilename() + ":" + codeIndexDocument.getRevision(), Field.Store.YES); doc.add(pathField); // Add in facets facetsConfig = new FacetsConfig(); facetsConfig.setIndexFieldName(Values.LANGUAGENAME, Values.LANGUAGENAME); facetsConfig.setIndexFieldName(Values.REPONAME, Values.REPONAME); facetsConfig.setIndexFieldName(Values.CODEOWNER, Values.CODEOWNER); facetsConfig.setIndexFieldName(Values.DATEYEARMONTHDAY, Values.DATEYEARMONTHDAY); facetsConfig.setIndexFieldName(Values.DATEYEARMONTH, Values.DATEYEARMONTH); facetsConfig.setIndexFieldName(Values.DATEYEAR, Values.DATEYEAR); facetsConfig.setIndexFieldName(Values.REVISION, Values.REVISION); facetsConfig.setIndexFieldName(Values.DELETED, Values.DELETED); if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getLanguageName()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.LANGUAGENAME, codeIndexDocument.getLanguageName())); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getRepoName()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.REPONAME, codeIndexDocument.getRepoName())); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getCodeOwner()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.CODEOWNER, codeIndexDocument.getCodeOwner())); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.DATEYEARMONTHDAY, codeIndexDocument.getYearMonthDay())); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.DATEYEARMONTH, codeIndexDocument.getYearMonthDay().substring(0, 6))); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.DATEYEAR, codeIndexDocument.getYearMonthDay().substring(0, 4))); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getRevision()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.REVISION, codeIndexDocument.getRevision())); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.isDeleted()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.DELETED, codeIndexDocument.isDeleted())); } String indexContents = Values.EMPTYSTRING; indexContents += this.searchcodeLib.splitKeywords(codeIndexDocument.getContents()); indexContents += this.searchcodeLib.codeCleanPipeline(codeIndexDocument.getContents()); this.searchcodeLib.addToSpellingCorrector(codeIndexDocument.getContents()); // Store in spelling corrector indexContents = indexContents.toLowerCase(); doc.add(new TextField(Values.REPONAME, codeIndexDocument.getRepoName(), Field.Store.YES)); doc.add(new TextField(Values.FILENAME, codeIndexDocument.getFileName(), Field.Store.YES)); doc.add(new TextField(Values.FILELOCATION, codeIndexDocument.getFileLocation(), Field.Store.YES)); doc.add(new TextField(Values.FILELOCATIONFILENAME, codeIndexDocument.getFileLocationFilename(), Field.Store.YES)); doc.add(new TextField(Values.MD5HASH, codeIndexDocument.getMd5hash(), Field.Store.YES)); doc.add(new TextField(Values.LANGUAGENAME, codeIndexDocument.getLanguageName(), Field.Store.YES)); doc.add(new IntField(Values.CODELINES, codeIndexDocument.getCodeLines(), Field.Store.YES)); doc.add(new TextField(Values.CONTENTS, indexContents, Field.Store.NO)); doc.add(new TextField(Values.REPOLOCATION, codeIndexDocument.getRepoRemoteLocation(), Field.Store.YES)); doc.add(new TextField(Values.CODEOWNER, codeIndexDocument.getCodeOwner(), Field.Store.YES)); doc.add(new TextField(Values.REVISION, codeIndexDocument.getRevision(), Field.Store.YES)); doc.add(new TextField(Values.DATEYEARMONTHDAY, codeIndexDocument.getYearMonthDay(), Field.Store.YES)); doc.add(new TextField(Values.DATEYEARMONTH, codeIndexDocument.getYearMonth(), Field.Store.YES)); doc.add(new TextField(Values.DATEYEAR, codeIndexDocument.getYear(), Field.Store.YES)); doc.add(new TextField(Values.MESSAGE, codeIndexDocument.getMessage(), Field.Store.YES)); doc.add(new TextField(Values.DELETED, codeIndexDocument.isDeleted(), Field.Store.YES)); // Extra metadata in this case when it was last indexed doc.add(new LongField(Values.MODIFIED, new Date().getTime(), Field.Store.YES)); writer.updateDocument( new Term(Values.PATH, codeIndexDocument.getRepoLocationRepoNameLocationFilename()), facetsConfig.build(taxoWriter, doc)); count++; if (count >= INDEX_QUEUE_BATCH_SIZE) { codeIndexDocument = null; } else { codeIndexDocument = codeIndexDocumentQueue.poll(); } } } finally { Singleton.getLogger().info("Closing writers"); writer.close(); taxoWriter.close(); } }
From source file:com.xiaomi.linden.hadoop.indexing.keyvalueformat.IntermediateForm.java
License:Apache License
/** * This method is used by the index update mapper and process a document * operation into the current intermediate form. * @param doc input document operation//from ww w . jav a 2 s. c om * @throws IOException */ public void process(Term id, Document doc, Analyzer analyzer, FacetsConfig facetsConfig) throws IOException { if (writer == null) { // analyzer is null because we specify an analyzer with addDocument createWriter(); } if (facetsConfig != null) { writer.updateDocument(id, facetsConfig.build(taxoWriter, doc), analyzer); } else { writer.updateDocument(id, doc, analyzer); } numDocs++; }
From source file:ir.FacetLucene_1106022654.java
private static void addDok(IndexWriter w, String penulis, String id, String judul, String teks, DirectoryTaxonomyWriter taxoWriter, FacetsConfig config) throws IOException { /* Instead of lengthly process of adding each new entry, we can create a generic fuction to add the new entry doc . We can add needed fields with field variable and respective tag .*/ Document dok = new Document(); dok.add(new FacetField("penulis", penulis)); dok.add(new FacetField("id", id)); dok.add(new FacetField("judul", judul)); dok.add(new TextField("teks", teks, Field.Store.YES)); w.addDocument(config.build(taxoWriter, dok)); }
From source file:org.wso2.carbon.analytics.dataservice.core.indexing.AnalyticsDataIndexer.java
License:Open Source License
private Document generateIndexDoc(Record record, Map<String, ColumnDefinition> columns, TaxonomyWriter taxonomyWriter) throws AnalyticsIndexException, IOException { Document doc = new Document(); FacetsConfig config = new FacetsConfig(); FieldType numericFieldType = getLuceneNumericFieldType(FieldType.NumericType.LONG); doc.add(new StringField(INDEX_ID_INTERNAL_FIELD, record.getId(), Store.YES)); doc.add(new LongField(INDEX_INTERNAL_TIMESTAMP_FIELD, record.getTimestamp(), numericFieldType)); /* make the best effort to store in the given timestamp, or else, * fall back to a compatible format, or else, lastly, string */ String name;//from w ww . j a v a2 s. c o m for (Map.Entry<String, ColumnDefinition> entry : columns.entrySet()) { name = entry.getKey(); this.checkAndAddDocEntry(doc, entry.getValue().getType(), name, record.getValue(name)); if (entry.getValue().isFacet()) { this.checkAndAddTaxonomyDocEntries(doc, name, record.getValue(name), config); } } return config.build(taxonomyWriter, doc); }