List of usage examples for org.apache.lucene.index IndexWriterConfig IndexWriterConfig
public IndexWriterConfig(Analyzer analyzer)
From source file:com.rapidminer.search.GlobalSearchIndexer.java
License:Open Source License
/** * Creates an instance of {@link IndexWriter}. * * @return the writer, never {@code null} * @throws IOException/* w w w .jav a 2s.c o m*/ * if something goes wrong */ private IndexWriter createIndexWriter() throws IOException { Directory dir = FSDirectory.open(indexDirectoryPath); IndexWriterConfig config = new IndexWriterConfig(GlobalSearchUtilities.ANALYZER); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); return new IndexWriter(dir, config); }
From source file:com.ricky.codelab.lucene.LuceneIndexAndSearchDemo.java
License:Apache License
/** * //from w w w. j a v a2 s . c o m * ??? * @param args */ public static void main(String[] args) { //Lucene Document?? String fieldName = "text"; // String text = "IK Analyzer???????"; //IKAnalyzer? Analyzer analyzer = new IKAnalyzer(true); Directory directory = null; IndexWriter iwriter = null; IndexReader ireader = null; IndexSearcher isearcher = null; try { // directory = new RAMDirectory(); //?IndexWriterConfig IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer); iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); iwriter = new IndexWriter(directory, iwConfig); // Document doc = new Document(); doc.add(new StringField("ID", "10000", Field.Store.YES)); doc.add(new TextField(fieldName, text, Field.Store.YES)); iwriter.addDocument(doc); iwriter.close(); //?********************************** //? ireader = DirectoryReader.open(directory); isearcher = new IndexSearcher(ireader); String keyword = "?"; //QueryParser?Query QueryParser qp = new QueryParser(fieldName, analyzer); qp.setDefaultOperator(QueryParser.AND_OPERATOR); Query query = qp.parse(keyword); System.out.println("Query = " + query); //?5? TopDocs topDocs = isearcher.search(query, 5); System.out.println("" + topDocs.totalHits); // ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (int i = 0; i < topDocs.totalHits; i++) { Document targetDoc = isearcher.doc(scoreDocs[i].doc); System.out.println("" + targetDoc.toString()); } } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (ParseException e) { e.printStackTrace(); } finally { if (ireader != null) { try { ireader.close(); } catch (IOException e) { e.printStackTrace(); } } if (directory != null) { try { directory.close(); } catch (IOException e) { e.printStackTrace(); } } } }
From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat.java
License:Apache License
/** Make sure the final sub-block(s) are not skipped. */ public void testFinalBlock() throws Exception { Directory d = newDirectory();/*from www . j a v a 2s .c om*/ IndexWriter w = new IndexWriter(d, new IndexWriterConfig(new MockAnalyzer(random()))); for (int i = 0; i < 25; i++) { Document doc = new Document(); doc.add(newStringField("field", Character.toString((char) (97 + i)), Field.Store.NO)); doc.add(newStringField("field", "z" + Character.toString((char) (97 + i)), Field.Store.NO)); w.addDocument(doc); } w.forceMerge(1); DirectoryReader r = DirectoryReader.open(w); assertEquals(1, r.leaves().size()); RocanaFieldReader field = (RocanaFieldReader) r.leaves().get(0).reader().fields().terms("field"); // We should see exactly two blocks: one root block (prefix empty string) and one block for z* terms (prefix z): RocanaStats stats = field.getStats(); assertEquals(0, stats.floorBlockCount); assertEquals(2, stats.nonFloorBlockCount); r.close(); w.close(); d.close(); }
From source file:com.search.lucene.demo.facet.DistanceFacetsExample.java
License:Apache License
/** Build the example index. */ public void index() throws IOException { IndexWriter writer = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter // Add documents with latitude/longitude location: // we index these both as DoubleFields (for bounding box/ranges) and as NumericDocValuesFields (for scoring) Document doc = new Document(); doc.add(new DoubleField("latitude", 40.759011, Field.Store.NO)); doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.759011))); doc.add(new DoubleField("longitude", -73.9844722, Field.Store.NO)); doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-73.9844722))); writer.addDocument(doc);// w w w .java 2 s. co m doc = new Document(); doc.add(new DoubleField("latitude", 40.718266, Field.Store.NO)); doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.718266))); doc.add(new DoubleField("longitude", -74.007819, Field.Store.NO)); doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.007819))); writer.addDocument(doc); doc = new Document(); doc.add(new DoubleField("latitude", 40.7051157, Field.Store.NO)); doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.7051157))); doc.add(new DoubleField("longitude", -74.0088305, Field.Store.NO)); doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.0088305))); writer.addDocument(doc); // Open near-real-time searcher searcher = new IndexSearcher(DirectoryReader.open(writer, true)); writer.close(); }
From source file:com.search.lucene.demo.facet.RangeFacetsExample.java
License:Apache License
/** Build the example index. */ public void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); // Add documents with a fake timestamp, 1000 sec before // "now", 2000 sec before "now", ...: for (int i = 0; i < 100; i++) { Document doc = new Document(); long then = nowSec - i * 1000; // Add as doc values field, so we can compute range facets: doc.add(new NumericDocValuesField("timestamp", then)); // Add as numeric field so we can drill-down: doc.add(new LongField("timestamp", then, Field.Store.NO)); indexWriter.addDocument(doc);//from ww w. j a v a 2s . c o m } // Open near-real-time searcher searcher = new IndexSearcher(DirectoryReader.open(indexWriter, true)); indexWriter.close(); }
From source file:com.search.lucene.demo.facet.SimpleSortedSetFacetsExample.java
License:Apache License
/** Build the example index. */ private void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); Document doc = new Document(); doc.add(new SortedSetDocValuesFacetField("Author", "Bob")); doc.add(new SortedSetDocValuesFacetField("Publish Year", "2010")); indexWriter.addDocument(config.build(doc)); doc = new Document(); doc.add(new SortedSetDocValuesFacetField("Author", "Lisa")); doc.add(new SortedSetDocValuesFacetField("Publish Year", "2010")); indexWriter.addDocument(config.build(doc)); doc = new Document(); doc.add(new SortedSetDocValuesFacetField("Author", "Lisa")); doc.add(new SortedSetDocValuesFacetField("Publish Year", "2012")); indexWriter.addDocument(config.build(doc)); doc = new Document(); doc.add(new SortedSetDocValuesFacetField("Author", "Susan")); doc.add(new SortedSetDocValuesFacetField("Publish Year", "2012")); indexWriter.addDocument(config.build(doc)); doc = new Document(); doc.add(new SortedSetDocValuesFacetField("Author", "Frank")); doc.add(new SortedSetDocValuesFacetField("Publish Year", "1999")); indexWriter.addDocument(config.build(doc)); indexWriter.close();// w ww . j a v a 2 s .c om }
From source file:com.searchcode.app.service.CodeIndexer.java
License:Open Source License
/** * Deletes all files that belong to a repository. * TODO I don't think this clears anything from the facets, which it should *///w ww . j a v a2s .co m public synchronized void deleteByReponame(String repoName) throws IOException { Directory dir = FSDirectory.open(Paths .get(Properties.getProperties().getProperty(Values.INDEXLOCATION, Values.DEFAULTINDEXLOCATION))); Analyzer analyzer = new CodeAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(dir, iwc); writer.deleteDocuments(new Term(Values.REPONAME, repoName)); writer.close(); }
From source file:com.searchcode.app.service.CodeIndexer.java
License:Open Source License
/** * Deletes a file from the index using the code id which seems to be * the most reliable way of doing it// ww w . ja v a2 s . c o m * TODO Update the record and set the facets to a value we can ignore */ public synchronized void deleteByCodeId(String codeId) throws IOException { Directory dir = FSDirectory.open(Paths .get(Properties.getProperties().getProperty(Values.INDEXLOCATION, Values.DEFAULTINDEXLOCATION))); Analyzer analyzer = new CodeAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(dir, iwc); try { QueryParser parser = new QueryParser(Values.CONTENTS, analyzer); Query query = parser.parse(Values.CODEID + ":" + QueryParser.escape(codeId)); writer.deleteDocuments(query); } catch (Exception ex) { Singleton.getLogger().warning( "ERROR - caught a " + ex.getClass() + " in CodeIndexer\n with message: " + ex.getMessage()); } finally { writer.close(); } }
From source file:com.searchcode.app.service.CodeIndexer.java
License:Open Source License
/** * Given a queue of documents to index, index them by popping the queue limited to default of 1000 items. * This method must be synchronized as we have not added any logic to deal with multiple threads writing to the * index./*from www .j a v a 2s .c o m*/ * TODO investigate how Lucene deals with multiple writes */ public synchronized void indexDocuments(Queue<CodeIndexDocument> codeIndexDocumentQueue) throws IOException { Directory indexDirectory = FSDirectory.open(this.INDEX_LOCATION); Directory facetDirectory = FSDirectory.open(this.FACET_LOCATION); Analyzer analyzer = new CodeAnalyzer(); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); FacetsConfig facetsConfig; indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(indexDirectory, indexWriterConfig); TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(facetDirectory); try { CodeIndexDocument codeIndexDocument = codeIndexDocumentQueue.poll(); int count = 0; while (codeIndexDocument != null) { Singleton.getLogger() .info("Indexing file " + codeIndexDocument.getRepoLocationRepoNameLocationFilename()); this.sharedService.decrementCodeIndexLinesCount(codeIndexDocument.getCodeLines()); facetsConfig = new FacetsConfig(); facetsConfig.setIndexFieldName(Values.LANGUAGENAME, Values.LANGUAGENAME); facetsConfig.setIndexFieldName(Values.REPONAME, Values.REPONAME); facetsConfig.setIndexFieldName(Values.CODEOWNER, Values.CODEOWNER); Document doc = this.buildDocument(codeIndexDocument); writer.updateDocument( new Term(Values.PATH, codeIndexDocument.getRepoLocationRepoNameLocationFilename()), facetsConfig.build(taxonomyWriter, doc)); count++; if (count >= INDEX_QUEUE_BATCH_SIZE) { codeIndexDocument = null; } else { codeIndexDocument = codeIndexDocumentQueue.poll(); } } } finally { try { writer.close(); } finally { taxonomyWriter.close(); } Singleton.getLogger().info("Closing writers"); } }
From source file:com.searchcode.app.service.CodeIndexer.java
License:Open Source License
/** * Given a queue of documents to index, index them by popping the queue limited to 1000 items. * This method must be synchronized as we have not added any logic to deal with multiple threads writing to the * index./*from w w w . j a v a2s .c o m*/ * TODO investigate how Lucene deals with multiple writes */ public synchronized void indexTimeDocuments(Queue<CodeIndexDocument> codeIndexDocumentQueue) throws IOException { // Index all documents and commit at the end for performance gains Directory dir = FSDirectory.open(Paths.get( Properties.getProperties().getProperty(Values.TIMEINDEXLOCATION, Values.DEFAULTTIMEINDEXLOCATION))); Directory facetsdir = FSDirectory.open(Paths.get(Properties.getProperties() .getProperty(Values.TIMEINDEXFACETLOCATION, Values.DEFAULTTIMEINDEXFACETLOCATION))); Analyzer analyzer = new CodeAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); FacetsConfig facetsConfig; iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(dir, iwc); TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(facetsdir); try { CodeIndexDocument codeIndexDocument = codeIndexDocumentQueue.poll(); int count = 0; while (codeIndexDocument != null) { Singleton.getLogger() .info("Indexing time file " + codeIndexDocument.getRepoLocationRepoNameLocationFilename()); this.sharedService.decrementCodeIndexLinesCount(codeIndexDocument.getCodeLines()); Document doc = new Document(); // Path is the primary key for documents // needs to include repo location, project name and then filepath including file and revision Field pathField = new StringField("path", codeIndexDocument.getRepoLocationRepoNameLocationFilename() + ":" + codeIndexDocument.getRevision(), Field.Store.YES); doc.add(pathField); // Add in facets facetsConfig = new FacetsConfig(); facetsConfig.setIndexFieldName(Values.LANGUAGENAME, Values.LANGUAGENAME); facetsConfig.setIndexFieldName(Values.REPONAME, Values.REPONAME); facetsConfig.setIndexFieldName(Values.CODEOWNER, Values.CODEOWNER); facetsConfig.setIndexFieldName(Values.DATEYEARMONTHDAY, Values.DATEYEARMONTHDAY); facetsConfig.setIndexFieldName(Values.DATEYEARMONTH, Values.DATEYEARMONTH); facetsConfig.setIndexFieldName(Values.DATEYEAR, Values.DATEYEAR); facetsConfig.setIndexFieldName(Values.REVISION, Values.REVISION); facetsConfig.setIndexFieldName(Values.DELETED, Values.DELETED); if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getLanguageName()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.LANGUAGENAME, codeIndexDocument.getLanguageName())); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getRepoName()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.REPONAME, codeIndexDocument.getRepoName())); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getCodeOwner()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.CODEOWNER, codeIndexDocument.getCodeOwner())); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.DATEYEARMONTHDAY, codeIndexDocument.getYearMonthDay())); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.DATEYEARMONTH, codeIndexDocument.getYearMonthDay().substring(0, 6))); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.DATEYEAR, codeIndexDocument.getYearMonthDay().substring(0, 4))); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getRevision()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.REVISION, codeIndexDocument.getRevision())); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.isDeleted()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.DELETED, codeIndexDocument.isDeleted())); } String indexContents = Values.EMPTYSTRING; indexContents += this.searchcodeLib.splitKeywords(codeIndexDocument.getContents()); indexContents += this.searchcodeLib.codeCleanPipeline(codeIndexDocument.getContents()); this.searchcodeLib.addToSpellingCorrector(codeIndexDocument.getContents()); // Store in spelling corrector indexContents = indexContents.toLowerCase(); doc.add(new TextField(Values.REPONAME, codeIndexDocument.getRepoName(), Field.Store.YES)); doc.add(new TextField(Values.FILENAME, codeIndexDocument.getFileName(), Field.Store.YES)); doc.add(new TextField(Values.FILELOCATION, codeIndexDocument.getFileLocation(), Field.Store.YES)); doc.add(new TextField(Values.FILELOCATIONFILENAME, codeIndexDocument.getFileLocationFilename(), Field.Store.YES)); doc.add(new TextField(Values.MD5HASH, codeIndexDocument.getMd5hash(), Field.Store.YES)); doc.add(new TextField(Values.LANGUAGENAME, codeIndexDocument.getLanguageName(), Field.Store.YES)); doc.add(new IntField(Values.CODELINES, codeIndexDocument.getCodeLines(), Field.Store.YES)); doc.add(new TextField(Values.CONTENTS, indexContents, Field.Store.NO)); doc.add(new TextField(Values.REPOLOCATION, codeIndexDocument.getRepoRemoteLocation(), Field.Store.YES)); doc.add(new TextField(Values.CODEOWNER, codeIndexDocument.getCodeOwner(), Field.Store.YES)); doc.add(new TextField(Values.REVISION, codeIndexDocument.getRevision(), Field.Store.YES)); doc.add(new TextField(Values.DATEYEARMONTHDAY, codeIndexDocument.getYearMonthDay(), Field.Store.YES)); doc.add(new TextField(Values.DATEYEARMONTH, codeIndexDocument.getYearMonth(), Field.Store.YES)); doc.add(new TextField(Values.DATEYEAR, codeIndexDocument.getYear(), Field.Store.YES)); doc.add(new TextField(Values.MESSAGE, codeIndexDocument.getMessage(), Field.Store.YES)); doc.add(new TextField(Values.DELETED, codeIndexDocument.isDeleted(), Field.Store.YES)); // Extra metadata in this case when it was last indexed doc.add(new LongField(Values.MODIFIED, new Date().getTime(), Field.Store.YES)); writer.updateDocument( new Term(Values.PATH, codeIndexDocument.getRepoLocationRepoNameLocationFilename()), facetsConfig.build(taxoWriter, doc)); count++; if (count >= INDEX_QUEUE_BATCH_SIZE) { codeIndexDocument = null; } else { codeIndexDocument = codeIndexDocumentQueue.poll(); } } } finally { Singleton.getLogger().info("Closing writers"); writer.close(); taxoWriter.close(); } }