List of usage examples for org.apache.lucene.index IndexWriterConfig IndexWriterConfig
public IndexWriterConfig(Analyzer analyzer)
From source file:com.codenvy.test.lucene.DeleteFilesWithSameName.java
License:Open Source License
public static void main(String[] args) throws Exception { String DOC_DIR_NAME = "files"; filesDirPath = Paths.get(DOC_DIR_NAME).toAbsolutePath().toString(); Path indexPath = Paths.get("index"); Path docDir = Paths.get(DOC_DIR_NAME); Path file1 = Paths.get(DOC_DIR_NAME, "File1"); Path file2 = Paths.get(DOC_DIR_NAME, "File1A"); Analyzer analyzer = new SimpleAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); //iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); if (!Files.isReadable(docDir)) { System.out.println("document folder not found"); return;/*from w w w .j a v a 2s . co m*/ } Directory index = FSDirectory.open(indexPath); IndexWriter writer = new IndexWriter(index, iwc); //add files to index indexDocs(writer, file1); indexDocs(writer, file2); writer.commit(); searchAndPrintResult(indexPath); //delete files System.out.println(); System.out.println("=================================================================="); System.out.println("delete by prefix \"" + filesDirPath + "/File1\""); Query query = new PrefixQuery(new Term(PATH, filesDirPath + "/File1")); writer.deleteDocuments(query); writer.close(); searchAndPrintResult(indexPath); }
From source file:com.codenvy.test.lucene.IndexFiles.java
License:Open Source License
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; String docsPath = null;//from w w w.jav a2s . co m boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final Path docDir = Paths.get(docsPath); if (!Files.isReadable(docDir)) { System.out.println("Document directory '" + docDir.toAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:com.company.Indexer.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; String docsPath = null;//from w w w .j a va2s .com boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final Path docDir = Paths.get(docsPath); if (!Files.isReadable(docDir)) { System.out.println("Document directory '" + docDir.toAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); PrintWriter tempwriter = new PrintWriter("pathToDocs.txt", "UTF-8"); tempwriter.print(docsPath); tempwriter.close(); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:com.company.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; String docsPath = null;//from ww w. j ava 2s . c o m boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final Path docDir = Paths.get(docsPath); if (!Files.isReadable(docDir)) { System.out.println("Document directory '" + docDir.toAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:com.czw.search.lucene.example.facet.AssociationsFacetsExample.java
License:Apache License
/** Build the example index. */ private void index() throws IOException { IndexWriterConfig iwc = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE); IndexWriter indexWriter = new IndexWriter(indexDir, iwc); // Writes facet ords to a separate directory from the main index DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); Document doc = new Document(); // 3 occurrences for tag 'lucene' doc.add(new IntAssociationFacetField(3, "tags", "lucene")); // 87% confidence level of genre 'computing' doc.add(new FloatAssociationFacetField(0.87f, "genre", "computing")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); // 1 occurrence for tag 'lucene' doc.add(new IntAssociationFacetField(1, "tags", "lucene")); // 2 occurrence for tag 'solr' doc.add(new IntAssociationFacetField(2, "tags", "solr")); // 75% confidence level of genre 'computing' doc.add(new FloatAssociationFacetField(0.75f, "genre", "computing")); // 34% confidence level of genre 'software' doc.add(new FloatAssociationFacetField(0.34f, "genre", "software")); indexWriter.addDocument(config.build(taxoWriter, doc)); indexWriter.close();//from w w w . java 2 s . c o m taxoWriter.close(); }
From source file:com.czw.search.lucene.example.facet.DistanceFacetsExample.java
License:Apache License
/** Build the example index. */ public void index() throws IOException { IndexWriter writer = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter // Add documents with latitude/longitude location: // we index these both as DoublePoints (for bounding box/ranges) and as NumericDocValuesFields (for scoring) Document doc = new Document(); doc.add(new DoublePoint("latitude", 40.759011)); doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.759011))); doc.add(new DoublePoint("longitude", -73.9844722)); doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-73.9844722))); writer.addDocument(doc);//from ww w . jav a 2 s . co m doc = new Document(); doc.add(new DoublePoint("latitude", 40.718266)); doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.718266))); doc.add(new DoublePoint("longitude", -74.007819)); doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.007819))); writer.addDocument(doc); doc = new Document(); doc.add(new DoublePoint("latitude", 40.7051157)); doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.7051157))); doc.add(new DoublePoint("longitude", -74.0088305)); doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.0088305))); writer.addDocument(doc); // Open near-real-time searcher searcher = new IndexSearcher(DirectoryReader.open(writer)); writer.close(); }
From source file:com.czw.search.lucene.example.facet.ExpressionAggregationFacetsExample.java
License:Apache License
/** Build the example index. */ private void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); // Writes facet ords to a separate directory from the main index DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); Document doc = new Document(); doc.add(new TextField("c", "foo bar", Store.NO)); doc.add(new NumericDocValuesField("popularity", 5L)); doc.add(new FacetField("A", "B")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new TextField("c", "foo foo bar", Store.NO)); doc.add(new NumericDocValuesField("popularity", 3L)); doc.add(new FacetField("A", "C")); indexWriter.addDocument(config.build(taxoWriter, doc)); indexWriter.close();/*from w ww. j a v a 2 s . c o m*/ taxoWriter.close(); }
From source file:com.czw.search.lucene.example.facet.MultiCategoryListsFacetsExample.java
License:Apache License
/** Build the example index. */ private void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); // Writes facet ords to a separate directory from the main index DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); Document doc = new Document(); doc.add(new FacetField("Author", "Bob")); doc.add(new FacetField("Publish Date", "2010", "10", "15")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Lisa")); doc.add(new FacetField("Publish Date", "2010", "10", "20")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Lisa")); doc.add(new FacetField("Publish Date", "2012", "1", "1")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Susan")); doc.add(new FacetField("Publish Date", "2012", "1", "7")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Frank")); doc.add(new FacetField("Publish Date", "1999", "5", "5")); indexWriter.addDocument(config.build(taxoWriter, doc)); indexWriter.close();// ww w.j a v a 2s . c om taxoWriter.close(); }
From source file:com.czw.search.lucene.example.facet.RangeFacetsExample.java
License:Apache License
/** Build the example index. */ public void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); // Add documents with a fake timestamp, 1000 sec before // "now", 2000 sec before "now", ...: for (int i = 0; i < 100; i++) { Document doc = new Document(); long then = nowSec - i * 1000; // Add as doc values field, so we can compute range facets: doc.add(new NumericDocValuesField("timestamp", then)); // Add as numeric field so we can drill-down: doc.add(new LongPoint("timestamp", then)); indexWriter.addDocument(doc);/*from w w w .j a va2 s. com*/ } // Open near-real-time searcher searcher = new IndexSearcher(DirectoryReader.open(indexWriter)); indexWriter.close(); }
From source file:com.czw.search.lucene.example.facet.SimpleFacetsExample.java
License:Apache License
/** * Build the example index.//from w w w . jav a 2 s . c o m */ private void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); // Writes facet ords to a separate directory from the main index DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); Document doc = new Document(); doc.add(new FacetField("Author", "Bob")); doc.add(new FacetField("Publish Date", "2010", "10", "15")); doc.add(new FacetField("Category", "art")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Bob")); doc.add(new FacetField("Publish Date", "2010", "10", "15")); doc.add(new FacetField("Category", "biography")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Lisa")); doc.add(new FacetField("Publish Date", "2010", "10", "20")); doc.add(new FacetField("Category", "fiction")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Lisa")); doc.add(new FacetField("Publish Date", "2012", "1", "1")); doc.add(new FacetField("Category", "food")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Susan")); doc.add(new FacetField("Publish Date", "2012", "1", "7")); doc.add(new FacetField("Category", "science")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Frank")); doc.add(new FacetField("Publish Date", "1999", "5", "5")); doc.add(new FacetField("Category", "crime")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Frank")); doc.add(new FacetField("Publish Date", "2011", "5", "15")); doc.add(new FacetField("Category", "fiction")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Frank")); doc.add(new FacetField("Publish Date", "2017", "6", "5")); doc.add(new FacetField("Category", "science")); indexWriter.addDocument(config.build(taxoWriter, doc)); indexWriter.close(); taxoWriter.close(); }