List of usage examples for org.apache.lucene.index IndexWriter IndexWriter
public IndexWriter(Directory d, IndexWriterConfig conf) throws IOException
conf
. From source file:com.company.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; String docsPath = null;// w w w . j ava 2 s. co m boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final Path docDir = Paths.get(docsPath); if (!Files.isReadable(docDir)) { System.out.println("Document directory '" + docDir.toAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:com.concursive.connect.indexer.LuceneIndexer.java
License:Open Source License
public void obtainWriterLock() throws Exception { writeLock.lock();/*from www . j a v a2 s.c o m*/ // Open the full writer try { fullWriter = new IndexWriter(fullIndex, new StandardAnalyzer()); } catch (Exception e) { LOG.error("Begin indexing error on fullWriter", e); } // Open the directory writer try { directoryWriter = new IndexWriter(directoryIndex, new SnowballAnalyzer("English")); } catch (Exception e) { LOG.error("Begin indexing error on directoryWriter", e); } }
From source file:com.czw.search.lucene.example.facet.AssociationsFacetsExample.java
License:Apache License
/** Build the example index. */ private void index() throws IOException { IndexWriterConfig iwc = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE); IndexWriter indexWriter = new IndexWriter(indexDir, iwc); // Writes facet ords to a separate directory from the main index DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); Document doc = new Document(); // 3 occurrences for tag 'lucene' doc.add(new IntAssociationFacetField(3, "tags", "lucene")); // 87% confidence level of genre 'computing' doc.add(new FloatAssociationFacetField(0.87f, "genre", "computing")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); // 1 occurrence for tag 'lucene' doc.add(new IntAssociationFacetField(1, "tags", "lucene")); // 2 occurrence for tag 'solr' doc.add(new IntAssociationFacetField(2, "tags", "solr")); // 75% confidence level of genre 'computing' doc.add(new FloatAssociationFacetField(0.75f, "genre", "computing")); // 34% confidence level of genre 'software' doc.add(new FloatAssociationFacetField(0.34f, "genre", "software")); indexWriter.addDocument(config.build(taxoWriter, doc)); indexWriter.close();/*from ww w . j a va2 s.com*/ taxoWriter.close(); }
From source file:com.czw.search.lucene.example.facet.DistanceFacetsExample.java
License:Apache License
/** Build the example index. */ public void index() throws IOException { IndexWriter writer = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter // Add documents with latitude/longitude location: // we index these both as DoublePoints (for bounding box/ranges) and as NumericDocValuesFields (for scoring) Document doc = new Document(); doc.add(new DoublePoint("latitude", 40.759011)); doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.759011))); doc.add(new DoublePoint("longitude", -73.9844722)); doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-73.9844722))); writer.addDocument(doc);/*from ww w. j a v a2 s. c o m*/ doc = new Document(); doc.add(new DoublePoint("latitude", 40.718266)); doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.718266))); doc.add(new DoublePoint("longitude", -74.007819)); doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.007819))); writer.addDocument(doc); doc = new Document(); doc.add(new DoublePoint("latitude", 40.7051157)); doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.7051157))); doc.add(new DoublePoint("longitude", -74.0088305)); doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.0088305))); writer.addDocument(doc); // Open near-real-time searcher searcher = new IndexSearcher(DirectoryReader.open(writer)); writer.close(); }
From source file:com.czw.search.lucene.example.facet.ExpressionAggregationFacetsExample.java
License:Apache License
/** Build the example index. */ private void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); // Writes facet ords to a separate directory from the main index DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); Document doc = new Document(); doc.add(new TextField("c", "foo bar", Store.NO)); doc.add(new NumericDocValuesField("popularity", 5L)); doc.add(new FacetField("A", "B")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new TextField("c", "foo foo bar", Store.NO)); doc.add(new NumericDocValuesField("popularity", 3L)); doc.add(new FacetField("A", "C")); indexWriter.addDocument(config.build(taxoWriter, doc)); indexWriter.close();/* w w w .j a va2 s . c om*/ taxoWriter.close(); }
From source file:com.czw.search.lucene.example.facet.MultiCategoryListsFacetsExample.java
License:Apache License
/** Build the example index. */ private void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); // Writes facet ords to a separate directory from the main index DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); Document doc = new Document(); doc.add(new FacetField("Author", "Bob")); doc.add(new FacetField("Publish Date", "2010", "10", "15")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Lisa")); doc.add(new FacetField("Publish Date", "2010", "10", "20")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Lisa")); doc.add(new FacetField("Publish Date", "2012", "1", "1")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Susan")); doc.add(new FacetField("Publish Date", "2012", "1", "7")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Frank")); doc.add(new FacetField("Publish Date", "1999", "5", "5")); indexWriter.addDocument(config.build(taxoWriter, doc)); indexWriter.close();// w w w. j a va 2s . com taxoWriter.close(); }
From source file:com.czw.search.lucene.example.facet.RangeFacetsExample.java
License:Apache License
/** Build the example index. */ public void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); // Add documents with a fake timestamp, 1000 sec before // "now", 2000 sec before "now", ...: for (int i = 0; i < 100; i++) { Document doc = new Document(); long then = nowSec - i * 1000; // Add as doc values field, so we can compute range facets: doc.add(new NumericDocValuesField("timestamp", then)); // Add as numeric field so we can drill-down: doc.add(new LongPoint("timestamp", then)); indexWriter.addDocument(doc);/*from ww w . jav a2 s. c om*/ } // Open near-real-time searcher searcher = new IndexSearcher(DirectoryReader.open(indexWriter)); indexWriter.close(); }
From source file:com.czw.search.lucene.example.facet.SimpleFacetsExample.java
License:Apache License
/** * Build the example index.//w w w . ja va2s . c o m */ private void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); // Writes facet ords to a separate directory from the main index DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); Document doc = new Document(); doc.add(new FacetField("Author", "Bob")); doc.add(new FacetField("Publish Date", "2010", "10", "15")); doc.add(new FacetField("Category", "art")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Bob")); doc.add(new FacetField("Publish Date", "2010", "10", "15")); doc.add(new FacetField("Category", "biography")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Lisa")); doc.add(new FacetField("Publish Date", "2010", "10", "20")); doc.add(new FacetField("Category", "fiction")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Lisa")); doc.add(new FacetField("Publish Date", "2012", "1", "1")); doc.add(new FacetField("Category", "food")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Susan")); doc.add(new FacetField("Publish Date", "2012", "1", "7")); doc.add(new FacetField("Category", "science")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Frank")); doc.add(new FacetField("Publish Date", "1999", "5", "5")); doc.add(new FacetField("Category", "crime")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Frank")); doc.add(new FacetField("Publish Date", "2011", "5", "15")); doc.add(new FacetField("Category", "fiction")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Frank")); doc.add(new FacetField("Publish Date", "2017", "6", "5")); doc.add(new FacetField("Category", "science")); indexWriter.addDocument(config.build(taxoWriter, doc)); indexWriter.close(); taxoWriter.close(); }
From source file:com.czw.search.lucene.example.facet.SimpleSortedSetFacetsExample.java
License:Apache License
/** * Build the example index.//w ww .j av a 2s . com */ private void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); Document doc = new Document(); doc.add(new SortedSetDocValuesFacetField("Author", "Bob")); doc.add(new SortedSetDocValuesFacetField("Publish Year", "2010")); indexWriter.addDocument(config.build(doc)); doc = new Document(); doc.add(new SortedSetDocValuesFacetField("Author", "Lisa")); doc.add(new SortedSetDocValuesFacetField("Publish Year", "2010")); indexWriter.addDocument(config.build(doc)); doc = new Document(); doc.add(new SortedSetDocValuesFacetField("Author", "Lisa")); doc.add(new SortedSetDocValuesFacetField("Publish Year", "2012")); indexWriter.addDocument(config.build(doc)); doc = new Document(); doc.add(new SortedSetDocValuesFacetField("Author", "Susan")); doc.add(new SortedSetDocValuesFacetField("Publish Year", "2012")); indexWriter.addDocument(config.build(doc)); doc = new Document(); doc.add(new SortedSetDocValuesFacetField("Author", "Frank")); doc.add(new SortedSetDocValuesFacetField("Publish Year", "1999")); indexWriter.addDocument(config.build(doc)); indexWriter.close(); }
From source file:com.czw.search.lucene.example.xmlparser.FormBasedXmlQueryDemo.java
License:Apache License
private void openExampleIndex() throws IOException { //Create a RAM-based index from our test data file RAMDirectory rd = new RAMDirectory(); IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer); IndexWriter writer = new IndexWriter(rd, iwConfig); InputStream dataIn = getServletContext().getResourceAsStream("/WEB-INF/data.tsv"); BufferedReader br = new BufferedReader(new InputStreamReader(dataIn, StandardCharsets.UTF_8)); String line = br.readLine();/*from w w w. j a v a 2 s . com*/ final FieldType textNoNorms = new FieldType(TextField.TYPE_STORED); textNoNorms.setOmitNorms(true); while (line != null) { line = line.trim(); if (line.length() > 0) { //parse row and create a document StringTokenizer st = new StringTokenizer(line, "\t"); Document doc = new Document(); doc.add(new Field("location", st.nextToken(), textNoNorms)); doc.add(new Field("salary", st.nextToken(), textNoNorms)); doc.add(new Field("type", st.nextToken(), textNoNorms)); doc.add(new Field("description", st.nextToken(), textNoNorms)); writer.addDocument(doc); } line = br.readLine(); } writer.close(); //open searcher // this example never closes it reader! IndexReader reader = DirectoryReader.open(rd); searcher = new IndexSearcher(reader); }