List of usage examples for org.apache.lucene.index IndexWriter close
@Override public void close() throws IOException
From source file:com.example.analyzer.server.database.DbFullTextIndex.java
License:Open Source License
public DbFullTextIndex(DbTable dbTable, int columnOffset) {
    try {
        long beginTime = System.currentTimeMillis();
        ramDirectory = new RAMDirectory();
        IndexWriter writer = new IndexWriter(ramDirectory, new StandardAnalyzer(Version.LUCENE_30),
                new MaxFieldLength(50));
        int rowCount = dbTable.getRowCount();
        for (int rowOffset = 0; rowOffset < rowCount; rowOffset++) {
            String value = dbTable.coalesce(rowOffset, columnOffset, "").toString();
            byte[] idArray = getBytes(rowOffset);
            Document document = new Document();
            document.add(new Field(ID, idArray, Field.Store.YES));
            // TODO: Determine whether we need to store value
            document.add(new Field(VALUE, value, Store.YES, Index.ANALYZED));
            writer.addDocument(document);
        }
        writer.optimize();
        writer.close();
        long endTime = System.currentTimeMillis();
        long elapsedTime = endTime - beginTime;
        System.out.println("created index in " + elapsedTime + " ms");
    } catch (CorruptIndexException e) {
        throw new RuntimeException(e);
    } catch (LockObtainFailedException e) {
        throw new RuntimeException(e);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
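Note that if addDocument or optimize throws here, writer.close() is never reached and the directory keeps its write lock. A minimal sketch of the same loop with the close guaranteed, assuming the same Lucene 3.0 API as above (buildDocument is a hypothetical helper standing in for the per-row document construction):

IndexWriter writer = new IndexWriter(ramDirectory,
        new StandardAnalyzer(Version.LUCENE_30), new MaxFieldLength(50));
try {
    for (int rowOffset = 0; rowOffset < rowCount; rowOffset++) {
        writer.addDocument(buildDocument(rowOffset)); // hypothetical helper
    }
    writer.optimize();
} finally {
    writer.close(); // always release the write lock, even on failure
}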
From source file:com.example.search.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index "
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    // String docsPath = "E:\\work\\webExp\\exp\\sina_ansi";
    String docsPath = ".\\doc";
    boolean create = true;
    /*
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }
    */
    /*
    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }
    */
    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(new File(indexPath));
        // Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
        Analyzer analyzer = new ICTCLASAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer. But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here. This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        // Date.getMinutes() is deprecated and wraps at hour boundaries;
        // derive elapsed minutes from the millisecond difference instead.
        System.out.println((end.getTime() - start.getTime()) / 60000 + " total minutes");
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}
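Because this IndexWriter is only closed on the success path, an exception in indexDocs leaves the index locked. Assuming a Lucene release where IndexWriter implements Closeable (true for modern versions) and Java 7+, a hedged sketch of the same step with close guaranteed:

try (IndexWriter writer = new IndexWriter(dir, iwc)) {
    indexDocs(writer, docDir);
    // writer.close() runs automatically here, even if indexDocs throws
}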
From source file:com.flaptor.hounder.indexer.util.MergeIndexes.java
License:Apache License
public static void main(String args[]) {
    if (args.length < 3) {
        System.err.println("Usage: MergeIndexes outputIndex index1 index2 [... indexN]");
        System.exit(-1);
    }
    try {
        IndexWriter iw = new IndexWriter(args[0], new StandardAnalyzer(), true,
                IndexWriter.MaxFieldLength.UNLIMITED);
        iw.setMaxBufferedDocs(1000);
        IndexReader readers[] = new IndexReader[args.length - 1];
        for (int i = 0; i < args.length - 1; i++) {
            readers[i] = IndexReader.open(args[i + 1]);
        }
        iw.addIndexes(readers);
        iw.optimize();
        iw.close();
    } catch (IOException e) {
        System.err.println(e);
    }
}
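The source readers above are never closed; for a one-shot tool the JVM exit covers it, but the cleanup is cheap to make explicit. A minimal sketch under the same Lucene 2.x/3.x API:

IndexReader[] readers = new IndexReader[args.length - 1];
try {
    for (int i = 0; i < readers.length; i++) {
        readers[i] = IndexReader.open(args[i + 1]);
    }
    iw.addIndexes(readers);
    iw.optimize();
} finally {
    iw.close();                     // close the writer even if the merge fails
    for (IndexReader r : readers) { // then release every source reader we managed to open
        if (r != null) {
            r.close();
        }
    }
}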
From source file:com.flaptor.hounder.searcher.spell.SpellChecker.java
License:Apache License
public void clearIndex() throws IOException {
    IndexWriter.unlock(spellindex);
    IndexWriter writer = new IndexWriter(spellindex, null, true, IndexWriter.MaxFieldLength.UNLIMITED);
    writer.close();
}
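Opening a writer with create=true and closing it immediately is a common idiom for emptying an index. A roughly equivalent sketch using deleteAll(), assuming a Lucene version (2.9+) where IndexWriter.deleteAll() is available:

IndexWriter writer = new IndexWriter(spellindex, null, false, IndexWriter.MaxFieldLength.UNLIMITED);
try {
    writer.deleteAll(); // drop every document without recreating the directory
} finally {
    writer.close();     // commits the deletion and releases the write lock
}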
From source file:com.flaptor.hounder.searcher.spell.SpellChecker.java
License:Apache License
/**
 * Index a Dictionary
 * @param dict the dictionary to index
 * @throws IOException
 */
public void indexDictionary(Dictionary dict) throws IOException {
    IndexWriter.unlock(spellindex);
    IndexWriter writer = new IndexWriter(spellindex, new WhitespaceAnalyzer(),
            !IndexReader.indexExists(spellindex), IndexWriter.MaxFieldLength.UNLIMITED);
    writer.setMergeFactor(300);
    writer.setMaxBufferedDocs(150);
    for (Pair<String, Float> pair : dict) {
        String word = pair.first();
        float boost = pair.last();
        int len = word.length();
        if (len < 3) {
            continue; // too short we bail but "too long" is fine...
        }
        if (this.exist(word)) { // skip words that already exist in the gram index
            continue;
        }
        // ok, index the word
        Document doc = createDocument(word, boost, getMin(len), getMax(len));
        writer.addDocument(doc);
    }
    // close writer
    writer.optimize();
    writer.close();
    // close reader
    reader.close();
    reader = null;
}
From source file:com.flaptor.hounder.util.Idx.java
License:Apache License
public static void main(String arg[]) throws Exception {
    check(arg.length > 1, null);
    String cmd = arg[0];
    File idx = new File(arg[1]);

    if ("list".equals(cmd)) {
        int num = (arg.length > 2) ? Integer.parseInt(arg[2]) : -1;
        check(idx.exists(), "Index dir not found");
        IndexReader reader = IndexReader.open(idx);
        int docs = reader.numDocs();
        int max = reader.maxDoc();
        System.err.println("Index contains " + docs + " documents plus " + (max - docs) + " deleted.");
        if (num > -1) {
            if (num == 0)
                num = docs;
            for (int i = 0; i < max && i < num; i++) {
                System.out.println("----------------------------------------");
                if (!reader.isDeleted(i)) {
                    Document doc = reader.document(i);
                    List flds = doc.getFields();
                    Iterator iter = flds.iterator();
                    while (iter.hasNext()) {
                        Field fld = (Field) iter.next();
                        String attr = (fld.isIndexed() ? ",i" : "") + (fld.isStored() ? ",s" : "")
                                + (fld.isTokenized() ? ",t" : "");
                        System.out.println(fld.name() + attr + ": " + fld.stringValue());
                    }
                }
            }
            System.out.println();
        }
        reader.close(); // close the reader whether or not documents were listed
    } else if ("search".equals(cmd)) {
        check(idx.exists(), "Index dir not found");
        check(arg.length > 3, "Not enough arguments");
        String field = arg[2];
        String value = arg[3];
        IndexSearcher searcher = new IndexSearcher(IndexReader.open(idx));
        ScorelessHitCollector collector = new HashSetScorelessHitCollector();
        searcher.search(new TermQuery(new Term(field, value)), collector);
        Set<Integer> docIds = collector.getMatchingDocuments();
        System.out.println("\nNumber of hits: " + docIds.size() + "\n");
        for (Integer docId : docIds) {
            Document doc = searcher.doc(docId);
            List flds = doc.getFields();
            Iterator iter = flds.iterator();
            while (iter.hasNext()) {
                Field fld = (Field) iter.next();
                System.out.println(fld.name() + ": " + fld.stringValue());
            }
        }
        searcher.close();
        System.out.println();
    } else if ("delete".equals(cmd)) {
        check(idx.exists(), "Index dir not found");
        check(arg.length > 3, "Not enough arguments");
        String field = arg[2];
        String value = arg[3];
        IndexReader reader = IndexReader.open(idx);
        reader.deleteDocuments(new Term(field, value));
        reader.close();
    } else if ("optimize".equals(cmd)) {
        IndexWriter writer = new IndexWriter(idx, new StandardAnalyzer(), false,
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.optimize();
        writer.close();
    } else if ("merge".equals(cmd)) {
        check(arg.length == 3, "not enough parameters");
        File idx2 = new File(arg[2]);
        check(idx.exists(), "Index dir 1 not found");
        check(idx2.exists(), "Index dir 2 not found");
        IndexReader reader = IndexReader.open(idx2);
        IndexWriter writer = new IndexWriter(idx, new StandardAnalyzer(), false,
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.addIndexes(new IndexReader[] { reader });
        writer.close();
        reader.close();
    } else if ("term-count".equals(cmd)) {
        check(arg.length == 3, "not enough parameters");
        check(idx.exists(), "Index dir not found");
        IndexReader reader = IndexReader.open(idx);
        String field = arg[2];
        int count = 0;
        TermEnum terms = reader.terms();
        while (terms.next()) {
            Term term = terms.term();
            if (term.field().equals(field))
                count++;
        }
        terms.close();
        reader.close();
        System.out.println("Found " + count + " different values for field " + field);
    } else if ("hit-count".equals(cmd)) {
        check(arg.length > 3, "Not enough arguments");
        check(idx.exists(), "Index dir not found");
        String field = arg[2];
        String value = arg[3];
        IndexSearcher searcher = new IndexSearcher(IndexReader.open(idx));
        CountingHitCollector collector = new CountingHitCollector();
        searcher.search(new TermQuery(new Term(field, value)), collector);
        System.out.println("\nNumber of hits: " + collector.getDocCount() + "\n");
        searcher.close();
    } else if ("uncompound".equals(cmd)) {
        IndexWriter writer = new IndexWriter(idx, new StandardAnalyzer(), false,
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.setUseCompoundFile(false);
        writer.optimize();
        writer.close();
    } else if ("compound".equals(cmd)) {
        IndexWriter writer = new IndexWriter(idx, new StandardAnalyzer(), false,
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.setUseCompoundFile(true);
        writer.optimize();
        writer.close();
    } else if ("terms".equals(cmd)) {
        check(arg.length == 3, "not enough parameters");
        check(idx.exists(), "Index dir not found");
        String field = arg[2];
        IndexReader reader = IndexReader.open(idx);
        TermEnum terms = reader.terms();
        while (terms.next()) {
            Term t = terms.term();
            if (t.field().equals(field)) {
                System.out.println(t.text());
            }
        }
        reader.close();
    }
}
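Every write path above ends with writer.close(), which in this Lucene 3.x API commits pending changes and waits for background merges. The same API also offers close(boolean waitForMerges) for callers that would rather not wait; a minimal hedged sketch:

IndexWriter writer = new IndexWriter(idx, new StandardAnalyzer(), false,
        IndexWriter.MaxFieldLength.UNLIMITED);
try {
    writer.optimize();
} finally {
    // close() is equivalent to close(true): commit and wait for running merges.
    // close(false) still commits, but abandons merges that are in flight.
    writer.close(false);
}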
From source file:com.flycode.CRIBSearch.SearchEngine.Demo.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */
public static void main(String[] args) {
    String usage = "java com.flycode.CRIBSearch.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index "
            + "in INDEX_PATH that can be searched with SearchFiles class";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(Paths.get(indexPath));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer. But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here. This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}
From source file:com.FormBasedXmlQueryDemo.java
License:Apache License
private void openExampleIndex() throws IOException {
    // Create a RAM-based index from our test data file
    RAMDirectory rd = new RAMDirectory();
    IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    IndexWriter writer = new IndexWriter(rd, iwConfig);
    InputStream dataIn = getServletContext().getResourceAsStream("/WEB-INF/data.tsv");
    BufferedReader br = new BufferedReader(new InputStreamReader(dataIn, IOUtils.CHARSET_UTF_8));
    String line = br.readLine();
    final FieldType textNoNorms = new FieldType(TextField.TYPE_STORED);
    textNoNorms.setOmitNorms(true);
    while (line != null) {
        line = line.trim();
        if (line.length() > 0) {
            // parse row and create a document
            StringTokenizer st = new StringTokenizer(line, "\t");
            Document doc = new Document();
            doc.add(new Field("location", st.nextToken(), textNoNorms));
            doc.add(new Field("salary", st.nextToken(), textNoNorms));
            doc.add(new Field("type", st.nextToken(), textNoNorms));
            doc.add(new Field("description", st.nextToken(), textNoNorms));
            writer.addDocument(doc);
        }
        line = br.readLine();
    }
    writer.close();

    // open searcher
    // note: this example never closes its reader!
    IndexReader reader = DirectoryReader.open(rd);
    searcher = new IndexSearcher(reader);
}
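The comment is accurate: the reader stays open for the servlet's lifetime. A hypothetical cleanup hook, assuming the reader were kept in a field rather than a local (both the field and the destroy() override are illustrative additions, not part of the original class):

// Hypothetical: keep the reader in a field so it can be released later.
private IndexReader reader;

@Override
public void destroy() {
    try {
        reader.close(); // release the reader when the servlet is unloaded
    } catch (IOException e) {
        // nothing useful to do during shutdown
    }
    super.destroy();
}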
From source file:com.foundationdb.lucene.SimpleTest.java
License:Open Source License
@Test
public void indexBasic() throws Exception {
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_44, analyzer);
    // recreate the index on each execution
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    config.setCodec(new FDBCodec());
    FDBDirectory dir = createDirectoryForMethod();
    IndexWriter writer = new IndexWriter(dir, config);
    try {
        writer.addDocument(Arrays.asList(
                new TextField("title", "The title of my first document", Store.YES),
                new TextField("content", "The content of the first document", Store.NO)));
        writer.addDocument(Arrays.asList(
                new TextField("title", "The title of the second document", Store.YES),
                new TextField("content", "And this is the content", Store.NO)));
    } finally {
        writer.close();
    }
    assertDocumentsAreThere(dir, 2);
}
From source file:com.fuerve.villageelder.actions.results.SearchResultItemTest.java
License:Apache License
private void buildDummyIndex(final Directory indexDirectory, final Directory taxonomyDirectory)
        throws IOException {
    IndexWriterConfig iwc = new IndexWriterConfig(Lucene.LUCENE_VERSION, Lucene.getPerFieldAnalyzer());
    iwc.setOpenMode(OpenMode.CREATE);
    IndexWriter iw = new IndexWriter(indexDirectory, iwc);
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(taxonomyDirectory, OpenMode.CREATE);

    List<CategoryPath> categories = new ArrayList<CategoryPath>();
    FacetFields facetFields = new FacetFields(tw);

    Document doc = new Document();
    categories.clear();
    doc.add(new StringField("Author", "foo", Store.YES));
    categories.add(new CategoryPath("Author", "foo"));
    doc.add(new LongField("RevisionNumber", 50L, Store.YES));
    doc.add(new StringField("Revision", "50", Store.YES));
    doc.add(new TextField("Message", "stuff", Store.YES));
    iw.addDocument(doc);
    facetFields.addFields(doc, categories);

    doc = new Document();
    facetFields = new FacetFields(tw);
    categories.clear();
    doc.add(new StringField("Author", "bar", Store.YES));
    categories.add(new CategoryPath("Author", "bar"));
    doc.add(new LongField("RevisionNumber", 5000L, Store.YES));
    doc.add(new StringField("Revision", "5000", Store.YES));
    doc.add(new TextField("Message", "stuff", Store.YES));
    iw.addDocument(doc);
    facetFields.addFields(doc, categories);

    tw.commit();
    tw.close();
    iw.commit();
    iw.close();
}
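When an index writer and a taxonomy writer must be shut down together, Lucene's own IOUtils can tidy the sequence; a minimal sketch, assuming org.apache.lucene.util.IOUtils (present since roughly Lucene 3.5), whose close(Closeable...) closes every argument and rethrows the first exception it encounters:

import java.io.IOException;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.util.IOUtils;

// A sketch: commit both writers, then close both even if one close fails.
private static void commitAndClose(IndexWriter iw, TaxonomyWriter tw) throws IOException {
    tw.commit();
    iw.commit();
    IOUtils.close(tw, iw); // both are closed; the first exception (if any) propagates
}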