List of usage examples for the org.apache.lucene.index.IndexWriter#close() method
@Override public void close() throws IOException
From source file:com.sxc.lucene.index.IndexingTest.java
License:Apache License
protected void setUp() throws Exception { // 1 directory = FSDirectory.open(new File("D:/programming/lucene/indexingTest")); IndexWriter writer = getWriter(); // 2 writer.deleteAll();//from ww w . j av a2 s. c o m for (int i = 0; i < ids.length; i++) { // 3 Document doc = new Document(); doc.add(new StringField("id", ids[i], Field.Store.YES)); doc.add(new StringField("country", unindexed[i], Field.Store.YES)); doc.add(new TextField("contents", unstored[i], Field.Store.NO)); doc.add(new TextField("city", text[i], Field.Store.YES)); writer.addDocument(doc); } writer.close(); }
From source file:com.sxc.lucene.index.IndexingTest.java
License:Apache License
public void testIndexWriter() throws IOException { IndexWriter writer = getWriter(); assertEquals(ids.length, writer.numDocs()); // 7 writer.close(); }
From source file:com.sxc.lucene.index.IndexingTest.java
License:Apache License
public void testDeleteBeforeOptimize() throws IOException { IndexWriter writer = getWriter(); assertEquals(2, writer.numDocs()); // A writer.deleteDocuments(new Term("id", "1")); // B writer.commit();/* w w w. j ava2 s. c o m*/ assertTrue(writer.hasDeletions()); // 1 assertEquals(2, writer.maxDoc()); // 2 assertEquals(1, writer.numDocs()); // 2 writer.close(); }
From source file:com.sxc.lucene.index.IndexingTest.java
License:Apache License
public void testDeleteAfterOptimize() throws IOException { IndexWriter writer = getWriter(); assertEquals(2, writer.numDocs());// www . j a v a 2 s . co m writer.deleteDocuments(new Term("id", "1")); writer.forceMergeDeletes(); // 3 writer.commit(); assertFalse(writer.hasDeletions()); assertEquals(1, writer.maxDoc()); // C assertEquals(1, writer.numDocs()); // C writer.close(); }
From source file:com.sxc.lucene.index.IndexingTest.java
License:Apache License
public void testUpdate() throws IOException { assertEquals(1, getHitCount("city", "amsterdam")); IndexWriter writer = getWriter(); Document doc = new Document(); // A doc.add(new StringField("id", "1", Field.Store.YES)); // A doc.add(new TextField("country", "Netherlands", Field.Store.YES)); // A doc.add(new TextField("contents", "Den Haag has a lot of museums", Field.Store.NO)); // A doc.add(new TextField("city", "Den Haag", Field.Store.YES)); // A writer.updateDocument(new Term("id", "1"), // B doc); // B writer.close(); assertEquals(0, getHitCount("city", "amsterdam"));// C assertEquals(1, getHitCount("city", "haag")); // D }
From source file:com.sxc.lucene.searching.PhraseQueryTest.java
License:Apache License
protected void setUp() throws IOException { dir = FSDirectory.open(new File("D:/programming/lucene/PhraseQueryTest")); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, new SmartChineseAnalyzer(Version.LUCENE_47)); config.setOpenMode(OpenMode.CREATE); IndexWriter writer = new IndexWriter(dir, config); Document doc = new Document(); doc.add(new TextField("field", // 1 "the quick brown fox jumped over the lazy dog", // 1 Field.Store.YES)); // 1 writer.addDocument(doc);//w w w .java 2s .com writer.close(); searcher = new IndexSearcher(DirectoryReader.open(dir)); }
From source file:com.tamingtext.frankenstein.Frankenstein.java
License:Apache License
/** * Index the content of Frankenstein//w w w .ja v a 2 s. c om * * @throws IOException */ private void index() throws IOException { System.out.println("Indexing Frankenstein"); InputStream stream = getClass().getClassLoader().getResourceAsStream("frankenstein-gutenberg.txt"); BufferedReader reader = new BufferedReader(new InputStreamReader(stream)); //let's index paragraphs at a time IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36)); directory = new RAMDirectory(); IndexWriter iw = new IndexWriter(directory, conf); String line; StringBuilder paraBuffer = new StringBuilder(2048); int lines = 0; int paragraphs = 0; int paragraphLines = 0; while ((line = reader.readLine()) != null) { if (line.contains("End of the Project Gutenberg")) {//we are in the license section at the end of the book break; } if (line.startsWith("#")) {//skip comments continue; } //if the line is blank, we have a paragraph, so let's index it if (line.matches("^\\s*$") && paraBuffer.length() > 0) { Document doc = new Document(); //We can retrieve by paragraph number if we want String theString = paraBuffer.toString(); theString.trim(); if (theString.length() > 0 && theString.matches("^\\s*$") == false) { addMetadata(doc, lines, paragraphs, paragraphLines); doc.add(new Field("paragraph", theString, Field.Store.YES, Field.Index.ANALYZED));//add the main content iw.addDocument(doc);//Index the document paragraphs++; } //reset some of our state paraBuffer.setLength(0);//we are done w/ this paragraph paragraphLines = 0; } else { paraBuffer.append(line).append(' '); } lines++; paragraphLines++; } System.out.println("Processed " + lines + " lines. Paragraphs: " + paragraphs); iw.close(); }
From source file:com.tamingtext.fuzzy.OverlapMeasures.java
License:Apache License
public TopDocs cosine(String queryTerm, int n, String... terms) throws IOException, ParseException { Directory directory = new RAMDirectory(); final Pattern pattern = Pattern.compile("."); Analyzer analyzer = new Analyzer() { @Override// w w w . j a v a 2 s. c o m public TokenStream tokenStream(String fieldName, Reader reader) { TokenStream result = null; try { result = new PatternTokenizer(reader, pattern, 0); } catch (IOException e) { } return result; } }; IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36, analyzer); IndexWriter writer = new IndexWriter(directory, conf); for (String term : terms) { Document doc = new Document(); doc.add(new Field("chars", term, Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc); } writer.close(); IndexReader reader = IndexReader.open(directory); IndexSearcher searcher = new IndexSearcher(reader); TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), terms.length); for (int i = 0; i < topDocs.scoreDocs.length; i++) { System.out.println("Id: " + topDocs.scoreDocs[i].doc + " Val: " + searcher.doc(topDocs.scoreDocs[i].doc).get("chars")); } QueryParser qp = new QueryParser(Version.LUCENE_36, "chars", analyzer); Query query = qp.parse(queryTerm); return searcher.search(query, n); }
From source file:com.taobao.common.tedis.support.lucene.analysis.xanalyzer.TestHighLight.java
License:Open Source License
/**
 * Standalone highlighting demo: indexes a single document containing
 * CONTENT into a RAMDirectory, searches it with QUERY, and prints the
 * best-matching fragments with the hits wrapped by BoldFormatter.
 * Written against the pre-3.0 Lucene API (Hits, Searcher,
 * TermPositionVector), which is deprecated/removed in later releases.
 *
 * @param args unused
 */
public static void main(String[] args) {
    Directory ramDir = new RAMDirectory();
    try {
        // Index one document, storing term vectors with positions and
        // offsets so the highlighter can rebuild the token stream later
        // without re-analyzing the text.
        IndexWriter writer = new IndexWriter(ramDir, XFactory.getWriterAnalyzer());
        Document doc = new Document();
        Field fd = new Field(FIELD_NAME, CONTENT, Field.Store.YES, Field.Index.TOKENIZED,
                Field.TermVector.WITH_POSITIONS_OFFSETS);
        doc.add(fd);
        writer.addDocument(doc);
        writer.optimize(); // merge to a single segment before reading
        writer.close();

        IndexReader reader = IndexReader.open(ramDir);
        String queryString = QUERY;
        // Parse with the same analyzer used at index time so query tokens
        // line up with the indexed tokens.
        QueryParser parser = new QueryParser(FIELD_NAME, XFactory.getWriterAnalyzer());
        Query query = parser.parse(queryString);
        System.out.println(query);
        Searcher searcher = new IndexSearcher(ramDir);
        // Rewrite to primitive queries so QueryScorer sees concrete terms.
        query = query.rewrite(reader);
        System.out.println(query);
        System.out.println("Searching for: " + query.toString(FIELD_NAME));
        Hits hits = searcher.search(query);

        BoldFormatter formatter = new BoldFormatter();
        Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
        highlighter.setTextFragmenter(new SimpleFragmenter(50)); // ~50-char fragments
        for (int i = 0; i < hits.length(); i++) {
            String text = hits.doc(i).get(FIELD_NAME);
            int maxNumFragmentsRequired = 5;
            String fragmentSeparator = "...";
            // Recover the token stream from the stored term vector instead
            // of re-tokenizing the stored text.
            TermPositionVector tpv = (TermPositionVector) reader.getTermFreqVector(hits.id(i), FIELD_NAME);
            TokenStream tokenStream = TokenSources.getTokenStream(tpv);
            String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
                    fragmentSeparator);
            System.out.println("\n" + result);
        }
        reader.close();
    } catch (Exception e) {
        // Demo code: report and exit. NOTE(review): the searcher is never
        // closed on any path — preserved from the original.
        e.printStackTrace();
    }
}
From source file:com.tekstosense.stemmer.index.Indexer.java
License:Open Source License
/** * Indexer./*from w w w. j av a2s. com*/ * * @throws IOException * Signals that an I/O exception has occurred. */ private static void indexer() throws IOException { StandardAnalyzer analyzer = new StandardAnalyzer(); Path indexDirectoryPath = new File(INDEX_PATH).toPath(); FSDirectory indexDirectory = new SimpleFSDirectory(indexDirectoryPath); IndexWriterConfig conf = new IndexWriterConfig(analyzer); IndexWriter w = new IndexWriter(indexDirectory, conf); addDoc(w, "Lucene in Action", "193398817"); addDoc(w, "Lucene for Dummies", "55320055Z"); addDoc(w, "Managing Gigabytes", "55063554A"); addDoc(w, "The Art of Computer Science", "9900333X"); w.close(); }