List of usage examples for org.apache.lucene.store RAMDirectory close
@Override public void close()
From source file:fr.ericlab.sondy.core.DataManipulation.java
License:Open Source License
public String[] getFrequentCoocurringTerms(String document, int numTerms, String baseTerm, AppVariables appVariables) {//from w w w. j a v a 2s . co m String[] frequentTerms = new String[numTerms]; try { StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36); RAMDirectory index = new RAMDirectory(); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer); IndexWriter w = new IndexWriter(index, config); Document doc = new Document(); doc.add(new Field("content", document, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES)); w.addDocument(doc); w.commit(); IndexReader r = IndexReader.open(w, true); TermEnum allTerms = r.terms(); int minFreq = 0; TermInfoList termList = new TermInfoList(); StopWords stopWords = appVariables.currentStopWords; HashSet<String> stopWordsSet = stopWords.getSet(); stopWords.add(baseTerm); while (allTerms.next()) { String term = allTerms.term().text(); if (term.length() > 1 && !stopWordsSet.contains(term)) { float cf = getTermOccurenceCount(r, term); if (cf > minFreq) { termList.addTermInfo(new TermInfo(term, (int) cf)); termList.sortList(); if (termList.size() > numTerms) { termList.removeLast(); } minFreq = termList.get(termList.size() - 1).occurence; } } } for (int i = 0; i < termList.size(); i++) { frequentTerms[i] = termList.get(i).text; } w.close(); r.close(); index.close(); } catch (Exception ex) { Logger.getLogger(DataManipulation.class.getName()).log(Level.SEVERE, null, ex); } return frequentTerms; }
From source file:fr.ericlab.sondy.core.DataManipulation.java
License:Open Source License
/**
 * Returns up to {@code numTerms} terms that co-occur most frequently with
 * {@code baseTerm} in the text read from the file {@code "tmp.msg"},
 * excluding stop words and the base term itself.
 *
 * The file is read line by line and indexed into a throwaway in-memory
 * Lucene 3.6 index in batches of 2000 lines per document, then the term
 * dictionary is scanned keeping a running top-N by collection frequency.
 *
 * NOTE(review): slots of the returned array beyond the number of terms
 * actually found remain null; callers must tolerate that.
 *
 * @param numTerms     maximum number of co-occurring terms to return
 * @param baseTerm     term to exclude (added to the application stop words)
 * @param appVariables application state supplying the current stop words
 * @return array of length {@code numTerms} holding the most frequent
 *         co-occurring terms (possibly with trailing nulls)
 */
public String[] getFrequentCoocurringTermsFromFile(int numTerms, String baseTerm, AppVariables appVariables) {
    String[] frequentTerms = new String[numTerms];
    BufferedReader input = null;
    RAMDirectory index = null;
    IndexWriter w = null;
    IndexReader r = null;
    try {
        input = new BufferedReader(new FileReader("tmp.msg"));
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        index = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        w = new IndexWriter(index, config);
        String line = "";
        String document = "";
        int count = 0;
        // Accumulate the file into Lucene documents of 2000 lines each to
        // bound the size of any single analyzed field.
        while ((line = input.readLine()) != null) {
            count++;
            document += line;
            if (count == 2000) {
                Document doc = new Document();
                doc.add(new Field("content", document, Field.Store.NO, Field.Index.ANALYZED,
                        Field.TermVector.YES));
                w.addDocument(doc);
                w.commit();
                count = 0;
                document = "";
            }
        }
        // Flush the final partial batch (possibly empty).
        Document doc = new Document();
        doc.add(new Field("content", document, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
        w.addDocument(doc);
        w.commit();
        // Near-real-time reader over the writer state.
        r = IndexReader.open(w, true);
        TermEnum allTerms = r.terms();
        try {
            int minFreq = 0;
            TermInfoList termList = new TermInfoList();
            StopWords stopWords = appVariables.currentStopWords;
            HashSet<String> stopWordsSet = stopWords.getSet();
            // NOTE(review): mutates shared application state; presumably getSet()
            // reflects this addition so baseTerm is filtered below — confirm.
            stopWords.add(baseTerm);
            while (allTerms.next()) {
                String term = allTerms.term().text();
                if (term.length() > 1 && !stopWordsSet.contains(term)) {
                    float cf = getTermOccurenceCount(r, term);
                    if (cf > minFreq) {
                        termList.addTermInfo(new TermInfo(term, (int) cf));
                        termList.sortList();
                        if (termList.size() > numTerms) {
                            termList.removeLast();
                        }
                        // Admission threshold is the weakest entry currently kept.
                        minFreq = termList.get(termList.size() - 1).occurence;
                    }
                }
            }
            for (int i = 0; i < termList.size(); i++) {
                frequentTerms[i] = termList.get(i).text;
            }
        } finally {
            allTerms.close(); // BUG FIX: TermEnum was never closed
        }
    } catch (Exception ex) {
        Logger.getLogger(DataManipulation.class.getName()).log(Level.SEVERE, null, ex);
    } finally {
        // BUG FIX: reader/writer/directory/input leaked whenever any step above
        // threw; close them unconditionally, each guarded so one failure does
        // not prevent the others from closing.
        try {
            if (input != null) {
                input.close();
            }
        } catch (Exception ex) {
            Logger.getLogger(DataManipulation.class.getName()).log(Level.SEVERE, null, ex);
        }
        try {
            if (w != null) {
                w.close();
            }
        } catch (Exception ex) {
            Logger.getLogger(DataManipulation.class.getName()).log(Level.SEVERE, null, ex);
        }
        try {
            if (r != null) {
                r.close();
            }
        } catch (Exception ex) {
            Logger.getLogger(DataManipulation.class.getName()).log(Level.SEVERE, null, ex);
        }
        if (index != null) {
            index.close();
        }
    }
    return frequentTerms;
}
From source file:framework.retrieval.engine.index.create.impl.RIndexWriteProvider.java
License:Apache License
/** * ?RIndexWriterWrap/* www . ja v a2 s .co m*/ * @return */ public RIndexWriterWrap createRamIndexWriter() { RIndexWriterWrap indexWriterWrap = new RIndexWriterWrap(); RAMDirectory ramDir = new RAMDirectory(); IndexWriter ramWriter = null; try { ramWriter = new IndexWriter(ramDir, analyzerFactory.createIndexAnalyzer(), true, MaxFieldLength.UNLIMITED); } catch (Exception e) { ramDir.close(); throw new RetrievalCreateIndexException(e); } indexWriterWrap.setDirectory(ramDir); indexWriterWrap.setIndexWriter(ramWriter); return indexWriterWrap; }
From source file:luceneexamples.AddDocument.java
License:Apache License
/**
 * Demonstrates near-real-time search: a searcher over a reader opened from
 * the writer does not see documents committed after the reader was opened
 * until the reader is reopened.
 */
@Test
public void index() throws Exception {
    RAMDirectory directory = new RAMDirectory();
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);
    IndexWriter writer = new IndexWriter(directory, iwc);
    Document doc = new Document();
    doc.add(new Field("str_field", "quick brown fox jumped over the lazy dog.", Field.Store.YES,
            Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.commit();
    IndexReader reader = IndexReader.open(writer, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    QueryParser parser = new QueryParser(Version.LUCENE_31, "str_field", analyzer);
    TopDocs td = searcher.search(parser.parse("fox"), 1000);
    assertThat(td.totalHits, is(1));
    Document doc2 = new Document();
    doc2.add(new Field("str_field", "quick brown dog jumped over the lazy fox.", Field.Store.YES,
            Field.Index.ANALYZED));
    writer.addDocument(doc2);
    writer.commit();
    // The existing reader still reflects the first commit only.
    td = searcher.search(parser.parse("fox"), 1000);
    assertThat(td.totalHits, is(1));
    searcher.close();
    // BUG FIX: reopen() returns a new reader when the index changed; the old
    // reader must be closed explicitly or it leaks.
    IndexReader reopened = reader.reopen();
    if (reopened != reader) {
        reader.close();
    }
    reader = reopened;
    searcher = new IndexSearcher(reader);
    td = searcher.search(parser.parse("fox"), 1000);
    assertThat(td.totalHits, is(2));
    writer.close();
    searcher.close();
    reader.close(); // BUG FIX: the reader itself was never closed
    directory.close();
}
From source file:luceneexamples.IndexAndSearch.java
License:Apache License
/**
 * Indexes two documents and verifies that a query for "fox" matches exactly
 * the one containing it, including its stored field value.
 */
@Test
public void index() throws Exception {
    final RAMDirectory dir = new RAMDirectory();
    final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
    final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_31, analyzer));

    final Document first = new Document();
    first.add(new Field("str_field", "quick brown fox jumped over the lazy dog.", Field.Store.YES,
            Field.Index.ANALYZED));
    writer.addDocument(first);

    // NOTE(review): this literal appears garbled by text extraction; it is
    // reproduced verbatim to preserve behavior.
    final Document second = new Document();
    second.add(new Field("str_field", "?????", Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(second);
    writer.close();

    final IndexSearcher searcher = new IndexSearcher(dir, true);
    final QueryParser parser = new QueryParser(Version.LUCENE_31, "str_field", analyzer);
    final TopDocs hits = searcher.search(parser.parse("fox"), 1000);
    assertThat(hits.totalHits, is(1));
    final Document stored = searcher.doc(hits.scoreDocs[0].doc);
    assertEquals("quick brown fox jumped over the lazy dog.", stored.get("str_field"));

    searcher.close();
    dir.close();
}
From source file:luceneexamples.NumericFieldDocument.java
License:Apache License
@Test public void index() throws Exception { RAMDirectory directory = new RAMDirectory(); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer); IndexWriter writer = new IndexWriter(directory, iwc); for (int i = 8; i < 12; i++) { Document doc = new Document(); doc.add(new NumericField("int_field", Field.Store.YES, true).setIntValue(i)); System.out.println(doc);//from ww w. j av a 2 s . c om writer.addDocument(doc); } writer.commit(); IndexReader reader = IndexReader.open(writer, true); IndexSearcher searcher = new IndexSearcher(reader); TopDocs td = searcher.search(new MatchAllDocsQuery(), 1000, new Sort(new SortField("int_field", SortField.INT))); assertThat(td.totalHits, is(4)); assertThat(searcher.doc(td.scoreDocs[0].doc).get("int_field"), equalTo("8")); assertThat(searcher.doc(td.scoreDocs[1].doc).get("int_field"), equalTo("9")); assertThat(searcher.doc(td.scoreDocs[2].doc).get("int_field"), equalTo("10")); assertThat(searcher.doc(td.scoreDocs[3].doc).get("int_field"), equalTo("11")); reader.close(); writer.close(); searcher.close(); directory.close(); }
From source file:luceneexamples.SortDocuments.java
License:Apache License
/**
 * Indexes three documents and verifies string sorting in both ascending and
 * descending (reverse flag) order.
 */
@Test
public void index() throws Exception {
    final RAMDirectory dir = new RAMDirectory();
    final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
    final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_31, analyzer));

    for (final String value : new String[] { "abc", "def", "hij" }) {
        final Document doc = new Document();
        doc.add(new Field("str_field", value, Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
    }
    writer.commit();

    final IndexReader reader = IndexReader.open(writer, true);
    final IndexSearcher searcher = new IndexSearcher(reader);

    // Ascending string order.
    TopDocs hits = searcher.search(new MatchAllDocsQuery(), 1000,
            new Sort(new SortField("str_field", SortField.STRING)));
    assertThat(hits.totalHits, is(3));
    assertThat(searcher.doc(hits.scoreDocs[0].doc).get("str_field"), equalTo("abc"));
    assertThat(searcher.doc(hits.scoreDocs[1].doc).get("str_field"), equalTo("def"));
    assertThat(searcher.doc(hits.scoreDocs[2].doc).get("str_field"), equalTo("hij"));

    // Descending order via the reverse flag.
    hits = searcher.search(new MatchAllDocsQuery(), 1000,
            new Sort(new SortField("str_field", SortField.STRING, true)));
    assertThat(hits.totalHits, is(3));
    assertThat(searcher.doc(hits.scoreDocs[0].doc).get("str_field"), equalTo("hij"));
    assertThat(searcher.doc(hits.scoreDocs[1].doc).get("str_field"), equalTo("def"));
    assertThat(searcher.doc(hits.scoreDocs[2].doc).get("str_field"), equalTo("abc"));

    reader.close();
    writer.close();
    searcher.close();
    dir.close();
}
From source file:luceneexamples.UpdateDocument.java
License:Apache License
@Test public void index() throws Exception { RAMDirectory directory = new RAMDirectory(); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer); IndexWriter writer = new IndexWriter(directory, iwc); Document doc = new Document(); doc.add(new Field("id", "001", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("str_field", "quick brown fox jumped over the lazy dog.", Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc);// w ww .ja v a 2 s.c om writer.commit(); IndexReader reader = IndexReader.open(writer, true); IndexSearcher searcher = new IndexSearcher(reader); QueryParser parser = new QueryParser(Version.LUCENE_31, "str_field", analyzer); TopDocs td = searcher.search(parser.parse("fox"), 1000); assertThat(td.totalHits, is(1)); Document doc2 = new Document(); doc.add(new Field("id", "001", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc2.add(new Field("str_field", "quick brown fox jumped over the lazy whale.", Field.Store.YES, Field.Index.ANALYZED)); writer.updateDocument(new Term("id", "001"), doc2); writer.commit(); searcher.close(); reader = reader.reopen(); searcher = new IndexSearcher(reader); td = searcher.search(parser.parse("dog"), 1000); assertThat(td.totalHits, is(0)); td = searcher.search(parser.parse("whale"), 1000); assertThat(td.totalHits, is(1)); writer.close(); searcher.close(); directory.close(); }
From source file:net.bible.service.sword.PdaLuceneIndexCreator.java
License:Open Source License
/**
 * Builds a search index for the given book in stages: keys are processed in
 * tenths, each tenth indexed into an in-memory directory, flushed to a
 * per-tenth on-disk index, and finally all per-tenth indexes are merged into
 * a single index under {@code tempPath}.
 *
 * NOTE(review): the outer loop runs {@code tenth < 2}, not {@code < 10},
 * although {@code tempIndexFiles} is sized 10 and {@code tenthCount} is a
 * tenth of the keys — this looks like a debugging leftover; slots 2..9 stay
 * null and the final {@code addIndexesNoOptimize(tempIndexFiles)} will
 * presumably fail on them. Confirm before relying on this method.
 *
 * @param book     book whose global key list is indexed
 * @param job      progress reporter updated during optimization
 * @param analyzer analyzer used for both RAM and disk writers
 * @param errors   sink for per-key indexing errors
 * @param tempPath base directory for the temporary and final indexes
 * @throws CorruptIndexException    on a corrupt Lucene index
 * @throws LockObtainFailedException if an index lock cannot be acquired
 * @throws IOException              on file-system failures
 * @throws BookException            on book access failures
 */
private void createIndex(Book book, Progress job, Analyzer analyzer, List errors, File tempPath)
        throws CorruptIndexException, LockObtainFailedException, IOException, BookException {
    Log.d(TAG, "Getting global key list for " + book.getName());
    Key allKeys = book.getGlobalKeyList();
    Log.d(TAG, "Got global key list");
    int numKeys = allKeys.getCardinality();
    Log.d(TAG, "Total keys:" + numKeys);
    int tenthCount = numKeys / 10;
    Directory[] tempIndexFiles = new Directory[10];
    // NOTE(review): suspicious bound — see method comment above.
    for (int tenth = 0; tenth < 2; tenth++) {
        int startKey = tenth * tenthCount;
        int endKey = (tenth + 1) * tenthCount;
        // Collect this tenth's keys into a fresh key list.
        Key currentKeys = book.createEmptyKeyList();
        for (int i = startKey; i < endKey; i++) {
            Key key = allKeys.get(i);
            Log.d(TAG, "Adding key:" + key.getName());
            currentKeys.addAll(key);
        }
        Log.d(TAG, "1");
        // Create the index in core (RAM) first for speed.
        final RAMDirectory ramDir = new RAMDirectory();
        Log.d(TAG, "2");
        IndexWriter writer = new IndexWriter(ramDir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
        Log.d(TAG, "3");
        generateSearchIndexImpl(job, errors, writer, currentKeys, 0);
        Log.d(TAG, "4");
        job.setSectionName(UserMsg.OPTIMIZING.toString());
        // job.setWork(95);
        // Consolidate the index into the minimum number of files.
        // writer.optimize(); /* Optimize is done by addIndexes */
        writer.close();
        Log.d(TAG, "5");
        // Write the core index to a per-tenth directory on disk.
        String tempFilePath = tempPath.getCanonicalPath() + tenth;
        Log.d(TAG, "temp index path:" + tempFilePath);
        final Directory destination = FSDirectory.open(new File(tempFilePath));
        Log.d(TAG, "6");
        IndexWriter fsWriter = new IndexWriter(destination, analyzer, true,
                IndexWriter.MaxFieldLength.UNLIMITED);
        Log.d(TAG, "7");
        fsWriter.addIndexesNoOptimize(new Directory[] { ramDir });
        Log.d(TAG, "8");
        fsWriter.optimize();
        Log.d(TAG, "9");
        fsWriter.close();
        Log.d(TAG, "10");
        // Free up the space used by the ram directory.
        ramDir.close();
        Log.d(TAG, "11");
        tempIndexFiles[tenth] = destination;
        Log.d(TAG, "12");
    }
    Log.d(TAG, "13");
    // Merge all per-tenth indexes into the final index at tempPath.
    final Directory destination = FSDirectory.open(new File(tempPath.getCanonicalPath()));
    Log.d(TAG, "14");
    IndexWriter fsWriter = new IndexWriter(destination, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
    Log.d(TAG, "15");
    fsWriter.addIndexesNoOptimize(tempIndexFiles);
    Log.d(TAG, "16");
    fsWriter.optimize();
    Log.d(TAG, "17");
    fsWriter.close();
}
From source file:net.skyatlas.icd.test.RAMDirectoryDemo.java
/**
 * Demo: indexes a single document into an in-memory Lucene 4.8 index,
 * runs a TermQuery against it, and prints the stored fields of each hit
 * along with the elapsed time.
 *
 * NOTE(review): several string literals in this snippet appear garbled by
 * text extraction (empty or "?" strings where non-ASCII text presumably
 * was); they are preserved verbatim.
 */
public static void main(String[] args) throws IOException {
    long startTime = System.currentTimeMillis();
    System.err.println("*************************** ****************************");
    RAMDirectory directory = new RAMDirectory();
    Version matchVersion = Version.LUCENE_48;
    Analyzer analyzer = new StandardAnalyzer(matchVersion);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, analyzer);
    // CREATE mode: any existing index in the directory is overwritten.
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(directory, iwc);
    Document doc = new Document();
    doc.add(new Field("name", "Chenghui", Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("sex", "", Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("dosometing", "I am learning lucene ", Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();
    IndexReader reader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(reader);
    // Exact term lookup in the analyzed "dosometing" field.
    Query query = new TermQuery(new Term("dosometing", "lucene"));
    TopDocs rs = searcher.search(query, null, 10);
    long endTime = System.currentTimeMillis();
    System.out.println("" + (endTime - startTime) + "" + rs.totalHits + "?");
    for (int i = 0; i < rs.scoreDocs.length; i++) {
        // scoreDocs[i].doc is the internal document id of the i-th hit.
        Document firstHit = searcher.doc(rs.scoreDocs[i].doc);
        System.out.println("name:" + firstHit.getField("name").stringValue());
        System.out.println("sex:" + firstHit.getField("sex").stringValue());
        System.out.println("dosomething:" + firstHit.getField("dosometing").stringValue());
    }
    reader.close();
    directory.close();
    System.out.println("*****************?**********************");
}