List of usage examples for the org.apache.lucene.store.RAMDirectory constructor
public RAMDirectory()
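Before the per-project examples below, here is a minimal, self-contained sketch of the no-arg constructor in use: build an index entirely in heap memory, add one document, and search it back. This assumes a Lucene 5.x-7.x classpath (RAMDirectory was deprecated in later releases and removed in Lucene 9, where ByteBuffersDirectory replaces it); the class name RAMDirectoryExample is just for illustration.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.RAMDirectory;

public class RAMDirectoryExample {
    public static void main(String[] args) throws Exception {
        // The whole index lives on the heap; nothing touches disk.
        RAMDirectory dir = new RAMDirectory();
        StandardAnalyzer analyzer = new StandardAnalyzer();

        // Index a single document.
        try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
            Document doc = new Document();
            doc.add(new TextField("content", "an in-memory Lucene index", Field.Store.YES));
            writer.addDocument(doc);
        }

        // Search it back.
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs hits = searcher.search(new QueryParser("content", analyzer).parse("lucene"), 10);
            System.out.println("hits: " + hits.scoreDocs.length);
        }
    }
}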
From source file:eu.planets_project.ifr.core.techreg.formats.FormatRegistryIndexer.java
License:Open Source License
/**
 * No-arg constructor; initializes the index.
 */
FormatRegistryIndexer() {
    // Init the index.
    // Store the index in memory:
    directory = new RAMDirectory();
    // To store an index on disk, use this instead:
    //Directory directory = FSDirectory.getDirectory("/tmp/testindex");
}
From source file:examples.JenaTextExample1.java
License:Apache License
public static Dataset createCode() {
    log.info("Construct an in-memory dataset with in-memory lucene index using code");
    // Build a text dataset by code.
    // Here, in-memory base data and in-memory Lucene index.

    // Base data
    Dataset ds1 = DatasetFactory.create();

    // Define the index mapping
    EntityDefinition entDef = new EntityDefinition("uri", "text");
    entDef.setPrimaryPredicate(RDFS.label.asNode());

    // Lucene, in memory.
    Directory dir = new RAMDirectory();

    // Join together into a dataset
    Dataset ds = TextDatasetFactory.createLucene(ds1, dir, new TextIndexConfig(entDef));

    return ds;
}
From source file:fi.passiba.services.bibledata.sword.index.lucene.LuceneIndex.java
License:Open Source License
/**
 * Generate an index to use, telling the job about progress as you go.
 * @throws BookException If we fail to read the index files
 */
public LuceneIndex(Book book, URI storage, boolean create) throws BookException {
    assert create;

    this.book = book;
    File finalPath = null;
    try {
        finalPath = NetUtil.getAsFile(storage);
        this.path = finalPath.getCanonicalPath();
    } catch (IOException ex) {
        throw new BookException(UserMsg.LUCENE_INIT, ex);
    }
    System.out.println("index path " + finalPath.getAbsolutePath());

    // Indexing the book is a good way to police data errors.
    DataPolice.setBook(book.getBookMetaData());

    IndexStatus finalStatus = IndexStatus.UNDONE;
    Analyzer analyzer = new LuceneAnalyzer(book);
    List errors = new ArrayList();
    File tempPath = new File(path + '.' + IndexStatus.CREATING.toString());

    try {
        synchronized (CREATING) {
            book.setIndexStatus(IndexStatus.CREATING);

            // An index is created by opening an IndexWriter with the create argument set to true.
            //IndexWriter writer = new IndexWriter(tempPath.getCanonicalPath(), analyzer, true);

            // Create the index in core.
            RAMDirectory ramDir = new RAMDirectory();
            IndexWriter writer = new IndexWriter(ramDir, analyzer, true);

            generateSearchIndexImpl(errors, writer, book.getGlobalKeyList(), 0);

            // Consolidate the index into the minimum number of files.
            // writer.optimize(); /* Optimize is done by addIndexes */
            writer.close();

            // Write the core index to disk.
            IndexWriter fsWriter = new IndexWriter(tempPath.getCanonicalPath(), analyzer, true);
            fsWriter.addIndexes(new Directory[] { ramDir });
            fsWriter.close();

            // Free up the space used by the RAM directory.
            ramDir.close();

            tempPath.renameTo(finalPath);
            if (finalPath.exists()) {
                finalStatus = IndexStatus.DONE;
            }

            if (errors.size() > 0) {
                StringBuffer buf = new StringBuffer();
                Iterator iter = errors.iterator();
                while (iter.hasNext()) {
                    buf.append(iter.next());
                    buf.append('\n');
                }
                Reporter.informUser(this, UserMsg.BAD_VERSE, buf);
            }
        }
    } catch (IOException ex) {
        throw new BookException(UserMsg.LUCENE_INIT, ex);
    } finally {
        book.setIndexStatus(finalStatus);
    }
}
From source file:fr.ericlab.sondy.algo.eventdetection.ET.java
License:Open Source License
public static LinkedList<String> getFrequentBigrams(String tweets, HashSet<String> bigrams) {
    try {
        LinkedList<String> FCB = new LinkedList<String>();
        WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_36);
        RAMDirectory temporaryIndex = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        IndexWriter temporaryWriter = new IndexWriter(temporaryIndex, config);
        Document doc = new Document();
        doc.add(new Field("content", tweets, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
        temporaryWriter.addDocument(doc);
        temporaryWriter.commit();
        IndexReader temporaryReader = IndexReader.open(temporaryWriter, true);
        TermEnum allTerms = temporaryReader.terms();
        while (allTerms.next()) {
            String term = allTerms.term().text();
            if (bigrams.contains(term)) {
                FCB.add(term);
            }
        }
        temporaryWriter.close();
        temporaryReader.close();
        temporaryIndex.close();
        return FCB;
    } catch (LockObtainFailedException ex) {
        Logger.getLogger(ET.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(ET.class.getName()).log(Level.SEVERE, null, ex);
    }
    return new LinkedList<>();
}
From source file:fr.ericlab.sondy.algo.eventdetection.MABED.java
License:Open Source License
MABEDTopic getRefinedTopic(MABEDTopic simpleTopic, int nbrelatedTerms) {
    MABEDTopic refinedTopic = new MABEDTopic();
    String[] frequentTerms = new String[nbrelatedTerms];
    try {
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        RAMDirectory temporaryIndex = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        IndexWriter temporaryWriter = new IndexWriter(temporaryIndex, config);
        Document doc = new Document();
        doc.add(new Field("content",
                dbAccess.getMessagesAsString(appVariables, simpleTopic.mainTerm, simpleTopic.I.timeSliceA,
                        simpleTopic.I.timeSliceB),
                Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
        temporaryWriter.addDocument(doc);
        temporaryWriter.commit();
        IndexReader temporaryReader = IndexReader.open(temporaryWriter, true);
        TermEnum allTerms = temporaryReader.terms();
        int minFreq = 0;
        TermInfoList termList = new TermInfoList();
        while (allTerms.next()) {
            String term = allTerms.term().text();
            if (!term.equals(simpleTopic.mainTerm) && term.length() > 1 && !appVariables.isStopWord(term)) {
                int cf = IndexAccess.getTermOccurenceCount(temporaryReader, term);
                if (cf > minFreq) {
                    termList.addTermInfo(new TermInfo(term, (int) cf));
                    termList.sortList();
                    if (termList.size() > nbrelatedTerms) {
                        termList.removeLast();
                    }
                    minFreq = termList.get(termList.size() - 1).occurence;
                }
            }
        }
        for (int i = 0; i < termList.size() && i < nbrelatedTerms; i++) {
            frequentTerms[i] = termList.get(i).text;
        }
        temporaryWriter.close();
        temporaryReader.close();
        temporaryIndex.close();
        float ref[] = indexAccess.getTermFrequency(appVariables, simpleTopic.mainTerm);
        float comp[];
        refinedTopic = new MABEDTopic(simpleTopic.mainTerm, simpleTopic.I, simpleTopic.score,
                simpleTopic.anomaly);
        for (int j = 0; j < nbrelatedTerms && frequentTerms[j] != null; j++) {
            comp = indexAccess.getTermFrequency(appVariables, frequentTerms[j]);
            double w = getErdemCoefficient(ref, comp, simpleTopic.I.timeSliceA, simpleTopic.I.timeSliceB);
            if (w >= _THETA_) {
                refinedTopic.relatedTerms.add(new MABEDWeightedTerm(frequentTerms[j], w));
            }
        }
    } catch (IOException ex) {
        Logger.getLogger(MABED.class.getName()).log(Level.SEVERE, null, ex);
    }
    return refinedTopic;
}
From source file:fr.ericlab.sondy.core.DataManipulation.java
License:Open Source License
public String[] getFrequentCoocurringTerms(String document, int numTerms, String baseTerm,
        AppVariables appVariables) {
    String[] frequentTerms = new String[numTerms];
    try {
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        RAMDirectory index = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        IndexWriter w = new IndexWriter(index, config);
        Document doc = new Document();
        doc.add(new Field("content", document, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
        w.addDocument(doc);
        w.commit();
        IndexReader r = IndexReader.open(w, true);
        TermEnum allTerms = r.terms();
        int minFreq = 0;
        TermInfoList termList = new TermInfoList();
        StopWords stopWords = appVariables.currentStopWords;
        HashSet<String> stopWordsSet = stopWords.getSet();
        stopWords.add(baseTerm);
        while (allTerms.next()) {
            String term = allTerms.term().text();
            if (term.length() > 1 && !stopWordsSet.contains(term)) {
                float cf = getTermOccurenceCount(r, term);
                if (cf > minFreq) {
                    termList.addTermInfo(new TermInfo(term, (int) cf));
                    termList.sortList();
                    if (termList.size() > numTerms) {
                        termList.removeLast();
                    }
                    minFreq = termList.get(termList.size() - 1).occurence;
                }
            }
        }
        for (int i = 0; i < termList.size(); i++) {
            frequentTerms[i] = termList.get(i).text;
        }
        w.close();
        r.close();
        index.close();
    } catch (Exception ex) {
        Logger.getLogger(DataManipulation.class.getName()).log(Level.SEVERE, null, ex);
    }
    return frequentTerms;
}
From source file:fr.ericlab.sondy.core.DataManipulation.java
License:Open Source License
public String[] getFrequentCoocurringTermsFromFile(int numTerms, String baseTerm, AppVariables appVariables) {
    String[] frequentTerms = new String[numTerms];
    try {
        BufferedReader input = new BufferedReader(new FileReader("tmp.msg"));
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        RAMDirectory index = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        IndexWriter w = new IndexWriter(index, config);
        String line = "";
        String document = "";
        int count = 0;
        while ((line = input.readLine()) != null) {
            count++;
            document += line;
            if (count == 2000) {
                Document doc = new Document();
                doc.add(new Field("content", document, Field.Store.NO, Field.Index.ANALYZED,
                        Field.TermVector.YES));
                w.addDocument(doc);
                w.commit();
                count = 0;
                document = "";
            }
        }
        Document doc = new Document();
        doc.add(new Field("content", document, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
        w.addDocument(doc);
        w.commit();
        input.close();
        IndexReader r = IndexReader.open(w, true);
        TermEnum allTerms = r.terms();
        int minFreq = 0;
        TermInfoList termList = new TermInfoList();
        StopWords stopWords = appVariables.currentStopWords;
        HashSet<String> stopWordsSet = stopWords.getSet();
        stopWords.add(baseTerm);
        while (allTerms.next()) {
            String term = allTerms.term().text();
            if (term.length() > 1 && !stopWordsSet.contains(term)) {
                float cf = getTermOccurenceCount(r, term);
                if (cf > minFreq) {
                    termList.addTermInfo(new TermInfo(term, (int) cf));
                    termList.sortList();
                    if (termList.size() > numTerms) {
                        termList.removeLast();
                    }
                    minFreq = termList.get(termList.size() - 1).occurence;
                }
            }
        }
        for (int i = 0; i < termList.size(); i++) {
            frequentTerms[i] = termList.get(i).text;
        }
        w.close();
        r.close();
        index.close();
    } catch (Exception ex) {
        Logger.getLogger(DataManipulation.class.getName()).log(Level.SEVERE, null, ex);
    }
    return frequentTerms;
}
From source file:fr.univ_tours.etu.searcher.LikeThisTest.java
public void init() throws IOException {
    analyzer = new StandardAnalyzer();
    config = new IndexWriterConfig(analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    indexDir = new RAMDirectory(); // don't write to disk
    //indexDir = FSDirectory.open(new File("/Path/to/luceneIndex/")); // write to disk
}
From source file:framework.retrieval.engine.index.create.impl.RIndexWriteProvider.java
License:Apache License
/**
 * Creates a RIndexWriterWrap backed by an in-memory RAMDirectory.
 * @return the wrapped RAM-based index writer
 */
public RIndexWriterWrap createRamIndexWriter() {
    RIndexWriterWrap indexWriterWrap = new RIndexWriterWrap();
    RAMDirectory ramDir = new RAMDirectory();
    IndexWriter ramWriter = null;
    try {
        ramWriter = new IndexWriter(ramDir, analyzerFactory.createIndexAnalyzer(), true,
                MaxFieldLength.UNLIMITED);
    } catch (Exception e) {
        ramDir.close();
        throw new RetrievalCreateIndexException(e);
    }
    indexWriterWrap.setDirectory(ramDir);
    indexWriterWrap.setIndexWriter(ramWriter);
    return indexWriterWrap;
}
From source file:gov.nasa.ensemble.core.plan.editor.search.PlanIndexer.java
License:Open Source License
/**
 * Constructor for PlanIndexer.
 */
public PlanIndexer() {
    booleanAttributes = new Vector<String>();
    attributes = new Vector<String>();
    dir = new RAMDirectory();
    try {
        writer = new IndexWriter(dir, analyzer, true, new KeepOnlyLastCommitDeletionPolicy(),
                new MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH));
    } catch (IOException e) {
        System.out.println("IOException in opening IndexWriter: " + e.getMessage());
    }
}