List of usage examples for the org.apache.lucene.store.RAMDirectory constructor
public RAMDirectory()
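Before the per-project examples below, here is a minimal, self-contained sketch of the no-arg constructor in use: build an index entirely in heap memory, add one document, and search it back. This assumes a Lucene 5.x-7.x classpath (RAMDirectory was deprecated in later releases and removed in Lucene 9, where ByteBuffersDirectory replaces it); the class name RAMDirectoryExample is just for illustration.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.RAMDirectory;

public class RAMDirectoryExample {
    public static void main(String[] args) throws Exception {
        // The whole index lives on the heap; nothing touches disk.
        RAMDirectory dir = new RAMDirectory();
        StandardAnalyzer analyzer = new StandardAnalyzer();

        // Index a single document.
        try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
            Document doc = new Document();
            doc.add(new TextField("content", "an in-memory Lucene index", Field.Store.YES));
            writer.addDocument(doc);
        }

        // Search it back.
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs hits = searcher.search(new QueryParser("content", analyzer).parse("lucene"), 10);
            System.out.println("hits: " + hits.scoreDocs.length);
        }
    }
}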
From source file:eu.planets_project.ifr.core.techreg.formats.FormatRegistryIndexer.java
License:Open Source License
/**
 * No-arg constructor; initializes the index.
 */
FormatRegistryIndexer() {
    // Init the index.
    // Store the index in memory:
    directory = new RAMDirectory();
    // To store an index on disk, use this instead:
    //Directory directory = FSDirectory.getDirectory("/tmp/testindex");
}
From source file:examples.JenaTextExample1.java
License:Apache License
public static Dataset createCode() {
    log.info("Construct an in-memory dataset with in-memory lucene index using code");
    // Build a text dataset by code.
    // Here, in-memory base data and in-memory Lucene index.

    // Base data
    Dataset ds1 = DatasetFactory.create();

    // Define the index mapping
    EntityDefinition entDef = new EntityDefinition("uri", "text");
    entDef.setPrimaryPredicate(RDFS.label.asNode());

    // Lucene, in memory.
    Directory dir = new RAMDirectory();

    // Join together into a dataset
    Dataset ds = TextDatasetFactory.createLucene(ds1, dir, new TextIndexConfig(entDef));

    return ds;
}
From source file:fi.passiba.services.bibledata.sword.index.lucene.LuceneIndex.java
License:Open Source License
/**
 * Generate an index to use, telling the job about progress as you go.
 * @throws BookException If we fail to read the index files
 */
public LuceneIndex(Book book, URI storage, boolean create) throws BookException {
    assert create;

    this.book = book;
    File finalPath = null;
    try {
        finalPath = NetUtil.getAsFile(storage);
        this.path = finalPath.getCanonicalPath();
    } catch (IOException ex) {
        throw new BookException(UserMsg.LUCENE_INIT, ex);
    }
    System.out.println("index path " + finalPath.getAbsolutePath());

    // Indexing the book is a good way to police data errors.
    DataPolice.setBook(book.getBookMetaData());

    IndexStatus finalStatus = IndexStatus.UNDONE;
    Analyzer analyzer = new LuceneAnalyzer(book);
    List errors = new ArrayList();
    File tempPath = new File(path + '.' + IndexStatus.CREATING.toString());

    try {
        synchronized (CREATING) {
            book.setIndexStatus(IndexStatus.CREATING);

            // An index is created by opening an IndexWriter with the create argument set to true.
            //IndexWriter writer = new IndexWriter(tempPath.getCanonicalPath(), analyzer, true);

            // Create the index in core.
            RAMDirectory ramDir = new RAMDirectory();
            IndexWriter writer = new IndexWriter(ramDir, analyzer, true);

            generateSearchIndexImpl(errors, writer, book.getGlobalKeyList(), 0);

            // Consolidate the index into the minimum number of files.
            // writer.optimize(); /* Optimize is done by addIndexes */
            writer.close();

            // Write the core index to disk.
            IndexWriter fsWriter = new IndexWriter(tempPath.getCanonicalPath(), analyzer, true);
            fsWriter.addIndexes(new Directory[] { ramDir });
            fsWriter.close();

            // Free up the space used by the RAM directory.
            ramDir.close();

            tempPath.renameTo(finalPath);
            if (finalPath.exists()) {
                finalStatus = IndexStatus.DONE;
            }

            if (errors.size() > 0) {
                StringBuffer buf = new StringBuffer();
                Iterator iter = errors.iterator();
                while (iter.hasNext()) {
                    buf.append(iter.next());
                    buf.append('\n');
                }
                Reporter.informUser(this, UserMsg.BAD_VERSE, buf);
            }
        }
    } catch (IOException ex) {
        throw new BookException(UserMsg.LUCENE_INIT, ex);
    } finally {
        book.setIndexStatus(finalStatus);
    }
}
From source file:fr.ericlab.sondy.algo.eventdetection.ET.java
License:Open Source License
public static LinkedList<String> getFrequentBigrams(String tweets, HashSet<String> bigrams) {
    try {
        LinkedList<String> FCB = new LinkedList<String>();
        WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_36);
        RAMDirectory temporaryIndex = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        IndexWriter temporaryWriter = new IndexWriter(temporaryIndex, config);
        Document doc = new Document();
        doc.add(new Field("content", tweets, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
        temporaryWriter.addDocument(doc);
        temporaryWriter.commit();
        IndexReader temporaryReader = IndexReader.open(temporaryWriter, true);
        TermEnum allTerms = temporaryReader.terms();
        while (allTerms.next()) {
            String term = allTerms.term().text();
            if (bigrams.contains(term)) {
                FCB.add(term);
            }
        }
        temporaryWriter.close();
        temporaryReader.close();
        temporaryIndex.close();
        return FCB;
    } catch (LockObtainFailedException ex) {
        Logger.getLogger(ET.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(ET.class.getName()).log(Level.SEVERE, null, ex);
    }
    return new LinkedList<>();
}
From source file:fr.ericlab.sondy.algo.eventdetection.MABED.java
License:Open Source License
MABEDTopic getRefinedTopic(MABEDTopic simpleTopic, int nbrelatedTerms) {
    MABEDTopic refinedTopic = new MABEDTopic();
    String[] frequentTerms = new String[nbrelatedTerms];
    try {
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        RAMDirectory temporaryIndex = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        IndexWriter temporaryWriter = new IndexWriter(temporaryIndex, config);
        Document doc = new Document();
        doc.add(new Field("content",
                dbAccess.getMessagesAsString(appVariables, simpleTopic.mainTerm, simpleTopic.I.timeSliceA,
                        simpleTopic.I.timeSliceB),
                Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
        temporaryWriter.addDocument(doc);
        temporaryWriter.commit();
        IndexReader temporaryReader = IndexReader.open(temporaryWriter, true);
        TermEnum allTerms = temporaryReader.terms();
        int minFreq = 0;
        TermInfoList termList = new TermInfoList();
        while (allTerms.next()) {
            String term = allTerms.term().text();
            if (!term.equals(simpleTopic.mainTerm) && term.length() > 1 && !appVariables.isStopWord(term)) {
                int cf = IndexAccess.getTermOccurenceCount(temporaryReader, term);
                if (cf > minFreq) {
                    termList.addTermInfo(new TermInfo(term, (int) cf));
                    termList.sortList();
                    if (termList.size() > nbrelatedTerms) {
                        termList.removeLast();
                    }
                    minFreq = termList.get(termList.size() - 1).occurence;
                }
            }
        }
        for (int i = 0; i < termList.size() && i < nbrelatedTerms; i++) {
            frequentTerms[i] = termList.get(i).text;
        }
        temporaryWriter.close();
        temporaryReader.close();
        temporaryIndex.close();
        float ref[] = indexAccess.getTermFrequency(appVariables, simpleTopic.mainTerm);
        float comp[];
        refinedTopic = new MABEDTopic(simpleTopic.mainTerm, simpleTopic.I, simpleTopic.score,
                simpleTopic.anomaly);
        for (int j = 0; j < nbrelatedTerms && frequentTerms[j] != null; j++) {
            comp = indexAccess.getTermFrequency(appVariables, frequentTerms[j]);
            double w = getErdemCoefficient(ref, comp, simpleTopic.I.timeSliceA, simpleTopic.I.timeSliceB);
            if (w >= _THETA_) {
                refinedTopic.relatedTerms.add(new MABEDWeightedTerm(frequentTerms[j], w));
            }
        }
    } catch (IOException ex) {
        Logger.getLogger(MABED.class.getName()).log(Level.SEVERE, null, ex);
    }
    return refinedTopic;
}
From source file:fr.ericlab.sondy.core.DataManipulation.java
License:Open Source License
public String[] getFrequentCoocurringTerms(String document, int numTerms, String baseTerm,
        AppVariables appVariables) {
    String[] frequentTerms = new String[numTerms];
    try {
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        RAMDirectory index = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        IndexWriter w = new IndexWriter(index, config);
        Document doc = new Document();
        doc.add(new Field("content", document, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
        w.addDocument(doc);
        w.commit();
        IndexReader r = IndexReader.open(w, true);
        TermEnum allTerms = r.terms();
        int minFreq = 0;
        TermInfoList termList = new TermInfoList();
        StopWords stopWords = appVariables.currentStopWords;
        HashSet<String> stopWordsSet = stopWords.getSet();
        stopWords.add(baseTerm);
        while (allTerms.next()) {
            String term = allTerms.term().text();
            if (term.length() > 1 && !stopWordsSet.contains(term)) {
                float cf = getTermOccurenceCount(r, term);
                if (cf > minFreq) {
                    termList.addTermInfo(new TermInfo(term, (int) cf));
                    termList.sortList();
                    if (termList.size() > numTerms) {
                        termList.removeLast();
                    }
                    minFreq = termList.get(termList.size() - 1).occurence;
                }
            }
        }
        for (int i = 0; i < termList.size(); i++) {
            frequentTerms[i] = termList.get(i).text;
        }
        w.close();
        r.close();
        index.close();
    } catch (Exception ex) {
        Logger.getLogger(DataManipulation.class.getName()).log(Level.SEVERE, null, ex);
    }
    return frequentTerms;
}
From source file:fr.ericlab.sondy.core.DataManipulation.java
License:Open Source License
public String[] getFrequentCoocurringTermsFromFile(int numTerms, String baseTerm, AppVariables appVariables) {
    String[] frequentTerms = new String[numTerms];
    try {
        BufferedReader input = new BufferedReader(new FileReader("tmp.msg"));
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        RAMDirectory index = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        IndexWriter w = new IndexWriter(index, config);
        String line = "";
        String document = "";
        int count = 0;
        while ((line = input.readLine()) != null) {
            count++;
            document += line;
            if (count == 2000) {
                Document doc = new Document();
                doc.add(new Field("content", document, Field.Store.NO, Field.Index.ANALYZED,
                        Field.TermVector.YES));
                w.addDocument(doc);
                w.commit();
                count = 0;
                document = "";
            }
        }
        Document doc = new Document();
        doc.add(new Field("content", document, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
        w.addDocument(doc);
        w.commit();
        input.close();
        IndexReader r = IndexReader.open(w, true);
        TermEnum allTerms = r.terms();
        int minFreq = 0;
        TermInfoList termList = new TermInfoList();
        StopWords stopWords = appVariables.currentStopWords;
        HashSet<String> stopWordsSet = stopWords.getSet();
        stopWords.add(baseTerm);
        while (allTerms.next()) {
            String term = allTerms.term().text();
            if (term.length() > 1 && !stopWordsSet.contains(term)) {
                float cf = getTermOccurenceCount(r, term);
                if (cf > minFreq) {
                    termList.addTermInfo(new TermInfo(term, (int) cf));
                    termList.sortList();
                    if (termList.size() > numTerms) {
                        termList.removeLast();
                    }
                    minFreq = termList.get(termList.size() - 1).occurence;
                }
            }
        }
        for (int i = 0; i < termList.size(); i++) {
            frequentTerms[i] = termList.get(i).text;
        }
        w.close();
        r.close();
        index.close();
    } catch (Exception ex) {
        Logger.getLogger(DataManipulation.class.getName()).log(Level.SEVERE, null, ex);
    }
    return frequentTerms;
}
From source file:fr.univ_tours.etu.searcher.LikeThisTest.java
public void init() throws IOException {
    analyzer = new StandardAnalyzer();
    config = new IndexWriterConfig(analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    indexDir = new RAMDirectory(); // don't write to disk
    //indexDir = FSDirectory.open(new File("/Path/to/luceneIndex/")); // write to disk
}
From source file:framework.retrieval.engine.index.create.impl.RIndexWriteProvider.java
License:Apache License
/**
 * Creates a RIndexWriterWrap backed by an in-memory RAMDirectory.
 * @return the wrapped RAM-based index writer
 */
public RIndexWriterWrap createRamIndexWriter() {
    RIndexWriterWrap indexWriterWrap = new RIndexWriterWrap();
    RAMDirectory ramDir = new RAMDirectory();
    IndexWriter ramWriter = null;
    try {
        ramWriter = new IndexWriter(ramDir, analyzerFactory.createIndexAnalyzer(), true,
                MaxFieldLength.UNLIMITED);
    } catch (Exception e) {
        ramDir.close();
        throw new RetrievalCreateIndexException(e);
    }
    indexWriterWrap.setDirectory(ramDir);
    indexWriterWrap.setIndexWriter(ramWriter);
    return indexWriterWrap;
}
From source file:gov.nasa.ensemble.core.plan.editor.search.PlanIndexer.java
License:Open Source License
/**
 * Constructor for PlanIndexer.
 */
public PlanIndexer() {
    booleanAttributes = new Vector<String>();
    attributes = new Vector<String>();
    dir = new RAMDirectory();
    try {
        writer = new IndexWriter(dir, analyzer, true, new KeepOnlyLastCommitDeletionPolicy(),
                new MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH));
    } catch (IOException e) {
        System.out.println("IOException in opening IndexWriter: " + e.getMessage());
    }
}