Example usage for org.apache.lucene.store RAMDirectory RAMDirectory

Introduction

This page collects example usages of the no-argument constructor of org.apache.lucene.store.RAMDirectory.

Prototype

public RAMDirectory() 

Document

Constructs an empty Directory.
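
For orientation before the per-project snippets, here is a minimal, self-contained sketch that indexes a single document into a RAMDirectory and searches it back. It assumes the Lucene 3.6 API that most of the examples on this page target; the class name and field name are illustrative:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class RAMDirectoryExample {
    public static void main(String[] args) throws Exception {
        // Everything lives on the heap; nothing touches the file system.
        RAMDirectory dir = new RAMDirectory();

        // Index one document.
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        IndexWriter writer = new IndexWriter(dir, config);
        Document doc = new Document();
        doc.add(new Field("content", "hello world", Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
        writer.close();

        // Search it back.
        IndexReader reader = IndexReader.open(dir);
        IndexSearcher searcher = new IndexSearcher(reader);
        TopDocs hits = searcher.search(new TermQuery(new Term("content", "hello")), 10);
        System.out.println("hits: " + hits.totalHits);

        searcher.close();
        reader.close();
        dir.close();
    }
}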

Usage

From source file:eu.planets_project.ifr.core.techreg.formats.FormatRegistryIndexer.java

License:Open Source License

/**
 * No-arg constructor, initialisation.
 */
FormatRegistryIndexer() {
    // Init the index:
    // Store the index in memory:
    directory = new RAMDirectory();
    // To store an index on disk, use this instead:
    //Directory directory = FSDirectory.getDirectory("/tmp/testindex");
}

From source file:examples.JenaTextExample1.java

License:Apache License

public static Dataset createCode() {
    log.info("Construct an in-memory dataset with in-memory lucene index using code");
    // Build a text dataset by code.
    // Here: in-memory base data and an in-memory Lucene index.

    // Base data
    Dataset ds1 = DatasetFactory.create();

    // Define the index mapping 
    EntityDefinition entDef = new EntityDefinition("uri", "text");
    entDef.setPrimaryPredicate(RDFS.label.asNode());

    // Lucene, in memory.
    Directory dir = new RAMDirectory();

    // Join together into a dataset
    Dataset ds = TextDatasetFactory.createLucene(ds1, dir, new TextIndexConfig(entDef));

    return ds;
}
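
As a hypothetical follow-up (not part of the original source file), a dataset built this way could be queried through jena-text's text:query property function. This sketch reuses the example's Jena classes; the literal 'word' is a placeholder search term:

public static void queryData(Dataset ds) {
    ds.begin(ReadWrite.READ);
    try {
        // Match subjects whose rdfs:label (the indexed primary predicate) contains 'word'.
        String q = StrUtils.strjoinNL("PREFIX text: <http://jena.apache.org/text#>",
                "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>",
                "SELECT ?s WHERE { ?s text:query (rdfs:label 'word') }");
        QueryExecution qExec = QueryExecutionFactory.create(q, ds);
        try {
            ResultSetFormatter.out(qExec.execSelect());
        } finally {
            qExec.close();
        }
    } finally {
        ds.end();
    }
}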

From source file:fi.passiba.services.bibledata.sword.index.lucene.LuceneIndex.java

License:Open Source License

/**
 * Generate an index to use, telling the job about progress as you go.
 * @throws BookException If we fail to read the index files
 */
public LuceneIndex(Book book, URI storage, boolean create) throws BookException {
    assert create;

    this.book = book;
    File finalPath = null;
    try {
        finalPath = NetUtil.getAsFile(storage);
        this.path = finalPath.getCanonicalPath();
    } catch (IOException ex) {
        throw new BookException(UserMsg.LUCENE_INIT, ex);
    }
    System.out.println("index path " + finalPath.getAbsolutePath());
    // Indexing the book is a good way to police data errors.
    DataPolice.setBook(book.getBookMetaData());

    IndexStatus finalStatus = IndexStatus.UNDONE;

    Analyzer analyzer = new LuceneAnalyzer(book);

    List errors = new ArrayList();
    File tempPath = new File(path + '.' + IndexStatus.CREATING.toString());

    try {
        synchronized (CREATING) {

            book.setIndexStatus(IndexStatus.CREATING);

            // An index is created by opening an IndexWriter with the create argument set to true.
            //IndexWriter writer = new IndexWriter(tempPath.getCanonicalPath(), analyzer, true);

            // Create the index in core.
            RAMDirectory ramDir = new RAMDirectory();
            IndexWriter writer = new IndexWriter(ramDir, analyzer, true);

            generateSearchIndexImpl(errors, writer, book.getGlobalKeyList(), 0);

            // Consolidate the index into the minimum number of files.
            // writer.optimize(); /* Optimize is done by addIndexes */
            writer.close();

            // Write the core index to disk.
            IndexWriter fsWriter = new IndexWriter(tempPath.getCanonicalPath(), analyzer, true);
            fsWriter.addIndexes(new Directory[] { ramDir });
            fsWriter.close();

            // Free up the space used by the ram directory
            ramDir.close();

            tempPath.renameTo(finalPath);

            if (finalPath.exists()) {
                finalStatus = IndexStatus.DONE;
            }

            if (errors.size() > 0) {
                StringBuffer buf = new StringBuffer();
                Iterator iter = errors.iterator();
                while (iter.hasNext()) {
                    buf.append(iter.next());
                    buf.append('\n');
                }
                Reporter.informUser(this, UserMsg.BAD_VERSE, buf);
            }

        }
    } catch (IOException ex) {

        throw new BookException(UserMsg.LUCENE_INIT, ex);
    } finally {
        book.setIndexStatus(finalStatus);

    }
}
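
Worth noting in the example above: the index is built entirely in a RAMDirectory for speed, then consolidated onto disk in a single addIndexes call, which also performs the merge that writer.optimize() would otherwise do (hence the commented-out call). A condensed sketch of just that pattern, with the analyzer and target path as placeholders:

RAMDirectory ramDir = new RAMDirectory();
IndexWriter ramWriter = new IndexWriter(ramDir, analyzer, true);
// ... addDocument(...) calls hit only memory, avoiding per-document disk I/O ...
ramWriter.close();

// One pass writes the consolidated index to disk.
IndexWriter fsWriter = new IndexWriter("/path/to/index", analyzer, true);
fsWriter.addIndexes(new Directory[] { ramDir });
fsWriter.close();
ramDir.close();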

From source file:fr.ericlab.sondy.algo.eventdetection.ET.java

License:Open Source License

public static LinkedList<String> getFrequentBigrams(String tweets, HashSet<String> bigrams) {
    try {
        LinkedList<String> FCB = new LinkedList<String>();
        WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_36);
        RAMDirectory temporaryIndex = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        IndexWriter temporaryWriter = new IndexWriter(temporaryIndex, config);
        Document doc = new Document();
        doc.add(new Field("content", tweets, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
        temporaryWriter.addDocument(doc);
        temporaryWriter.commit();
        IndexReader temporaryReader = IndexReader.open(temporaryWriter, true);
        TermEnum allTerms = temporaryReader.terms();
        while (allTerms.next()) {
            String term = allTerms.term().text();
            if (bigrams.contains(term)) {
                FCB.add(term);
            }
        }
        temporaryWriter.close();
        temporaryReader.close();
        temporaryIndex.close();
        return FCB;
    } catch (LockObtainFailedException ex) {
        Logger.getLogger(ET.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(ET.class.getName()).log(Level.SEVERE, null, ex);
    }
    return new LinkedList<>();
}

From source file:fr.ericlab.sondy.algo.eventdetection.MABED.java

License:Open Source License

MABEDTopic getRefinedTopic(MABEDTopic simpleTopic, int nbrelatedTerms) {
    MABEDTopic refinedTopic = new MABEDTopic();
    String[] frequentTerms = new String[nbrelatedTerms];
    try {
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        RAMDirectory temporaryIndex = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        IndexWriter temporaryWriter = new IndexWriter(temporaryIndex, config);
        Document doc = new Document();
        doc.add(new Field("content",
                dbAccess.getMessagesAsString(appVariables, simpleTopic.mainTerm, simpleTopic.I.timeSliceA,
                        simpleTopic.I.timeSliceB),
                Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
        temporaryWriter.addDocument(doc);
        temporaryWriter.commit();
        IndexReader temporaryReader = IndexReader.open(temporaryWriter, true);
        TermEnum allTerms = temporaryReader.terms();
        int minFreq = 0;
        TermInfoList termList = new TermInfoList();
        while (allTerms.next()) {
            String term = allTerms.term().text();
            if (!term.equals(simpleTopic.mainTerm) && term.length() > 1 && !appVariables.isStopWord(term)) {
                int cf = IndexAccess.getTermOccurenceCount(temporaryReader, term);
                if (cf > minFreq) {
                    termList.addTermInfo(new TermInfo(term, (int) cf));
                    termList.sortList();
                    if (termList.size() > nbrelatedTerms) {
                        termList.removeLast();
                    }
                    minFreq = termList.get(termList.size() - 1).occurence;
                }
            }
        }
        for (int i = 0; i < termList.size() && i < nbrelatedTerms; i++) {
            frequentTerms[i] = termList.get(i).text;
        }
        temporaryWriter.close();
        temporaryReader.close();
        temporaryIndex.close();

        float ref[] = indexAccess.getTermFrequency(appVariables, simpleTopic.mainTerm);
        float comp[];
        refinedTopic = new MABEDTopic(simpleTopic.mainTerm, simpleTopic.I, simpleTopic.score,
                simpleTopic.anomaly);
        for (int j = 0; j < nbrelatedTerms && frequentTerms[j] != null; j++) {
            comp = indexAccess.getTermFrequency(appVariables, frequentTerms[j]);
            double w = getErdemCoefficient(ref, comp, simpleTopic.I.timeSliceA, simpleTopic.I.timeSliceB);
            if (w >= _THETA_) {
                refinedTopic.relatedTerms.add(new MABEDWeightedTerm(frequentTerms[j], w));
            }
        }
    } catch (IOException ex) {
        Logger.getLogger(MABED.class.getName()).log(Level.SEVERE, null, ex);
    }
    return refinedTopic;
}

From source file:fr.ericlab.sondy.core.DataManipulation.java

License:Open Source License

public String[] getFrequentCoocurringTerms(String document, int numTerms, String baseTerm,
        AppVariables appVariables) {
    String[] frequentTerms = new String[numTerms];
    try {
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        RAMDirectory index = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        IndexWriter w = new IndexWriter(index, config);
        Document doc = new Document();
        doc.add(new Field("content", document, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
        w.addDocument(doc);
        w.commit();
        IndexReader r = IndexReader.open(w, true);
        TermEnum allTerms = r.terms();
        int minFreq = 0;
        TermInfoList termList = new TermInfoList();
        StopWords stopWords = appVariables.currentStopWords;
        HashSet<String> stopWordsSet = stopWords.getSet();
        stopWords.add(baseTerm);
        while (allTerms.next()) {
            String term = allTerms.term().text();
            if (term.length() > 1 && !stopWordsSet.contains(term)) {
                float cf = getTermOccurenceCount(r, term);
                if (cf > minFreq) {
                    termList.addTermInfo(new TermInfo(term, (int) cf));
                    termList.sortList();
                    if (termList.size() > numTerms) {
                        termList.removeLast();
                    }
                    minFreq = termList.get(termList.size() - 1).occurence;
                }
            }
        }
        for (int i = 0; i < termList.size(); i++) {
            frequentTerms[i] = termList.get(i).text;
        }
        w.close();
        r.close();
        index.close();
    } catch (Exception ex) {
        Logger.getLogger(DataManipulation.class.getName()).log(Level.SEVERE, null, ex);
    }
    return frequentTerms;
}

From source file:fr.ericlab.sondy.core.DataManipulation.java

License:Open Source License

public String[] getFrequentCoocurringTermsFromFile(int numTerms, String baseTerm, AppVariables appVariables) {
    String[] frequentTerms = new String[numTerms];
    try {
        BufferedReader input = new BufferedReader(new FileReader("tmp.msg"));
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        RAMDirectory index = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        IndexWriter w = new IndexWriter(index, config);
        String line = "";
        String document = "";
        int count = 0;
        while ((line = input.readLine()) != null) {
            count++;
            document += line;
            if (count == 2000) {
                Document doc = new Document();
                doc.add(new Field("content", document, Field.Store.NO, Field.Index.ANALYZED,
                        Field.TermVector.YES));
                w.addDocument(doc);
                w.commit();
                count = 0;
                document = "";
            }
        }
        Document doc = new Document();
        doc.add(new Field("content", document, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
        w.addDocument(doc);
        w.commit();
        input.close();
        IndexReader r = IndexReader.open(w, true);
        TermEnum allTerms = r.terms();
        int minFreq = 0;
        TermInfoList termList = new TermInfoList();
        StopWords stopWords = appVariables.currentStopWords;
        HashSet<String> stopWordsSet = stopWords.getSet();
        stopWords.add(baseTerm);
        while (allTerms.next()) {
            String term = allTerms.term().text();
            if (term.length() > 1 && !stopWordsSet.contains(term)) {
                float cf = getTermOccurenceCount(r, term);
                if (cf > minFreq) {
                    termList.addTermInfo(new TermInfo(term, (int) cf));
                    termList.sortList();
                    if (termList.size() > numTerms) {
                        termList.removeLast();
                    }
                    minFreq = termList.get(termList.size() - 1).occurence;
                }
            }
        }
        for (int i = 0; i < termList.size(); i++) {
            frequentTerms[i] = termList.get(i).text;
        }
        w.close();
        r.close();
        index.close();
    } catch (Exception ex) {
        Logger.getLogger(DataManipulation.class.getName()).log(Level.SEVERE, null, ex);
    }
    return frequentTerms;
}

From source file:fr.univ_tours.etu.searcher.LikeThisTest.java

public void init() throws IOException {
    analyzer = new StandardAnalyzer();
    config = new IndexWriterConfig(analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    indexDir = new RAMDirectory(); //don't write on disk
    //indexDir = FSDirectory.open(new File("/Path/to/luceneIndex/")); //write on disk
}

From source file:framework.retrieval.engine.index.create.impl.RIndexWriteProvider.java

License:Apache License

/**
 * Creates a RIndexWriterWrap backed by an in-memory RAMDirectory.
 * @return the wrap holding the RAM directory and its IndexWriter
 */
public RIndexWriterWrap createRamIndexWriter() {

    RIndexWriterWrap indexWriterWrap = new RIndexWriterWrap();

    RAMDirectory ramDir = new RAMDirectory();

    IndexWriter ramWriter = null;
    try {
        ramWriter = new IndexWriter(ramDir, analyzerFactory.createIndexAnalyzer(), true,
                MaxFieldLength.UNLIMITED);
    } catch (Exception e) {
        ramDir.close();
        throw new RetrievalCreateIndexException(e);
    }

    indexWriterWrap.setDirectory(ramDir);
    indexWriterWrap.setIndexWriter(ramWriter);

    return indexWriterWrap;
}

From source file:gov.nasa.ensemble.core.plan.editor.search.PlanIndexer.java

License:Open Source License

/**
 * Constructor for PlanIndexer
 */
public PlanIndexer() {
    booleanAttributes = new Vector<String>();
    attributes = new Vector<String>();
    dir = new RAMDirectory();

    try {
        writer = new IndexWriter(dir, analyzer, true, new KeepOnlyLastCommitDeletionPolicy(),
                new MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH));
    } catch (IOException e) {
        System.out.println("IOException in opening IndexWriter: " + e.getMessage());
    }
}