Example usage for org.apache.lucene.store RAMDirectory RAMDirectory

Introduction

This page collects example usages of the no-argument constructor RAMDirectory() from org.apache.lucene.store, drawn from open-source projects.

Prototype

public RAMDirectory() 

Document

Constructs an empty Directory.
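
A minimal sketch of the constructor in isolation (hypothetical code, not taken from any project below; it assumes the Lucene 4.x API that most of the examples use, and omits imports as the excerpts below do): build a small index entirely in memory, then open it for reading.

// Create an empty in-memory directory and index a single document.
RAMDirectory directory = new RAMDirectory();
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_47, analyzer));

Document doc = new Document();
doc.add(new TextField("body", "an in-memory test document", Field.Store.YES));
writer.addDocument(doc);
writer.close();

// Read the index back from the same directory instance.
IndexReader reader = DirectoryReader.open(directory);
System.out.println("Documents indexed: " + reader.numDocs()); // 1
reader.close();
directory.close();

Because the index lives entirely on the heap, nothing is written to disk; this makes RAMDirectory convenient for unit tests and small transient indexes, as the usage examples below illustrate.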

Usage

From source file: dk.dbc.opensearch.fedora.search.WriteAheadLogTest.java

License: Open Source License

@Before
public void setUp() throws IOException {
    TieredMergePolicy tieredMergePolicy = new TieredMergePolicy();
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_41, new SimpleAnalyzer(Version.LUCENE_41))
            .setWriteLockTimeout(1000L).setMergePolicy(tieredMergePolicy);

    writer = new IndexWriter(new RAMDirectory(), conf);
}

From source file: dk.dma.msinm.lucene.SpatialLuceneTest.java

License: Open Source License

@Test
public void testSpatialSearch() throws IOException, ParseException {

    int maxLevels = 11; // results in sub-meter precision for geohash
    SpatialPrefixTree grid = new GeohashPrefixTree(ctx, maxLevels);

    strategy = new RecursivePrefixTreeStrategy(grid, "myGeoField");
    Directory directory = new RAMDirectory();

    IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_47, null);
    IndexWriter indexWriter = new IndexWriter(directory, iwConfig);
    indexWriter.addDocument(newSampleDocument(2, ctx.makePoint(-80.93, 33.77)));
    indexWriter.addDocument(newSampleDocument(4, ctx.readShapeFromWkt("POINT(60.9289094 -50.7693246)")));
    indexWriter.addDocument(newSampleDocument(20, ctx.makePoint(0.1, 0.1), ctx.makePoint(0, 0)));
    indexWriter.addDocument(newSampleDocument(30,
            JtsSpatialContext.GEO.readShapeFromWkt("POLYGON((0 0, -90 0, -90 40, 0 40, 0 0))")));
    indexWriter.close();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    Sort idSort = new Sort(new SortField("id", SortField.Type.INT));

    // Search 1
    SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,
            ctx.makeCircle(-80.0, 33.0, DistanceUtils.dist2Degrees(200, DistanceUtils.EARTH_MEAN_RADIUS_KM)));
    TopDocs docs = indexSearcher.search(new MatchAllDocsQuery(), strategy.makeFilter(args), 10, idSort);
    assertDocMatchedIds(indexSearcher, docs, 2, 30);

    // Search 2
    args = new SpatialArgs(SpatialOperation.Intersects,
            JtsSpatialContext.GEO.readShapeFromWkt("POLYGON((-10 10, -20 0, -20 20, -10 20, -10 10))"));
    docs = indexSearcher.search(new MatchAllDocsQuery(), strategy.makeFilter(args), 10, idSort);
    assertDocMatchedIds(indexSearcher, docs, 30);
}

From source file: Dl4j.Doc2VecWithAutoEncoder.java

public static void main(String[] args) throws FileNotFoundException, IOException {

    if (args.length < 1) {
        args = new String[1];
        args[0] = "/home/procheta/NetBeansProjects/Dl4jTest/src/dl4jtest/init.properties";
    }
    String[] docs = { "The cat sat on the mat", "The dog sat on the mat", "The chicken ate the corn",
            "The corn was sweet", "The milk was sweet", "The dog sat on the mat", "The cat drank the milk",
            "The dog ate the bone" };

    try {
        Properties prop = new Properties();
        prop.load(new FileReader(args[0]));
        LuceneDocFetcher luceneDocFetcher;

        // test loading a simple collection of docs...
        // Create in-memory index
        RAMDirectory ramdir = new RAMDirectory();

        IndexWriterConfig iwcfg = new IndexWriterConfig(new EnglishAnalyzer());
        iwcfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        IndexWriter writer = new IndexWriter(ramdir, iwcfg);
        for (String doc : docs) {
            try {
                Document lDoc = new Document();
                lDoc.add(new Field(LuceneDocFetcher.CONTENET_FIELD_NAME, doc, Field.Store.NO,
                        Field.Index.ANALYZED, Field.TermVector.YES));
                writer.addDocument(lDoc);
            } catch (Exception e) {
                // silently skip documents that fail to index
            }
        }
        writer.close();
        Path path = Paths.get(prop.getProperty("index"));
        Directory dir = FSDirectory.open(path);

        Doc2VecWithAutoEncoder dva = new Doc2VecWithAutoEncoder();
        System.out.println(prop.getProperty("depth"));
        ArrayList<String> docIds;
        dva.getDocIds(prop.getProperty("qid"), prop.getProperty("qrel"));
        //   docIds = dva.subsample(Integer.parseInt(prop.getProperty("depth")), prop.getProperty("fileList"), prop.getProperty("qid"), prop.getProperty("folderPath"));
        //  dva.saveSampleDocId(docIds, prop.getProperty("sampleOutput"));
        // pass the in-mem index reader to the vectorizer
        //  luceneDocFetcher = new LuceneDocFetcher(dir, dva.docIds);
        luceneDocFetcher = new LuceneDocFetcher(dir, dva.docIds, dva.labels);

        DataSetIterator iter = new BaseDatasetIterator(1, 50, luceneDocFetcher);
        while (iter.hasNext()) {
            DataSet v = iter.next();

            System.out.println(v.getFeatures());
        }

        // test auto-encoding
        final int vocabSize = luceneDocFetcher.getDimension();
        //int seed = Random.nextInt(vocabSize);
        int iterations = 2;
        int listenerFreq = iterations / 5;

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                //.seed(seed)
                .iterations(iterations).optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .list(2)
                .layer(0,
                        new RBM.Builder().nIn(vocabSize).nOut(5)
                                .lossFunction(LossFunctions.LossFunction.RMSE_XENT).build())
                .layer(1,
                        new RBM.Builder().nIn(5).nOut(10).lossFunction(LossFunctions.LossFunction.RMSE_XENT)
                                .build())
                //.pretrain(true)
                //.backprop(true)

                //.layer(2, new RBM.Builder().nIn(500).nOut(250).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build())
                //.layer(3, new RBM.Builder().nIn(250).nOut(100).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build())
                //.layer(4, new RBM.Builder().nIn(100).nOut(30).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build()) 

                /*
                 //encoding stops
                 .layer(5, new RBM.Builder().nIn(30).nOut(100).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build())    
                        
                 //decoding starts
                 .layer(6, new RBM.Builder().nIn(100).nOut(250).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build())
                 .layer(7, new RBM.Builder().nIn(250).nOut(500).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build())
                 .layer(8, new RBM.Builder().nIn(500).nOut(1000).lossFunction(LossFunctions.LossFunction.RMSE_XENT).build())
                 .layer(9, new OutputLayer.Builder(LossFunctions.LossFunction.RMSE_XENT).nIn(1000).nOut(vocabSize).build())
                 .pretrain(true).backprop(true)
                 */
                .build();

        MultiLayerNetwork model = new MultiLayerNetwork(conf);
        model.init();

        model.setListeners(Arrays.asList((IterationListener) new ScoreIterationListener(listenerFreq)));
        model.fit(iter);

        System.out.println("Output layer: ");
        iter.reset();
        while (iter.hasNext()) {
            DataSet v = iter.next();

            // System.out.println(model.output(v.getFeatures()));
        }
        //++Procheta
        iter.reset();
        dva.saveModel(iter, prop.getProperty("output"), model);
    } catch (Exception ex) {
        ex.printStackTrace();
    }

}

From source file: driver651.Driver651.java

License: Apache License

public static void main(String[] args) throws Exception {
    //@FieldCacheImpl.java
    int threadNo = Integer.parseInt(args[0]);
    // 138 vs 179                 507 (original)  all-threads-one-cache
    // 1295      1779               one-thread-one-cache
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
    int theInt = Integer.MAX_VALUE;
    for (int j = 0; j < NUM_FIELDS; j++) {
        for (int i = 0; i < NUM_DOCS; i++) {
            Document doc = new Document();
            doc.add(new Field("theField" + j, String.valueOf(theInt--), Field.Store.NO,
                    Field.Index.UN_TOKENIZED));// notice the field "theFieldj"
            writer.addDocument(doc);
        }
    }

    writer.close();
    reader = IndexReader.open(directory);

    FieldCacheImpl cache = new FieldCacheImpl();// move it out of the loop, then you get the all-threads-one-cache scenario!

    WorkerThread[] workers = new WorkerThread[threadNo];
    for (int i = 0; i < threadNo; i++) {

        workers[i] = new WorkerThread(cache);
    }
    long start = System.currentTimeMillis();
    for (int i = 0; i < threadNo; i++) {
        workers[i].start();
    }

    for (int i = 0; i < threadNo; i++) {
        workers[i].join();
    }

    long end = System.currentTimeMillis();
    System.out.println("duration: " + (end - start));
}

From source file: ead.editor.model.ModelIndex.java

License: Open Source License

/**
* Configure Lucene indexing
*/
public ModelIndex() {
    try {
        searchIndex = new RAMDirectory();
        // use a very simple analyzer; no fancy stopwords, stemming, ...
        searchAnalyzer = new WhitespaceAnalyzer(Version.LUCENE_35);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35, searchAnalyzer);
        indexWriter = new IndexWriter(searchIndex, config);
    } catch (Exception e) {
        logger.error("Could not initialize search index (?)", e);
        throw new IllegalArgumentException("Could not initialize search index (?)", e);
    }
}

From source file: edu.cmu.lti.oaqa.baseqa.concept.rerank.LuceneInMemoryConceptReranker.java

License: Apache License

@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    List<ConceptSearchResult> results = TypeUtil.getRankedConceptSearchResults(jcas);
    // calculate field scores
    Map<String, ConceptSearchResult> uri2result = results.stream().collect(toMap(ConceptSearchResult::getUri,
            Function.identity(), (r1, r2) -> r1.getScore() > r2.getScore() ? r1 : r2));
    List<Document> luceneDocs = results.stream().map(LuceneInMemoryConceptReranker::toLuceneDocument)
            .collect(toList());
    RAMDirectory index = new RAMDirectory();
    try (IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(analyzer))) {
        writer.addDocuments(luceneDocs);
    } catch (IOException e) {
        throw new AnalysisEngineProcessException(e);
    }
    AbstractQuery aquery = TypeUtil.getAbstractQueries(jcas).iterator().next();
    String queryString = queryStringConstructor.construct(aquery);
    LOG.info("Query string: {}", queryString);
    Map<String, Float> uri2score = new HashMap<>();
    try (IndexReader reader = DirectoryReader.open(index)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Query query = parser.parse(queryString);
        ScoreDoc[] scoreDocs = searcher.search(query, hits).scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            uri2score.put(searcher.doc(scoreDoc.doc).get("uri"), scoreDoc.score);
        }
    } catch (IOException | ParseException e) {
        throw new AnalysisEngineProcessException(e);
    }
    // calculate score
    for (Map.Entry<String, ConceptSearchResult> entry : uri2result.entrySet()) {
        String uri = entry.getKey();
        ConceptSearchResult result = entry.getValue();
        double score = uri2score.getOrDefault(uri, 0F) * weight + result.getScore();
        result.setScore(score);
    }
    TypeUtil.rankedSearchResultsByScore(results, limit);
    LOG.info("Reranked {} concepts.", uri2score.size());
    if (LOG.isDebugEnabled()) {
        results.stream().sorted(TypeUtil.SEARCH_RESULT_RANK_COMPARATOR).limit(20).map(TypeUtil::toString)
                .forEachOrdered(s -> LOG.debug(" - {}", s));
    }
}

From source file: edu.cmu.lti.oaqa.baseqa.document.rerank.LogRegDocumentReranker.java

License: Apache License

@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    /*
    * ("arthritis"[MeSH Terms] OR "arthritis"[All Fields])
    *  AND common[All Fields] AND ("men"[MeSH Terms] OR "men"[All Fields])) OR ("women"[MeSH Terms] OR "women"[All Fields])
    */
    // calculate field scores
    List<Document> documents = TypeUtil.getRankedDocuments(jcas);
    Map<String, Document> id2doc = documents.stream().collect(toMap(Document::getDocId, Function.identity()));
    List<org.apache.lucene.document.Document> luceneDocs = documents.stream()
            .map(LogRegDocumentReranker::toLuceneDocument).collect(toList());
    RAMDirectory index = new RAMDirectory();
    try (IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(analyzer))) {
        writer.addDocuments(luceneDocs);
    } catch (IOException e) {
        throw new AnalysisEngineProcessException(e);
    }
    AbstractQuery aquery = TypeUtil.getAbstractQueries(jcas).iterator().next();
    String queryString = queryStringConstructor.construct(aquery);
    LOG.info("Search for query: {}", queryString);
    Map<String, Float> id2titleScore = new HashMap<>();
    Map<String, Float> id2textScore = new HashMap<>();
    try (IndexReader reader = DirectoryReader.open(index)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        searcher.setSimilarity(new BM25Similarity());
        Query titleQuery = parser.createBooleanQuery("title", queryString);
        ScoreDoc[] titleScoreDocs = searcher.search(titleQuery, hits).scoreDocs;
        LOG.info(" - Title matches: {}", titleScoreDocs.length);
        for (ScoreDoc titleScoreDoc : titleScoreDocs) {
            id2titleScore.put(searcher.doc(titleScoreDoc.doc).get("id"), titleScoreDoc.score);
        }
        Query textQuery = parser.createBooleanQuery("text", queryString);
        ScoreDoc[] textScoreDocs = searcher.search(textQuery, hits).scoreDocs;
        LOG.info(" - Text matches: {}", textScoreDocs.length);
        for (ScoreDoc textScoreDoc : textScoreDocs) {
            id2textScore.put(searcher.doc(textScoreDoc.doc).get("id"), textScoreDoc.score);
        }
    } catch (IOException e) {
        throw new AnalysisEngineProcessException(e);
    }
    // set score
    for (Map.Entry<String, Document> entry : id2doc.entrySet()) {
        String id = entry.getKey();
        Document doc = entry.getValue();
        doc.setScore(calculateScore(doc.getRank(), id2titleScore.getOrDefault(id, 0f),
                id2textScore.getOrDefault(id, 0f)));
    }
    TypeUtil.rankedSearchResultsByScore(documents, hits);
}

From source file: edu.cmu.lti.oaqa.baseqa.passage.rerank.scorers.LuceneInMemoryPassageScorer.java

License: Apache License

@Override
public void prepare(JCas jcas) throws AnalysisEngineProcessException {
    uri2conf2score = HashBasedTable.create();
    uri2conf2rank = HashBasedTable.create();
    // index
    List<Passage> passages = TypeUtil.getRankedPassages(jcas);
    RAMDirectory index = new RAMDirectory();
    try (IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(analyzer))) {
        for (Passage passage : passages) {
            Document doc = new Document();
            doc.add(new StringField("uri", TypeUtil.getUriOffsets(passage, ":"), Field.Store.YES));
            doc.add(new TextField("text", passage.getText(), Field.Store.NO));
            writer.addDocument(doc);
        }
        writer.close();
        reader = DirectoryReader.open(index);
        searcher = new IndexSearcher(reader);
    } catch (IOException e) {
        throw new AnalysisEngineProcessException(e);
    }
    // queries
    List<String> tokens = TypeUtil.getOrderedTokens(jcas).stream().map(Token::getCoveredText)
            .map(QueryParser::escape).filter(name -> !name.isEmpty() && !stoplist.contains(name.toLowerCase()))
            .collect(toList());
    Multimap<String, String> ctype2names = HashMultimap.create();
    for (Concept concept : TypeUtil.getConcepts(jcas)) {
        Set<String> ctypes = TypeUtil.getConceptTypes(concept).stream().map(ConceptType::getAbbreviation)
                .collect(toSet());
        String cnames = TypeUtil.getConceptNames(concept).stream()
                .map(LuceneInMemoryPassageScorer::normalizeQuoteName).distinct().collect(joining(" "));
        ctypes.stream().filter(t -> !FORBIDDEN_CTYPES.contains(t))
                .forEach(ctype -> ctype2names.put(ctype, cnames));
    }
    Multimap<String, String> ctypepre2names = HashMultimap.create();
    ctype2names.asMap().entrySet().forEach(e -> ctypepre2names.putAll(e.getKey().split(":")[0], e.getValue()));
    Multimap<String, String> ctype2mentions = HashMultimap.create();
    for (Concept concept : TypeUtil.getConcepts(jcas)) {
        Set<String> ctypes = TypeUtil.getConceptTypes(concept).stream().map(ConceptType::getAbbreviation)
                .collect(toSet());
        String cmentions = TypeUtil.getConceptMentions(concept).stream().map(ConceptMention::getMatchedName)
                .map(LuceneInMemoryPassageScorer::normalizeQuoteName).distinct().collect(joining(" "));
        ctypes.stream().filter(t -> !FORBIDDEN_CTYPES.contains(t))
                .forEach(ctype -> ctype2mentions.put(ctype, cmentions));
    }
    Multimap<String, String> ctypepre2mentions = HashMultimap.create();
    ctypepre2mentions.asMap().entrySet()
            .forEach(e -> ctypepre2mentions.putAll(e.getKey().split(":")[0], e.getValue()));
    LOG.debug("Query strings");
    ExecutorService service = Executors.newCachedThreadPool();
    // execute against all tokens
    service.submit(() -> {
        String concatTokens = String.join(" ", tokens);
        LOG.debug(" - Concatenated tokens: {}", concatTokens);
        search(concatTokens, "tokens_concatenated@all");
    });
    // execute against concatenated concept names
    service.submit(() -> {
        String concatCnames = String.join(" ", ctype2names.values());
        LOG.debug(" - Concatenated concept names: {}", concatCnames);
        search(concatCnames, "cnames_concatenated@all");
    });
    // execute against concatenated concept mentions
    service.submit(() -> {
        String concatCmentions = String.join(" ", ctype2mentions.values());
        LOG.debug(" - Concatenated concept mentions: {}", concatCmentions);
        search(concatCmentions, "cmentions_concatenated@all");
    });
    // execute against concept names for each concept
    service.submit(() -> {
        for (String cnames : ImmutableSet.copyOf(ctype2names.values())) {
            LOG.debug(" - Concatenated concept names: {}", cnames);
            search(cnames, "cnames_individual@all");
        }
    });
    // execute against concept names for each concept type
    service.submit(() -> {
        for (String ctype : ctype2names.keySet()) {
            String concatCnames = String.join(" ", ctype2names.get(ctype));
            LOG.debug(" - Concatenated concept names for {}: {}", ctype, concatCnames);
            search(concatCnames, "cnames@" + ctype + "@all");
        }
    });
    // execute against concept names for each concept type prefix
    service.submit(() -> {
        for (String ctypepre : ctypepre2names.keySet()) {
            String concatCnames = String.join(" ", ctypepre2names.get(ctypepre));
            LOG.debug(" - Concatenated concept names for {}: {}", ctypepre, concatCnames);
            search(concatCnames, "cnames@" + ctypepre + "@all");
        }
    });
    // execute against concept mentions for each concept
    service.submit(() -> {
        for (String cmentions : ImmutableSet.copyOf(ctype2mentions.values())) {
            LOG.debug(" - Concatenated concept mentions: {}", cmentions);
            search(cmentions, "cmentions_individual@all");
        }
    });
    // execute against concept mentions for each concept type
    service.submit(() -> {
        for (String ctype : ctype2mentions.keySet()) {
            String concatCmentions = String.join(" ", ctype2mentions.get(ctype));
            LOG.debug(" - Concatenated concept mentions for {}: {}", ctype, concatCmentions);
            search(concatCmentions, "cmentions@" + ctype + "@all");
        }
    });
    // execute against concept mentions for each concept type prefix
    service.submit(() -> {
        for (String ctypepre : ctypepre2mentions.keySet()) {
            String concatCmentions = String.join(" ", ctypepre2mentions.get(ctypepre));
            LOG.debug(" - Concatenated concept mentions for {}: {}", ctypepre, concatCmentions);
            search(concatCmentions, "cmentions@" + ctypepre + "@all");
        }
    });
    service.shutdown();
    try {
        service.awaitTermination(1, TimeUnit.MINUTES);
    } catch (InterruptedException e) {
        throw new AnalysisEngineProcessException(e);
    }
    confs = uri2conf2score.columnKeySet();
}

From source file: edu.cmu.lti.oaqa.baseqa.passage.retrieval.ImprovedLuceneInMemorySentenceRetrievalExecutor.java

License: Apache License

@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    // create lucene documents for all sentences in all sections and delete the duplicate ones
    Map<Integer, Passage> hash2passage = new HashMap<Integer, Passage>();
    for (Passage d : TypeUtil.getRankedPassages(jcas)) {
        for (Passage s : RetrievalUtil.extractSentences(jcas, d, chunker)) {
            if (!hash2passage.containsKey(TypeUtil.hash(s))) {
                hash2passage.put(TypeUtil.hash(s), s);
            }
        }
    }
    // remove the documents from pipeline
    TypeUtil.getRankedPassages(jcas).forEach(Passage::removeFromIndexes);
    List<Document> luceneDocs = hash2passage.values().stream().map(RetrievalUtil::createLuceneDocument)
            .collect(toList());
    // create lucene index
    RAMDirectory index = new RAMDirectory();
    try (IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(analyzer))) {
        writer.addDocuments(luceneDocs);
    } catch (IOException e) {
        throw new AnalysisEngineProcessException(e);
    }
    // search in the index
    AbstractQuery aquery = TypeUtil.getAbstractQueries(jcas).stream().findFirst().get();
    Map<Integer, Float> hash2score = new HashMap<>();
    try (IndexReader reader = DirectoryReader.open(index)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        String queryString = queryStringConstructor.construct(aquery).replace("\"", " ").replace("/", " ")
                .replace("[", " ").replace("]", " ");
        LOG.info("Search for query: {}", queryString);

        // construct the query
        Query query = parser.parse(queryString);
        LOG.trace(query.toString());
        searcher.setSimilarity(new BM25Similarity());
        ScoreDoc[] scoreDocs = searcher.search(query, hits).scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            float score = scoreDoc.score;
            int hash;
            hash = Integer.parseInt(searcher.doc(scoreDoc.doc).get("hash"));
            hash2score.put(hash, score);
        }
    } catch (IOException | ParseException e) {
        throw new AnalysisEngineProcessException(e);
    }
    LOG.info("The size of Returned Sentences: {}", hash2score.size());
    // add to CAS
    hash2score.entrySet().stream().map(entry -> {
        Passage passage = hash2passage.get(entry.getKey());
        passage.setScore(entry.getValue());
        return passage;
    }).sorted(Comparator.comparing(Passage::getScore).reversed()).forEach(Passage::addToIndexes);

    Collection<Passage> snippets = TypeUtil.getRankedPassages(jcas);

    // rank the snippet and add them to pipeline
    rankSnippets(jcas, calSkip(jcas, hash2passage), calBM25(jcas, hash2passage),
            calAlignment(jcas, hash2passage), calSentenceLength(hash2passage), hash2passage);

}

From source file: edu.cmu.lti.oaqa.baseqa.passage.retrieval.ImprovedLuceneInMemorySentenceRetrievalExecutor.java

License: Apache License

private Map<Integer, Float> calBM25(JCas jcas, Map<Integer, Passage> hash2passage)
        throws AnalysisEngineProcessException {
    // index the documents using lucene
    List<Document> luceneDocs = hash2passage.values().stream().map(RetrievalUtil::createLuceneDocument)
            .collect(toList());
    // create lucene index
    RAMDirectory index = new RAMDirectory();
    try (IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(analyzer))) {
        writer.addDocuments(luceneDocs);
    } catch (IOException e) {
        throw new AnalysisEngineProcessException(e);
    }
    // search in the index
    AbstractQuery aquery = TypeUtil.getAbstractQueries(jcas).stream().findFirst().get();
    Map<Integer, Float> hash2score = new HashMap<>();
    try (IndexReader reader = DirectoryReader.open(index)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        String queryString = queryStringConstructor.construct(aquery).replace("\"", " ").replace("/", " ")
                .replace("[", " ").replace("]", " ");
        LOG.info("Search for query: {}", queryString);

        // construct the query
        Query query = parser.parse(queryString);
        searcher.setSimilarity(new BM25Similarity());
        ScoreDoc[] scoreDocs = searcher.search(query, hits).scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            float score = scoreDoc.score;
            int hash;
            hash = Integer.parseInt(searcher.doc(scoreDoc.doc).get("hash"));
            hash2score.put(hash, score);
        }
    } catch (IOException | ParseException e) {
        throw new AnalysisEngineProcessException(e);
    }
    return hash2score;
}