Example usage for org.apache.lucene.index IndexReader document

Introduction

This page collects example usages of the org.apache.lucene.index IndexReader document method.

Prototype

public final Document document(int docID) throws IOException

Document

Returns the stored fields of the nth Document in this index.
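Before the project-specific examples below, here is a minimal, self-contained sketch of how document(int) is typically used to read the stored fields of every live document in a Lucene 3.x index (the API generation used by the examples on this page). The index path and the "ID" field name are placeholders for illustration, not taken from any of the projects shown here.

import java.io.File;
import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;

public class StoredFieldsDump {
    public static void main(String[] args) throws IOException {
        // Open a reader over an existing index directory (placeholder path).
        IndexReader reader = IndexReader.open(FSDirectory.open(new File("/path/to/index")));
        try {
            for (int docId = 0; docId < reader.maxDoc(); docId++) {
                // Skip documents that have been marked as deleted (Lucene 3.x API).
                if (reader.isDeleted(docId)) {
                    continue;
                }
                // Load the stored fields of the docId-th document.
                Document doc = reader.document(docId);
                System.out.println(doc.get("ID"));
            }
        } finally {
            reader.close();
        }
    }
}

In Lucene 4.x and later the isDeleted check is replaced by live-docs bitsets, but document(int) keeps the same role of retrieving a document's stored fields by internal document number.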

Usage

From source file:it.doqui.index.ecmengine.business.personalization.multirepository.index.lucene.RepositoryAwareAbstractLuceneIndexerImpl.java

License:Open Source License

/**
 * Delete all index entries which do not start with the given prefix.
 *
 * @param prefix
 */
public void deleteAll(String prefix) {
    IndexReader mainReader = null;
    try {
        mainReader = getReader();
        for (int doc = 0; doc < mainReader.maxDoc(); doc++) {
            if (!mainReader.isDeleted(doc)) {
                Document document = mainReader.document(doc);
                String[] ids = document.getValues("ID");
                if ((prefix == null) || nonStartwWith(ids, prefix)) {
                    deletions.add(ids[ids.length - 1]);
                }
            }
        }

    } catch (IOException e) {
        // If anything goes wrong we try and do a roll back
        throw new LuceneIndexException("Failed to delete all entries from the index", e);
    } finally {
        if (mainReader != null) {
            try {
                mainReader.close();
            } catch (IOException e) {
                throw new LuceneIndexException("Filed to close main reader", e);
            }
        }
    }
}

From source file:it.doqui.index.ecmengine.business.personalization.multirepository.index.lucene.RepositoryAwareADMLuceneIndexerImpl.java

License:Open Source License

private void addRootNodesToDeletionList() {
    IndexReader mainReader = null;
    try {
        //                mainReader = getReader();
        mainReader = service.getReader(getRepoStorePath());
        TermDocs td = mainReader.termDocs(new Term("ISROOT", "T"));
        while (td.next()) {
            int doc = td.doc();
            Document document = mainReader.document(doc);
            String id = document.get("ID");
            NodeRef ref = new NodeRef(id);
            deleteImpl(ref.toString(), false, true, mainReader);
            //              service.delete(ref.toString(), false, true,deletions);
        }
    } catch (IOException e) {
        throw new LuceneIndexException("Failed to delete all primary nodes", e);
    } finally {
        try {
            //service.closeDeltaReader(deltaId, getRepoStorePath());
            if (mainReader != null) {
                //                     mainReader.close();
                service.closeMainReader(getRepoStorePath());
            }

        } catch (IOException e) {
            throw new LuceneIndexException("Filed to close main reader", e);
        }
    }
}

From source file:it.doqui.index.ecmengine.business.personalization.splitting.index.lucene.MultiRepositorySplittingADMIndexerImpl.java

License:Open Source License

private void addRootNodesToDeletionList() {
    IndexReader mainReader = null;
    try {
        //                mainReader = getReader();
        mainReader = service.getReader(getRepoStorePath());
        TermDocs td = mainReader.termDocs(new Term("ISROOT", "T"));
        while (td.next()) {
            int doc = td.doc();
            Document document = mainReader.document(doc);
            String id = document.get("ID");
            NodeRef ref = new NodeRef(id);
            deleteImpl(ref.toString(), false, true, mainReader);
            //                    service.delete(ref.toString(), false, true, deletions);
        }
    } catch (IOException e) {
        throw new LuceneIndexException("Failed to delete all primary nodes", e);
    } finally {
        try {
            //service.closeDeltaReader(deltaId, getRepoStorePath());
            if (mainReader != null) {
                //                      mainReader.close();
                service.closeMainReader(getRepoStorePath());
            }

        } catch (IOException e) {
            throw new LuceneIndexException("Filed to close main reader", e);
        }
    }
}

From source file:it.doqui.index.ecmengine.business.personalization.splitting.index.lucene.SplittingADMIndexerImpl.java

License:Open Source License

private void addRootNodesToDeletionList() {
    IndexReader mainReader = null;
    try {
        try {
            mainReader = getReader();
            TermDocs td = mainReader.termDocs(new Term("ISROOT", "T"));
            while (td.next()) {
                int doc = td.doc();
                Document document = mainReader.document(doc);
                String id = document.get("ID");
                NodeRef ref = new NodeRef(id);
                deleteImpl(ref.toString(), false, true, mainReader);
            }
        } catch (IOException e) {
            throw new LuceneIndexException("Failed to delete all primary nodes", e);
        }
    } finally {
        if (mainReader != null) {
            try {
                mainReader.close();
            } catch (IOException e) {
                throw new LuceneIndexException("Filed to close main reader", e);
            }
        }
    }
}

From source file:it.drwolf.ridire.index.sketch.SketchDifferenceManager.java

License:Apache License

private void getSketchesFromIndex(IndexReader reader, String lemma, boolean allResults) {
    this.noResults = false;
    BooleanQuery bq = new BooleanQuery();
    TermQuery tqLemma = new TermQuery(new Term("lemma", lemma));
    bq.add(tqLemma, Occur.MUST);
    if (this.getFunctionalMetadatum() >= 0) {
        FunctionalMetadatum fm = this.entityManager.find(FunctionalMetadatum.class,
                this.getFunctionalMetadatum());
        TermQuery funcQuery = new TermQuery(new Term("functional", fm.getDescription()));
        bq.add(funcQuery, Occur.MUST);
    } else if (this.getSemanticMetadatum() >= 0) {
        SemanticMetadatum sm = this.entityManager.find(SemanticMetadatum.class, this.getSemanticMetadatum());
        TermQuery semQuery = new TermQuery(new Term("semantic", sm.getDescription()));
        bq.add(semQuery, Occur.MUST);
    }
    if (this.getSemanticMetadatum() < 0 && this.getFunctionalMetadatum() < 0) {
        TermQuery allCorporaQuery = new TermQuery(new Term("allcorpora", "yes"));
        bq.add(allCorporaQuery, Occur.MUST);
    }
    IndexSearcher indexSearcher = new IndexSearcher(reader);
    TopDocs results = null;
    try {
        results = indexSearcher.search(bq, Integer.MAX_VALUE);
        if (results != null) {
            if (results.totalHits == 0) {
                this.noResults = true;
            }
            List<String> orderList = SketchDifferenceManager.nounOrderList;
            if (this.getPos().equals("verbo")) {
                orderList = SketchDifferenceManager.verbOrderList;
            } else if (this.getPos().equals("aggettivo")) {
                orderList = SketchDifferenceManager.adjectiveOrderList;
            } else if (this.getPos().equals("avverbio")) {
                orderList = SketchDifferenceManager.adverbOrderList;
            }
            this.sketchTables.clear();
            this.sketchTablesFirst.clear();
            this.sketchTablesSecond.clear();
            this.sketchTablesThird.clear();
            for (String n : orderList) {
                this.sketchTables.add(new SketchTable(n));
            }
            for (int i = 0; i < results.totalHits; i++) {
                Document d = reader.document(results.scoreDocs[i].doc);
                String sketch = d.get("sketch");
                String tabella = d.get("tabella");
                String overallFrequency = d.get("overallfrequency");
                String goodFor = d.get("goodFor");
                if (goodFor != null && !goodFor.equals(this.getPos())) {
                    continue;
                }
                // HACK: change table names
                String sketchName = sketch.trim();
                // if (this.sketch1) {
                // if (sketchName.equals("AofN")) {
                // sketchName = "NofA";
                // } else if (sketchName.equals("NofA")) {
                // sketchName = "AofN";
                // } else if (sketchName.equals("preADV_V")) {
                // sketchName = "postV_ADV";
                // } else if (sketchName.equals("postV_ADV")) {
                // sketchName = "preADV_V";
                // }
                // }
                if (!SketchList.isSketchNameGoodFor(sketchName, this.getPos())) {
                    continue;
                }
                int index = orderList.indexOf(sketchName);
                SketchTable sketchTable = this.sketchTables.get(index);
                sketchTable.setGlobalFrequency(Integer.parseInt(overallFrequency.trim()));
                String[] righe = StringUtils.split(tabella, "\n");
                int maxJ = righe.length;
                if (!allResults) {
                    maxJ = Math.min(20, righe.length);
                }
                List<SketchResultRow> rows = new ArrayList<SketchResultRow>();
                for (int j = 0; j < maxJ; j++) {
                    SketchResultRow sketchResultRow = new SketchResultRow();
                    String[] tokens = StringUtils.split(righe[j], "\t");
                    sketchResultRow.setItem(tokens[0].trim());
                    sketchResultRow.setFrequency(Integer.parseInt(tokens[1].trim()));
                    double score = Double.parseDouble(tokens[2].trim());
                    // do not add rows with logdice < 0
                    if (score < 0.0) {
                        continue;
                    }
                    sketchResultRow.setScore(score);
                    rows.add(sketchResultRow);
                }
                // sorting comes in reverse order
                Collections.sort(rows);
                // take the first 25 results
                sketchTable.getRows().addAll(rows.subList(0, Math.min(25, rows.size())));
            }
        }
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    this.compactResults();
}

From source file:it.drwolf.ridire.index.sketch.SketchRetriever.java

License:Apache License

private void getSketchesFromIndex(IndexReader reader) {
    this.noResults = false;
    BooleanQuery bq = new BooleanQuery();
    TermQuery tqLemma = new TermQuery(new Term("lemma", this.getLemma()));
    bq.add(tqLemma, Occur.MUST);
    if (this.getFunctionalMetadatum() >= 0) {
        FunctionalMetadatum fm = this.entityManager.find(FunctionalMetadatum.class,
                this.getFunctionalMetadatum());
        TermQuery funcQuery = new TermQuery(new Term("functional", fm.getDescription()));
        bq.add(funcQuery, Occur.MUST);
    } else if (this.getSemanticMetadatum() >= 0) {
        SemanticMetadatum sm = this.entityManager.find(SemanticMetadatum.class, this.getSemanticMetadatum());
        TermQuery semQuery = new TermQuery(new Term("semantic", sm.getDescription()));
        bq.add(semQuery, Occur.MUST);
    }
    if (this.getSemanticMetadatum() < 0 && this.getFunctionalMetadatum() < 0) {
        TermQuery allCorporaQuery = new TermQuery(new Term("allcorpora", "yes"));
        bq.add(allCorporaQuery, Occur.MUST);
    }
    if (!this.getSketchToExtract().equals("Tutti")) {
        if (this.getSketchToExtract().startsWith("pp_")) {
            PrefixQuery prefixQuery = new PrefixQuery(new Term("sketch", "pp_"));
            bq.add(prefixQuery, Occur.MUST);
        } else {
            TermQuery sq = new TermQuery(new Term("sketch", this.getSketchToExtract()));
            bq.add(sq, Occur.MUST);
        }
    }
    IndexSearcher indexSearcher = new IndexSearcher(reader);
    TopDocs results = null;
    try {
        results = indexSearcher.search(bq, Integer.MAX_VALUE);
        if (results != null) {
            if (results.totalHits == 0) {
                this.noResults = true;
            }
            List<String> orderList = SketchRetriever.nounOrderList;
            if (this.getPos().equals("verbo")) {
                orderList = SketchRetriever.verbOrderList;
            } else if (this.getPos().equals("aggettivo")) {
                orderList = SketchRetriever.adjectiveOrderList;
            } else if (this.getPos().equals("avverbio")) {
                orderList = SketchRetriever.adverbOrderList;
            }
            this.sketchTables.clear();
            this.sketchTablesFirst.clear();
            this.sketchTablesSecond.clear();
            this.sketchTablesThird.clear();
            for (String n : orderList) {
                this.sketchTables.add(new SketchTable(n));
            }
            for (int i = 0; i < results.totalHits; i++) {
                Document d = reader.document(results.scoreDocs[i].doc);
                String sketch = d.get("sketch");
                String tabella = d.get("tabella");
                String overallFrequency = d.get("overallfrequency");
                String goodFor = d.get("goodFor");
                if (goodFor != null && !goodFor.equals(this.getPos())) {
                    continue;
                }
                // HACK: change table names
                String sketchName = sketch.trim();
                // if (this.sketch1) {
                // if (sketchName.equals("AofN")) {
                // sketchName = "NofA";
                // } else if (sketchName.equals("NofA")) {
                // sketchName = "AofN";
                // } else if (sketchName.equals("preADV_V")) {
                // sketchName = "postV_ADV";
                // } else if (sketchName.equals("postV_ADV")) {
                // sketchName = "preADV_V";
                // }
                // }
                if (!SketchList.isSketchNameGoodFor(sketchName, this.getPos())) {
                    continue;
                }
                int index = orderList.indexOf(sketchName);
                SketchTable sketchTable = this.sketchTables.get(index);
                sketchTable.setGlobalFrequency(Integer.parseInt(overallFrequency.trim()));
                String[] righe = StringUtils.split(tabella, "\n");
                for (int j = 0; j < Math.min(20, righe.length); j++) {
                    SketchResultRow sketchResultRow = new SketchResultRow();
                    String[] tokens = StringUtils.split(righe[j], "\t");
                    sketchResultRow.setItem(tokens[0].trim());
                    sketchResultRow.setFrequency(Integer.parseInt(tokens[1].trim()));
                    sketchResultRow.setScore(Double.parseDouble(tokens[2].trim()));
                    sketchTable.getRows().add(sketchResultRow);
                }
            }
        }
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    this.compactResults();
}

From source file:it.drwolf.ridire.utility.IndexQuery.java

License:Apache License

public IndexQuery(String[] args) {
    this.createOptions();
    this.parseOptions(args);
    try {
        IndexReader indexReader = IndexReader.open(new MMapDirectory(new File(this.dirName)));
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        TermQuery tqLemma = new TermQuery(new Term("lemma", this.term));
        TopDocs results = indexSearcher.search(tqLemma, Integer.MAX_VALUE);
        System.out.println("Total results: " + results.totalHits);
        for (int i = 0; i < results.totalHits; i++) {
            Document d = indexReader.document(results.scoreDocs[i].doc);
            String sketch = d.get("sketch");
            System.out.println(sketch);
        }
    } catch (CorruptIndexException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}

From source file:it.polito.tellmefirst.lucene.IndexesUtil.java

License:Open Source License

public static ArrayList<String> getBagOfConcepts(String uri, String lang) {
    LOG.debug("[getBagOfConcepts] - BEGIN");
    ArrayList<String> result = new ArrayList<String>();
    try {
        String KBPath = (lang.equals("it")) ? TMFVariables.KB_IT : TMFVariables.KB_EN;
        MMapDirectory directory = new MMapDirectory(new File(KBPath));
        IndexReader reader = IndexReader.open(directory, true);
        IndexSearcher is = new IndexSearcher(directory, true);
        Query q = new TermQuery(new Term("URI", uri));
        TopDocs hits = is.search(q, 1);
        is.close();
        if (hits.totalHits != 0) {
            int docId = hits.scoreDocs[0].doc;
            org.apache.lucene.document.Document doc = reader.document(docId);
            String wikilinksMerged = doc.getField("KB").stringValue();
            String[] wikiSplits = wikilinksMerged.split(" ");
            //no prod
            LOG.debug("Bag of concepts for the resource " + uri + ": ");
            for (String s : wikiSplits) {
                result.add(s);
                //no prod
                LOG.debug("* " + s);
            }
        }
        reader.close();
    } catch (Exception e) {
        LOG.error("[getBagOfConcepts] - EXCEPTION: ", e);
    }
    LOG.debug("[getBagOfConcepts] - END");
    return result;
}

From source file:it.polito.tellmefirst.lucene.IndexesUtil.java

License:Open Source License

public static ArrayList<String> getResidualBagOfConcepts(String uri, String lang) {
    LOG.debug("[getResidualBagOfConcepts] - BEGIN");
    ArrayList<String> result = new ArrayList<String>();
    try {
        String residualKBPath = (lang.equals("it")) ? TMFVariables.RESIDUAL_KB_IT : TMFVariables.RESIDUAL_KB_EN;
        MMapDirectory directory = new MMapDirectory(new File(residualKBPath));
        IndexReader reader = IndexReader.open(directory, true);
        IndexSearcher is = new IndexSearcher(directory, true);
        Query q = new TermQuery(new Term("URI", uri));
        TopDocs hits = is.search(q, 1);
        is.close();
        if (hits.totalHits != 0) {
            int docId = hits.scoreDocs[0].doc;
            org.apache.lucene.document.Document doc = reader.document(docId);
            String wikilinksMerged = doc.getField("KB").stringValue();
            String[] wikiSplits = wikilinksMerged.split(" ");
            //no prod
            LOG.debug("Residual bag of concepts for the resource " + uri + ": ");
            for (String s : wikiSplits) {
                result.add(s);
                //no prod
                LOG.debug("* " + s);
            }
        }
        reader.close();
    } catch (Exception e) {
        LOG.error("[getResidualBagOfConcepts] - EXCEPTION: ", e);
    }
    LOG.debug("[getResidualBagOfConcepts] - END");
    return result;
}

From source file:it.polito.tellmefirst.lucene.KBIndexSearcher.java

License:Open Source License

/**
 * Get DBpedia concepts related to a specific URI from the Lucene index. These DBpedia concepts appear as wikilinks
 * more than once in the Wikipedia page identified by the URI.
 *
 * @param uri Input URI.
 *
 * In previous versions of TellMeFirst, the getBagOfConcepts method took as input the
 * URI of a DBpedia resource (String) and a language parameter (String). We have decided to
 * modify the API in order to separate this module from the core of TellMeFirst.
 *
 * @since 3.0.0.0.
 */
public List<String> getBagOfConcepts(String uri) throws Exception {
    LOG.debug("[getBagOfConcepts]  BEGIN");
    List<String> result = new ArrayList<String>();

    try {
        MMapDirectory directory = new MMapDirectory(new File(kb));
        IndexReader reader = IndexReader.open(directory, true);
        IndexSearcher is = new IndexSearcher(directory, true);
        Query q = new TermQuery(new Term("URI", uri));
        TopDocs hits = is.search(q, 1);
        is.close();
        if (hits.totalHits != 0) {
            int docId = hits.scoreDocs[0].doc;
            org.apache.lucene.document.Document doc = reader.document(docId);
            String wikilinksMerged = doc.getField("KB").stringValue();
            String[] wikiSplits = wikilinksMerged.split(" ");
            LOG.debug("Bag of concepts for the resource " + uri + ": ");
            for (String s : wikiSplits) {
                result.add(s);
                LOG.debug("* " + s);
            }
        }
        reader.close();
    } catch (Exception e) {
        LOG.error("[getBagOfConcepts]  EXCEPTION: ", e);
        throw new Exception(e);
    }
    LOG.debug("[getBagOfConcepts]  END");
    return result;
}