List of usage examples for org.apache.lucene.index IndexReader document
public final Document document(int docID) throws IOException
Returns the stored fields of the nth Document in this index.
From source file:it.doqui.index.ecmengine.business.personalization.multirepository.index.lucene.RepositoryAwareAbstractLuceneIndexerImpl.java
License:Open Source License
/** * Delete all index entries which do not start with the goven prefix * * @param prefix/* w ww . j av a2 s.co m*/ */ public void deleteAll(String prefix) { IndexReader mainReader = null; try { // mainReader = getReader(); for (int doc = 0; doc < mainReader.maxDoc(); doc++) { if (!mainReader.isDeleted(doc)) { Document document = mainReader.document(doc); String[] ids = document.getValues("ID"); if ((prefix == null) || nonStartwWith(ids, prefix)) { deletions.add(ids[ids.length - 1]); } } } } catch (IOException e) { // If anything goes wrong we try and do a roll back throw new LuceneIndexException("Failed to delete all entries from the index", e); } finally { if (mainReader != null) { try { mainReader.close(); } catch (IOException e) { throw new LuceneIndexException("Filed to close main reader", e); } } } }
From source file:it.doqui.index.ecmengine.business.personalization.multirepository.index.lucene.RepositoryAwareADMLuceneIndexerImpl.java
License:Open Source License
private void addRootNodesToDeletionList() { IndexReader mainReader = null; try {/* w w w.j a va2 s . c om*/ // mainReader = getReader(); mainReader = service.getReader(getRepoStorePath()); TermDocs td = mainReader.termDocs(new Term("ISROOT", "T")); while (td.next()) { int doc = td.doc(); Document document = mainReader.document(doc); String id = document.get("ID"); NodeRef ref = new NodeRef(id); deleteImpl(ref.toString(), false, true, mainReader); // service.delete(ref.toString(), false, true,deletions); } } catch (IOException e) { throw new LuceneIndexException("Failed to delete all primary nodes", e); } finally { try { //service.closeDeltaReader(deltaId, getRepoStorePath()); if (mainReader != null) { // mainReader.close(); service.closeMainReader(getRepoStorePath()); } } catch (IOException e) { throw new LuceneIndexException("Filed to close main reader", e); } } }
From source file:it.doqui.index.ecmengine.business.personalization.splitting.index.lucene.MultiRepositorySplittingADMIndexerImpl.java
License:Open Source License
private void addRootNodesToDeletionList() { IndexReader mainReader = null; try {//ww w . j a v a2 s . c om // mainReader = getReader(); mainReader = service.getReader(getRepoStorePath()); TermDocs td = mainReader.termDocs(new Term("ISROOT", "T")); while (td.next()) { int doc = td.doc(); Document document = mainReader.document(doc); String id = document.get("ID"); NodeRef ref = new NodeRef(id); deleteImpl(ref.toString(), false, true, mainReader); // service.delete(ref.toString(), false, true, deletions); } } catch (IOException e) { throw new LuceneIndexException("Failed to delete all primary nodes", e); } finally { try { //service.closeDeltaReader(deltaId, getRepoStorePath()); if (mainReader != null) { // mainReader.close(); service.closeMainReader(getRepoStorePath()); } } catch (IOException e) { throw new LuceneIndexException("Filed to close main reader", e); } } }
From source file:it.doqui.index.ecmengine.business.personalization.splitting.index.lucene.SplittingADMIndexerImpl.java
License:Open Source License
private void addRootNodesToDeletionList() { IndexReader mainReader = null; try {//from ww w. j a v a 2 s. co m try { mainReader = getReader(); TermDocs td = mainReader.termDocs(new Term("ISROOT", "T")); while (td.next()) { int doc = td.doc(); Document document = mainReader.document(doc); String id = document.get("ID"); NodeRef ref = new NodeRef(id); deleteImpl(ref.toString(), false, true, mainReader); } } catch (IOException e) { throw new LuceneIndexException("Failed to delete all primary nodes", e); } } finally { if (mainReader != null) { try { mainReader.close(); } catch (IOException e) { throw new LuceneIndexException("Filed to close main reader", e); } } } }
From source file:it.drwolf.ridire.index.sketch.SketchDifferenceManager.java
License:Apache License
private void getSketchesFromIndex(IndexReader reader, String lemma, boolean allResults) { this.noResults = false; BooleanQuery bq = new BooleanQuery(); TermQuery tqLemma = new TermQuery(new Term("lemma", lemma)); bq.add(tqLemma, Occur.MUST);//w w w.j ava2s. c o m if (this.getFunctionalMetadatum() >= 0) { FunctionalMetadatum fm = this.entityManager.find(FunctionalMetadatum.class, this.getFunctionalMetadatum()); TermQuery funcQuery = new TermQuery(new Term("functional", fm.getDescription())); bq.add(funcQuery, Occur.MUST); } else if (this.getSemanticMetadatum() >= 0) { SemanticMetadatum sm = this.entityManager.find(SemanticMetadatum.class, this.getSemanticMetadatum()); TermQuery semQuery = new TermQuery(new Term("semantic", sm.getDescription())); bq.add(semQuery, Occur.MUST); } if (this.getSemanticMetadatum() < 0 && this.getFunctionalMetadatum() < 0) { TermQuery allCorporaQuery = new TermQuery(new Term("allcorpora", "yes")); bq.add(allCorporaQuery, Occur.MUST); } IndexSearcher indexSearcher = new IndexSearcher(reader); TopDocs results = null; try { results = indexSearcher.search(bq, Integer.MAX_VALUE); if (results != null) { if (results.totalHits == 0) { this.noResults = true; } List<String> orderList = SketchDifferenceManager.nounOrderList; if (this.getPos().equals("verbo")) { orderList = SketchDifferenceManager.verbOrderList; } else if (this.getPos().equals("aggettivo")) { orderList = SketchDifferenceManager.adjectiveOrderList; } else if (this.getPos().equals("avverbio")) { orderList = SketchDifferenceManager.adverbOrderList; } this.sketchTables.clear(); this.sketchTablesFirst.clear(); this.sketchTablesSecond.clear(); this.sketchTablesThird.clear(); for (String n : orderList) { this.sketchTables.add(new SketchTable(n)); } for (int i = 0; i < results.totalHits; i++) { Document d = reader.document(results.scoreDocs[i].doc); String sketch = d.get("sketch"); String tabella = d.get("tabella"); String overallFrequency = d.get("overallfrequency"); String goodFor = 
d.get("goodFor"); if (goodFor != null && !goodFor.equals(this.getPos())) { continue; } // HACK: change table names String sketchName = sketch.trim(); // if (this.sketch1) { // if (sketchName.equals("AofN")) { // sketchName = "NofA"; // } else if (sketchName.equals("NofA")) { // sketchName = "AofN"; // } else if (sketchName.equals("preADV_V")) { // sketchName = "postV_ADV"; // } else if (sketchName.equals("postV_ADV")) { // sketchName = "preADV_V"; // } // } if (!SketchList.isSketchNameGoodFor(sketchName, this.getPos())) { continue; } int index = orderList.indexOf(sketchName); SketchTable sketchTable = this.sketchTables.get(index); sketchTable.setGlobalFrequency(Integer.parseInt(overallFrequency.trim())); String[] righe = StringUtils.split(tabella, "\n"); int maxJ = righe.length; if (!allResults) { maxJ = Math.min(20, righe.length); } List<SketchResultRow> rows = new ArrayList<SketchResultRow>(); for (int j = 0; j < maxJ; j++) { SketchResultRow sketchResultRow = new SketchResultRow(); String[] tokens = StringUtils.split(righe[j], "\t"); sketchResultRow.setItem(tokens[0].trim()); sketchResultRow.setFrequency(Integer.parseInt(tokens[1].trim())); double score = Double.parseDouble(tokens[2].trim()); // do not add rows with logdice < 0 if (score < 0.0) { continue; } sketchResultRow.setScore(score); rows.add(sketchResultRow); } // sorting comes in reverse order Collections.sort(rows); // take the first 25 results sketchTable.getRows().addAll(rows.subList(0, Math.min(25, rows.size()))); } } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } this.compactResults(); }
From source file:it.drwolf.ridire.index.sketch.SketchRetriever.java
License:Apache License
private void getSketchesFromIndex(IndexReader reader) { this.noResults = false; BooleanQuery bq = new BooleanQuery(); TermQuery tqLemma = new TermQuery(new Term("lemma", this.getLemma())); bq.add(tqLemma, Occur.MUST);/*from w ww. jav a 2 s . c o m*/ if (this.getFunctionalMetadatum() >= 0) { FunctionalMetadatum fm = this.entityManager.find(FunctionalMetadatum.class, this.getFunctionalMetadatum()); TermQuery funcQuery = new TermQuery(new Term("functional", fm.getDescription())); bq.add(funcQuery, Occur.MUST); } else if (this.getSemanticMetadatum() >= 0) { SemanticMetadatum sm = this.entityManager.find(SemanticMetadatum.class, this.getSemanticMetadatum()); TermQuery semQuery = new TermQuery(new Term("semantic", sm.getDescription())); bq.add(semQuery, Occur.MUST); } if (this.getSemanticMetadatum() < 0 && this.getFunctionalMetadatum() < 0) { TermQuery allCorporaQuery = new TermQuery(new Term("allcorpora", "yes")); bq.add(allCorporaQuery, Occur.MUST); } if (!this.getSketchToExtract().equals("Tutti")) { if (this.getSketchToExtract().startsWith("pp_")) { PrefixQuery prefixQuery = new PrefixQuery(new Term("sketch", "pp_")); bq.add(prefixQuery, Occur.MUST); } else { TermQuery sq = new TermQuery(new Term("sketch", this.getSketchToExtract())); bq.add(sq, Occur.MUST); } } IndexSearcher indexSearcher = new IndexSearcher(reader); TopDocs results = null; try { results = indexSearcher.search(bq, Integer.MAX_VALUE); if (results != null) { if (results.totalHits == 0) { this.noResults = true; } List<String> orderList = SketchRetriever.nounOrderList; if (this.getPos().equals("verbo")) { orderList = SketchRetriever.verbOrderList; } else if (this.getPos().equals("aggettivo")) { orderList = SketchRetriever.adjectiveOrderList; } else if (this.getPos().equals("avverbio")) { orderList = SketchRetriever.adverbOrderList; } this.sketchTables.clear(); this.sketchTablesFirst.clear(); this.sketchTablesSecond.clear(); this.sketchTablesThird.clear(); for (String n : orderList) { 
this.sketchTables.add(new SketchTable(n)); } for (int i = 0; i < results.totalHits; i++) { Document d = reader.document(results.scoreDocs[i].doc); String sketch = d.get("sketch"); String tabella = d.get("tabella"); String overallFrequency = d.get("overallfrequency"); String goodFor = d.get("goodFor"); if (goodFor != null && !goodFor.equals(this.getPos())) { continue; } // HACK: change table names String sketchName = sketch.trim(); // if (this.sketch1) { // if (sketchName.equals("AofN")) { // sketchName = "NofA"; // } else if (sketchName.equals("NofA")) { // sketchName = "AofN"; // } else if (sketchName.equals("preADV_V")) { // sketchName = "postV_ADV"; // } else if (sketchName.equals("postV_ADV")) { // sketchName = "preADV_V"; // } // } if (!SketchList.isSketchNameGoodFor(sketchName, this.getPos())) { continue; } int index = orderList.indexOf(sketchName); SketchTable sketchTable = this.sketchTables.get(index); sketchTable.setGlobalFrequency(Integer.parseInt(overallFrequency.trim())); String[] righe = StringUtils.split(tabella, "\n"); for (int j = 0; j < Math.min(20, righe.length); j++) { SketchResultRow sketchResultRow = new SketchResultRow(); String[] tokens = StringUtils.split(righe[j], "\t"); sketchResultRow.setItem(tokens[0].trim()); sketchResultRow.setFrequency(Integer.parseInt(tokens[1].trim())); sketchResultRow.setScore(Double.parseDouble(tokens[2].trim())); sketchTable.getRows().add(sketchResultRow); } } } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } this.compactResults(); }
From source file:it.drwolf.ridire.utility.IndexQuery.java
License:Apache License
public IndexQuery(String[] args) { this.createOptions(); this.parseOptions(args); try {// w ww.j a v a 2 s . co m IndexReader indexReader = IndexReader.open(new MMapDirectory(new File(this.dirName))); IndexSearcher indexSearcher = new IndexSearcher(indexReader); TermQuery tqLemma = new TermQuery(new Term("lemma", this.term)); TopDocs results = indexSearcher.search(tqLemma, Integer.MAX_VALUE); System.out.println("Total results: " + results.totalHits); for (int i = 0; i < results.totalHits; i++) { Document d = indexReader.document(results.scoreDocs[i].doc); String sketch = d.get("sketch"); System.out.println(sketch); } } catch (CorruptIndexException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:it.polito.tellmefirst.lucene.IndexesUtil.java
License:Open Source License
public static ArrayList<String> getBagOfConcepts(String uri, String lang) { LOG.debug("[getBagOfConcepts] - BEGIN"); ArrayList<String> result = new ArrayList<String>(); try {/*ww w.j av a 2s .c o m*/ String KBPath = (lang.equals("it")) ? TMFVariables.KB_IT : TMFVariables.KB_EN; MMapDirectory directory = new MMapDirectory(new File(KBPath)); IndexReader reader = IndexReader.open(directory, true); IndexSearcher is = new IndexSearcher(directory, true); Query q = new TermQuery(new Term("URI", uri)); TopDocs hits = is.search(q, 1); is.close(); if (hits.totalHits != 0) { int docId = hits.scoreDocs[0].doc; org.apache.lucene.document.Document doc = reader.document(docId); String wikilinksMerged = doc.getField("KB").stringValue(); String[] wikiSplits = wikilinksMerged.split(" "); //no prod LOG.debug("Bag of concepts for the resource " + uri + ": "); for (String s : wikiSplits) { result.add(s); //no prod LOG.debug("* " + s); } } reader.close(); } catch (Exception e) { LOG.error("[getBagOfConcepts] - EXCEPTION: ", e); } LOG.debug("[getBagOfConcepts] - END"); return result; }
From source file:it.polito.tellmefirst.lucene.IndexesUtil.java
License:Open Source License
public static ArrayList<String> getResidualBagOfConcepts(String uri, String lang) { LOG.debug("[getResidualBagOfConcepts] - BEGIN"); ArrayList<String> result = new ArrayList<String>(); try {//from www. j av a2s .com String residualKBPath = (lang.equals("it")) ? TMFVariables.RESIDUAL_KB_IT : TMFVariables.RESIDUAL_KB_EN; MMapDirectory directory = new MMapDirectory(new File(residualKBPath)); IndexReader reader = IndexReader.open(directory, true); IndexSearcher is = new IndexSearcher(directory, true); Query q = new TermQuery(new Term("URI", uri)); TopDocs hits = is.search(q, 1); is.close(); if (hits.totalHits != 0) { int docId = hits.scoreDocs[0].doc; org.apache.lucene.document.Document doc = reader.document(docId); String wikilinksMerged = doc.getField("KB").stringValue(); String[] wikiSplits = wikilinksMerged.split(" "); //no prod LOG.debug("Residual bag of concepts for the resource " + uri + ": "); for (String s : wikiSplits) { result.add(s); //no prod LOG.debug("* " + s); } } reader.close(); } catch (Exception e) { LOG.error("[getResidualBagOfConcepts] - EXCEPTION: ", e); } LOG.debug("[getResidualBagOfConcepts] - END"); return result; }
From source file:it.polito.tellmefirst.lucene.KBIndexSearcher.java
License:Open Source License
/** * Get DBpedia concepts related to a specific URI from the Lucene Index. These DBpedia concepts appear as wikilink * more than once in the Wikipedia page identified by the URI. * * @param uri Input URI./*from w ww .jav a 2 s. c o m*/ * * In the previous versions of TellMeFirst, the getBagOfConcepts method take as input the * URI of a DBpedia resource (String) and the language parameter (String). We have decide to * modify the API in order to separate this module from the core of TellMeFirst. * * @since 3.0.0.0. */ public List<String> getBagOfConcepts(String uri) throws Exception { LOG.debug("[getBagOfConcepts] BEGIN"); List<String> result = new ArrayList<String>(); try { MMapDirectory directory = new MMapDirectory(new File(kb)); IndexReader reader = IndexReader.open(directory, true); IndexSearcher is = new IndexSearcher(directory, true); Query q = new TermQuery(new Term("URI", uri)); TopDocs hits = is.search(q, 1); is.close(); if (hits.totalHits != 0) { int docId = hits.scoreDocs[0].doc; org.apache.lucene.document.Document doc = reader.document(docId); String wikilinksMerged = doc.getField("KB").stringValue(); String[] wikiSplits = wikilinksMerged.split(" "); LOG.debug("Bag of concepts for the resource " + uri + ": "); for (String s : wikiSplits) { result.add(s); LOG.debug("* " + s); } } reader.close(); } catch (Exception e) { LOG.error("[getBagOfConcepts] EXCEPTION: ", e); throw new Exception(e); } LOG.debug("[getBagOfConcepts] END"); return result; }