List of usage examples for org.apache.lucene.search IndexSearcher getIndexReader
public IndexReader getIndexReader()
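getIndexReader() returns the IndexReader the searcher searches over. Before the examples below, a minimal sketch of the basic call pattern, assuming Lucene 5+ path-based APIs and a hypothetical index at /tmp/index:

import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class GetIndexReaderExample {
    public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(Paths.get("/tmp/index")); // hypothetical index location
        DirectoryReader reader = DirectoryReader.open(dir);
        IndexSearcher searcher = new IndexSearcher(reader);

        // getIndexReader() hands back the same reader the searcher wraps
        IndexReader same = searcher.getIndexReader();
        System.out.println("maxDoc=" + same.maxDoc() + ", numDocs=" + same.numDocs());

        reader.close(); // the searcher does not own the reader; close it yourself
        dir.close();
    }
}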
From source file:lia.chapter3.NearRealTimeTest.java
License:Apache License
public void testNearRealTime() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter writer = Utils.getIndexWriter(dir);
    for (int i = 0; i < 10; i++) {
        Document doc = new Document();
        doc.add(new Field("id", "" + i, StringField.TYPE_STORED));
        doc.add(new Field("text", "aaa", TextField.TYPE_STORED));
        writer.addDocument(doc);
    }
    IndexSearcher searcher = Utils.getIndexSearcher(dir);                         // #A
    Query query = new TermQuery(new Term("text", "aaa"));
    TopDocs docs = searcher.search(query, 1);
    assertEquals(10, docs.totalHits);                                             // #B

    writer.deleteDocuments(new Term("id", "7"));                                  // #2

    Document doc = new Document();                                                // #3
    doc.add(new Field("id", "11", StringField.TYPE_STORED));                      // #3
    doc.add(new Field("text", "bbb", TextField.TYPE_STORED));                     // #3
    writer.addDocument(doc);                                                      // #3

    // Fixed from the original, which assigned searcher.getIndexReader() to
    // newReader and then asserted the two differed (always false). Reopening
    // against the writer makes the uncommitted delete and add visible.
    DirectoryReader oldReader = (DirectoryReader) searcher.getIndexReader();
    DirectoryReader newReader = DirectoryReader.openIfChanged(oldReader, writer); // #4
    assertNotNull(newReader);
    assertFalse(oldReader == newReader);                                          // #5
    oldReader.close();                                                            // #6

    searcher = new IndexSearcher(newReader);
    TopDocs hits = searcher.search(query, 10);                                    // #7
    assertEquals(9, hits.totalHits);                                              // #7

    query = new TermQuery(new Term("text", "bbb"));                               // #8
    hits = searcher.search(query, 1);                                             // #8
    assertEquals(1, hits.totalHits);                                              // #8

    newReader.close();
    writer.close();
}
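The manual reopen-and-swap above is what SearcherManager automates in later Lucene versions. A minimal sketch under that assumption, using the Lucene 4.x constructor signature and reusing the writer and doc from the test above:

// Sketch: SearcherManager handles the reopen/acquire/release cycle for NRT search.
SearcherManager manager = new SearcherManager(writer, true, new SearcherFactory());
writer.addDocument(doc);        // make a change...
manager.maybeRefresh();         // ...then ask for a refreshed view
IndexSearcher s = manager.acquire();
try {
    TopDocs td = s.search(new TermQuery(new Term("text", "bbb")), 10);
} finally {
    manager.release(s);         // never close an acquired searcher; release it
}
manager.close();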
From source file:lia.chapter5.CategorizerTest.java
License:Apache License
private void buildCategoryVectors() throws IOException {
    IndexSearcher searcher = Utils.getBookIndexSearcher();
    IndexReader reader = searcher.getIndexReader();
    int maxDoc = reader.maxDoc();
    System.out.println(maxDoc);
    for (int i = 0; i < maxDoc; i++) {
        Document doc = reader.document(i);
        String category = doc.get("category");
        System.out.println("\n" + doc.get("subject") + "\n");
        Map vectorMap = (Map) categoryMap.get(category);
        if (vectorMap == null) {
            vectorMap = new TreeMap();
            categoryMap.put(category, vectorMap);
        }
        Terms termsVector = reader.getTermVector(i, "subject");
        addTermFreqToMap(vectorMap, termsVector);
    }
}
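The snippet leaves addTermFreqToMap out. A hypothetical sketch of how such a helper could walk a per-document term vector with TermsEnum, assuming the Lucene 4.x Terms API used above (the helper name comes from the call site; the map shape is an assumption):

private void addTermFreqToMap(Map<String, Integer> vectorMap, Terms termsVector) throws IOException {
    if (termsVector == null) {
        return; // the field had no term vector stored
    }
    TermsEnum termsEnum = termsVector.iterator(null); // Lucene 4.x signature; 5+ is iterator()
    BytesRef term;
    while ((term = termsEnum.next()) != null) {
        String text = term.utf8ToString();
        int freq = (int) termsEnum.totalTermFreq(); // within one document, the term's frequency
        Integer prev = vectorMap.get(text);
        vectorMap.put(text, prev == null ? freq : prev + freq);
    }
}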
From source file:lia.tools.FastVectorHighlighterSample.java
License:Apache License
static void searchIndex(String filename) throws Exception {
    QueryParser parser = new QueryParser(Version.LUCENE_30, F, analyzer);
    Query query = parser.parse(QUERY);
    FastVectorHighlighter highlighter = getHighlighter();               // #C
    FieldQuery fieldQuery = highlighter.getFieldQuery(query);           // #D
    IndexSearcher searcher = new IndexSearcher(dir);
    TopDocs docs = searcher.search(query, 10);

    FileWriter writer = new FileWriter(filename);
    writer.write("<html>");
    writer.write("<body>");
    writer.write("<p>QUERY : " + QUERY + "</p>");
    for (ScoreDoc scoreDoc : docs.scoreDocs) {
        String snippet = highlighter.getBestFragment(                   // #E
                fieldQuery, searcher.getIndexReader(),                  // #E
                scoreDoc.doc, F, 100);                                  // #E
        if (snippet != null) {
            writer.write(scoreDoc.doc + " : " + snippet + "<br/>");
        }
    }
    writer.write("</body></html>");
    writer.close();
    searcher.close();
}
From source file:lux.search.LuxSearcher.java
License:Mozilla Public License
/**
 * Creates a Lux searcher based on an existing Lucene IndexSearcher.
 * @param searcher the underlying {@link IndexSearcher}
 */
public LuxSearcher(IndexSearcher searcher) {
    super(searcher.getIndexReader());
    indexReader = null;
    wrappedSearcher = searcher;
}
From source file:Main.WebAPI.Search.java
/**
 * @param args args[0] is a query
 *
 * @throws IOException
 * @throws ParseException
 * @throws InvalidTokenOffsetsException
 */
public static void main(String[] args) throws IOException, ParseException, InvalidTokenOffsetsException {
    // Above, create documents with two fields, one with term vectors (tv) and one without (notv)
    Analyzer analyzer = new ThaiAnalyzer(Version.LUCENE_45);
    Directory index = FSDirectory.open(new File("data/indexing"));

    String querystr = args.length > 0 ? args[0] : "mike lab";

    // "content" is the default field to use when no field is
    // explicitly specified in the query.
    Query query = new MultiFieldQueryParser(Version.LUCENE_45, new String[] { "content" }, analyzer)
            .parse(querystr);

    // 3. search
    int hitsPerPage = 10;
    IndexReader reader = DirectoryReader.open(index);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs hits = searcher.search(query, hitsPerPage);

    SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
    Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
    String preview;
    // Bound the loop by the actual hit count; the original assumed 10 hits
    // and would throw ArrayIndexOutOfBoundsException on smaller result sets.
    for (int i = 0; i < Math.min(hitsPerPage, hits.scoreDocs.length); i++) {
        int id = hits.scoreDocs[i].doc;
        Document doc = searcher.doc(id);
        preview = "";
        System.out.println(doc.get("url"));
        System.out.println(doc.get("title"));
        String text = doc.get("content");
        TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "content",
                analyzer);
        TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);
        int k = 0;
        for (TextFragment frag1 : frag) {
            if ((frag1 != null) && (frag1.getScore() > 0)) {
                preview += frag1.toString() + "...<br>";
                k++;
                // Keep a two-fragment preview
                if (k >= 2) {
                    break;
                }
            }
        }
        System.out.println("-------------");
    }
}
From source file:net.conquiris.search.DefaultSearcherService.java
License:Apache License
void disposeIndexSearcher(IndexSearcher searcher) {
    try {
        searcher.close();
    } catch (IOException e) {
        // TODO: log
    }
    try {
        searcher.getIndexReader().close();
    } catch (IOException e) {
        // TODO: log
    }
}
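IndexSearcher.close() here is the Lucene 3.x API; it was removed in Lucene 4, where the searcher owns no closeable resources of its own. A minimal sketch of the equivalent disposal under that assumption:

// Lucene 4+ sketch: disposing a searcher means closing the reader
// obtained via getIndexReader(), nothing more.
void disposeIndexSearcher(IndexSearcher searcher) {
    try {
        searcher.getIndexReader().close();
    } catch (IOException e) {
        // TODO: log
    }
}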
From source file:net.sf.jclal.util.dataset.LuceneIndexToWekaDataSet.java
License:Open Source License
/**
 * Converts a Lucene index file to a weka file for classification. The
 * weka file's class attribute is nominal, so the classifiers will work
 * with a nominal class.
 *
 * @param wekaFileName Path of the weka file.
 * @param indexFile Path of the Lucene-based index file. The indexed documents
 * must have fields called "class" and "content". WARNING: The fields must
 * not contain any punctuation signs.
 *
 * @return Weka instances. The instances are sparse since they hold
 * text information.
 *
 * @throws FileNotFoundException If the file does not exist.
 * @throws IOException If an error happens while writing the file.
 */
public Instances convertLuceneToWekaClassification(String wekaFileName, String indexFile)
        throws FileNotFoundException, IOException {
    File nuevo = new File(wekaFileName);
    if (!verify(nuevo)) {
        return null;
    }
    FileUtil.writeFile(nuevo, "@RELATION " + nuevo.getName() + doubleLine);

    IndexSearcher searcher = new IndexSearcher(indexFile);
    IndexReader reader = searcher.getIndexReader();

    int total = reader.maxDoc();
    HashMap<String, Integer> terms = new HashMap<String, Integer>(total * 2);
    Set<String> labels = new HashSet<String>(total * 2);
    int i;
    for (int l = 0; l < total; l++) {
        if (!reader.isDeleted(l)) {
            TermFreqVector vector = reader.getTermFreqVector(l, content);
            Document doc = reader.document(l);
            String current = doc.getField(classF).stringValue();
            labels.add(current);
            if (vector != null) {
                String[] listosI = vector.getTerms();
                for (i = 0; i < listosI.length; i++) {
                    if (!terms.containsKey(listosI[i])) {
                        terms.put(listosI[i], terms.size());
                    }
                }
            }
        }
    }

    String[] labelReady = new String[labels.size()];
    int posLabel = 0;
    for (String string : labels) {
        labelReady[posLabel] = string;
        posLabel++;
    }

    Container[] terminos = convertir(terms);
    Arrays.sort(terminos);

    for (int j = 0; j < terminos.length; j++) {
        FileUtil.writeFile(nuevo, "@ATTRIBUTE " + (int) terminos[j].getKey() + " NUMERIC" + "\n");
    }

    FileUtil.writeFile(nuevo, "@ATTRIBUTE class {");
    for (int j = 0; j < labelReady.length - 1; j++) {
        FileUtil.writeFile(nuevo, labelReady[j] + ",");
    }
    FileUtil.writeFile(nuevo, labelReady[labelReady.length - 1] + "}" + doubleLine);

    FileUtil.writeFile(nuevo, "@DATA\n");

    for (int pos = 0; pos < searcher.maxDoc(); pos++) {
        if (!reader.isDeleted(pos)) {
            TermFreqVector vector = reader.getTermFreqVector(pos, content);
            if (vector != null) {
                int[] origen = vector.getTermFrequencies();
                String[] termsI = vector.getTerms();
                int[] positions = new int[origen.length];
                for (int k = 0; k < origen.length; k++) {
                    positions[k] = terms.get(termsI[k]);
                }
                Container[] escribir = convertir(positions, origen);
                Arrays.sort(escribir);
                FileUtil.writeFile(nuevo, "{");
                for (int j = 0; j < escribir.length; j++) {
                    FileUtil.writeFile(nuevo, (int) escribir[j].getKey() + " " + escribir[j].getValue() + ",");
                }
                FileUtil.writeFile(nuevo,
                        terms.size() + " " + searcher.doc(pos).getField(classF).stringValue() + "}\n");
            }
        }
    }

    // Close files
    closeReaders(searcher, reader);

    // Test whether the weka file works
    Instances test = testWekaFile(wekaFileName);

    return test;
}
From source file:net.sf.jclal.util.dataset.LuceneIndexToWekaDataSet.java
License:Open Source License
/**
 * Converts a Lucene index file to a weka file for regression. The
 * weka file's class attribute is real, so the classifiers used will work
 * with a numeric real class.
 *
 * @param wekaFileName Path of the weka file.
 * @param indexFile Path of the Lucene-based index file. The indexed documents
 * must have fields called "class" and "content". WARNING: The fields must
 * not contain any punctuation signs.
 *
 * @return Weka instances. The instances are sparse since they hold
 * text information.
 *
 * @throws FileNotFoundException If the file does not exist.
 * @throws IOException If an error happens while writing the file.
 */
public Instances convertLuceneToWekaRegression(String wekaFileName, String indexFile)
        throws FileNotFoundException, IOException {
    File nuevo = new File(wekaFileName);
    if (!verify(nuevo)) {
        return null;
    }
    FileUtil.writeFile(nuevo, "@RELATION " + nuevo.getName() + doubleLine);

    IndexSearcher searcher = new IndexSearcher(indexFile);
    IndexReader reader = searcher.getIndexReader();

    int total = reader.maxDoc();
    HashMap<String, Integer> terms = new HashMap<String, Integer>(total * 2);
    HashMap<String, Integer> labels = new HashMap<String, Integer>(total * 2);
    int i;
    for (int l = 0; l < total; l++) {
        if (!reader.isDeleted(l)) {
            TermFreqVector vector = reader.getTermFreqVector(l, content);
            Document doc = reader.document(l);
            String current = doc.getField(classF).stringValue();
            if (!labels.containsKey(current)) {
                labels.put(current, labels.size());
            }
            if (vector != null) {
                String[] listosI = vector.getTerms();
                for (i = 0; i < listosI.length; i++) {
                    if (!terms.containsKey(listosI[i])) {
                        terms.put(listosI[i], terms.size());
                    }
                }
            }
        }
    }

    Container[] terminos = convertir(terms);
    Arrays.sort(terminos);

    for (int j = 0; j < terminos.length; j++) {
        FileUtil.writeFile(nuevo, "@ATTRIBUTE " + (int) terminos[j].getKey() + " NUMERIC" + "\n");
    }

    FileUtil.writeFile(nuevo, "@ATTRIBUTE class REAL [0.0,");
    FileUtil.writeFile(nuevo, (labels.size() - 1) + ".0]" + doubleLine);

    FileUtil.writeFile(nuevo, "@DATA\n");

    for (int pos = 0; pos < searcher.maxDoc(); pos++) {
        if (!reader.isDeleted(pos)) {
            TermFreqVector vector = reader.getTermFreqVector(pos, content);
            if (vector != null) {
                int[] origen = vector.getTermFrequencies();
                String[] termsI = vector.getTerms();
                int[] positions = new int[origen.length];
                for (int k = 0; k < origen.length; k++) {
                    positions[k] = terms.get(termsI[k]);
                }
                Container[] escribir = convertir(positions, origen);
                Arrays.sort(escribir);
                FileUtil.writeFile(nuevo, "{");
                for (int j = 0; j < escribir.length; j++) {
                    FileUtil.writeFile(nuevo, (int) escribir[j].getKey() + " " + escribir[j].getValue() + ",");
                }
                FileUtil.writeFile(nuevo, terms.size() + " "
                        + labels.get(searcher.doc(pos).getField(classF).stringValue()) + ".0}\n");
            }
        }
    }

    // Close files
    closeReaders(searcher, reader);

    // Test whether the weka file works
    Instances test = testWekaFile(wekaFileName);

    return test;
}
From source file:net.sf.katta.lib.lucene.LuceneServer.java
License:Apache License
/**
 * Returns the number of documents a shard has.
 *
 * @param shardName
 * @return the number of documents in the shard.
 */
protected int shardSize(String shardName) {
    final SearcherHandle handle = getSearcherHandleByShard(shardName);
    IndexSearcher searcher = handle.getSearcher();
    try {
        if (searcher != null) {
            int size = searcher.getIndexReader().numDocs();
            if (LOG.isDebugEnabled()) {
                LOG.debug("Shard '" + shardName + "' has " + size + " docs.");
            }
            return size;
        }
        throw new IllegalArgumentException("Shard '" + shardName + "' unknown");
    } finally {
        handle.finishSearcher();
    }
}
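Note that numDocs() counts live documents only, while maxDoc() also covers deleted documents that have not yet been merged away. A short illustrative sketch (variable names are ours, not from the source):

IndexReader reader = searcher.getIndexReader();
int live = reader.numDocs();          // documents visible to searches
int withDeleted = reader.maxDoc();    // upper bound on doc IDs, includes deletions
int deleted = withDeleted - live;     // pending deletions not yet merged away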
From source file:net.sf.katta.lib.lucene.LuceneServer.java
License:Apache License
@Override
public MapWritable getDetails(final String[] shards, final int docId, final String[] fieldNames)
        throws IOException {
    final SearcherHandle handle = getSearcherHandleByShard(shards[0]);
    IndexSearcher searcher = handle.getSearcher();
    IndexReader ir = searcher.getIndexReader();

    final MapWritable result = new MapWritable();
    final Document doc = doc(shards[0], docId, fieldNames);
    final List<Fieldable> fields = doc.getFields();
    for (final Fieldable field : fields) {
        final String name = field.name();
        if (field.isBinary()) {
            final byte[] binaryValue = field.getBinaryValue();
            result.put(new Text(name), new BytesWritable(binaryValue));
        } else {
            final String stringValue = field.stringValue();
            result.put(new Text(name), new Text(stringValue));
        }
        TermFreqVector tfv = ir.getTermFreqVector(docId, name);
        if (tfv == null) {
            continue; // guard added: fields without stored term vectors return null
        }
        String[] terms = tfv.getTerms();
        int[] freqs = tfv.getTermFrequencies();
        MapWritable returnTerms = new MapWritable();
        for (int t = 0; t < tfv.size(); t++) {
            returnTerms.put(new Text(terms[t]), new IntWritable(freqs[t]));
        }
        result.put(new Text(name + "_freqs"), returnTerms);
    }
    return result;
}