Example usage for org.apache.lucene.search IndexSearcher getIndexReader

Introduction

On this page you can find example usages of org.apache.lucene.search.IndexSearcher.getIndexReader().

Prototype

public IndexReader getIndexReader() 

Document

Return the IndexReader this searcher searches.
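
Before the longer samples below, here is a minimal, self-contained sketch of the call using the Lucene 4.x API that most of the examples on this page target; the index path is a placeholder:

import java.io.File;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class GetIndexReaderExample {
    public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(new File("data/indexing")); // placeholder index path
        IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
        IndexReader reader = searcher.getIndexReader(); // the reader this searcher searches
        System.out.println("numDocs=" + reader.numDocs() + ", maxDoc=" + reader.maxDoc());
        reader.close(); // the caller owns the reader; the searcher does not close it
        dir.close();
    }
}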

Usage

From source file: lia.chapter3.NearRealTimeTest.java

License: Apache License

public void testNearRealTime() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter writer = Utils.getIndexWriter(dir);
    for (int i = 0; i < 10; i++) {
        Document doc = new Document();
        doc.add(new Field("id", "" + i, StringField.TYPE_STORED));
        doc.add(new Field("text", "aaa", TextField.TYPE_STORED));
        writer.addDocument(doc);
    }
    IndexSearcher searcher = Utils.getIndexSearcher(dir); // #A

    Query query = new TermQuery(new Term("text", "aaa"));
    TopDocs docs = searcher.search(query, 1);
    assertEquals(10, docs.totalHits); // #B

    writer.deleteDocuments(new Term("id", "7")); // #2

    Document doc = new Document(); // #3
    doc.add(new Field("id", // #3
            "11", // #3
            StringField.TYPE_STORED)); // #3
    doc.add(new Field("text", // #3
            "bbb", // #3
            TextField.TYPE_STORED)); // #3
    writer.addDocument(doc); // #3

    DirectoryReader oldReader = (DirectoryReader) searcher.getIndexReader(); // #4 assumes the searcher wraps a DirectoryReader
    DirectoryReader newReader = DirectoryReader.openIfChanged(oldReader, writer, true); // #4 NRT reopen so the uncommitted delete and add are visible
    assertFalse(oldReader == newReader); // #5
    oldReader.close(); // #6 close the old reader only after the new one is obtained
    searcher = new IndexSearcher(newReader);

    TopDocs hits = searcher.search(query, 10); // #7
    assertEquals(9, hits.totalHits); // #7

    query = new TermQuery(new Term("text", "bbb")); // #8
    hits = searcher.search(query, 1); // #8
    assertEquals(1, hits.totalHits); // #8

    newReader.close();
    writer.close();
}
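
Note the ordering of the reopen step: the new searcher is created over the reopened reader before the old reader is closed, so searches in flight against the old reader are never cut off. In production code this open/reopen/close bookkeeping is usually delegated to Lucene's SearcherManager, which reference-counts readers and swaps them in atomically.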

From source file: lia.chapter5.CategorizerTest.java

License: Apache License

private void buildCategoryVectors() throws IOException {
    IndexSearcher searcher = Utils.getBookIndexSearcher();
    IndexReader reader = searcher.getIndexReader();

    int maxDoc = reader.maxDoc();
    System.out.println(maxDoc);
    for (int i = 0; i < maxDoc; i++) {
        Document doc = reader.document(i);
        String category = doc.get("category");
        System.out.println("\n" + doc.get("subject") + "\n");
        Map vectorMap = (Map) categoryMap.get(category);
        if (vectorMap == null) {
            vectorMap = new TreeMap();
            categoryMap.put(category, vectorMap);
        }

        Terms termsVector = reader.getTermVector(i, "subject");

        addTermFreqToMap(vectorMap, termsVector);
    }
}
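
One caveat: maxDoc() also counts deleted documents, so on an index that has seen deletions the loop above can visit stale entries. A minimal guard in the Lucene 4.x/5.x API, assuming the same reader variable as above, would be:

import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.Bits;

    Bits liveDocs = MultiFields.getLiveDocs(reader); // null when the index has no deletions
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (liveDocs != null && !liveDocs.get(i)) {
            continue; // skip deleted documents
        }
        Document doc = reader.document(i);
        // ... proceed as above
    }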

From source file: lia.tools.FastVectorHighlighterSample.java

License: Apache License

static void searchIndex(String filename) throws Exception {
    QueryParser parser = new QueryParser(Version.LUCENE_30, F, analyzer);
    Query query = parser.parse(QUERY);
    FastVectorHighlighter highlighter = getHighlighter(); // #C
    FieldQuery fieldQuery = highlighter.getFieldQuery(query); // #D
    IndexSearcher searcher = new IndexSearcher(dir);
    TopDocs docs = searcher.search(query, 10);

    FileWriter writer = new FileWriter(filename);
    writer.write("<html>");
    writer.write("<body>");
    writer.write("<p>QUERY : " + QUERY + "</p>");
    for (ScoreDoc scoreDoc : docs.scoreDocs) {
        String snippet = highlighter.getBestFragment( // #E
                fieldQuery, searcher.getIndexReader(), // #E
                scoreDoc.doc, F, 100); // #E
        if (snippet != null) {
            writer.write(scoreDoc.doc + " : " + snippet + "<br/>");
        }
    }
    writer.write("</body></html>");
    writer.close();
    searcher.close();
}
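
FastVectorHighlighter can only highlight fields that were indexed with term vectors including positions and offsets. With the Lucene 3.x API this sample targets (note Version.LUCENE_30), such a field would be added roughly as follows, where F and text stand in for the sample's own field name and content:

    doc.add(new Field(F, text, Field.Store.YES, Field.Index.ANALYZED,
            Field.TermVector.WITH_POSITIONS_OFFSETS));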

From source file: lux.search.LuxSearcher.java

License: Mozilla Public License

/**
 * creates a Lux searcher based on an existing Lucene IndexSearcher
 * @param searcher the underlying {@link IndexSearcher}
 */
public LuxSearcher(IndexSearcher searcher) {
    super(searcher.getIndexReader());
    indexReader = null;
    wrappedSearcher = searcher;
}
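
Handing the wrapped searcher's reader to the superclass constructor means the LuxSearcher shares a single IndexReader with the original IndexSearcher; leaving the indexReader field null appears to mark that this instance does not own the reader and so must not close it.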

From source file: Main.WebAPI.Search.java

/**
 *
 * @param args args[0] is a query
 * 
 * @throws IOException
 * @throws ParseException
 * @throws InvalidTokenOffsetsException 
 */

public static void main(String[] args) throws IOException, ParseException, InvalidTokenOffsetsException {
    // assumes an index built beforehand whose documents store "url", "title" and "content" fields
    Analyzer analyzer = new ThaiAnalyzer(Version.LUCENE_45);

    Directory index = FSDirectory.open(new File("data/indexing"));
    String querystr = args.length > 0 ? args[0] : "mike lab";
    // the "title" arg specifies the default field to use
    // when no field is explicitly specified in the query.
    Query query = new MultiFieldQueryParser(Version.LUCENE_45, new String[] { "content" }, analyzer)
            .parse(querystr);

    // 3. search
    int hitsPerPage = 10;
    IndexReader reader = DirectoryReader.open(index);
    IndexSearcher searcher = new IndexSearcher(reader);

    TopDocs hits = searcher.search(query, hitsPerPage);

    SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
    Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
    String preview;
    for (int i = 0; i < Math.min(hitsPerPage, hits.scoreDocs.length); i++) { // guard against fewer than hitsPerPage hits
        int id = hits.scoreDocs[i].doc;
        Document doc = searcher.doc(id);
        String text;
        preview = "";
        System.out.println(doc.get("url"));
        System.out.println(doc.get("title"));
        text = doc.get("content");
        TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "content",
                analyzer);
        // alternative: highlighter.getBestFragments(tokenStream, text, 3, "...")
        TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);
        int k = 0;
        for (TextFragment frag1 : frag) {
            if ((frag1 != null) && (frag1.getScore() > 0)) {
                preview += frag1.toString() + "...<br>";
                k++;
                // keep at most a two-fragment preview
                if (k >= 2)
                    break;
            }
        }
        //Term vector
        System.out.println("-------------");
    }
}
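
TokenSources.getAnyTokenStream first looks for a stored term vector for the "content" field and, if none exists, falls back to re-analyzing the stored field value with the supplied analyzer, so the highlighting above works whether or not term vectors were indexed, at the cost of re-analysis.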

From source file: net.conquiris.search.DefaultSearcherService.java

License: Apache License

void disposeIndexSearcher(IndexSearcher searcher) {
    try {
        searcher.close();
    } catch (IOException e) {
        // TODO: log
    }
    try {
        searcher.getIndexReader().close();
    } catch (IOException e) {
        // TODO: log
    }
}
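
Both close calls matter with the pre-4.0 API used here: closing an IndexSearcher does not close a reader the searcher was constructed over, so the reader obtained via getIndexReader() must be closed separately. Each close also sits in its own try block, so a failure in one does not prevent the other from running.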

From source file: net.sf.jclal.util.dataset.LuceneIndexToWekaDataSet.java

License: Open Source License

/**
 * It converts a Lucene index file to a Weka file for classification. The
 * Weka class attribute is nominal, so the resulting file suits classifiers
 * that work with a nominal class.
 *
 * @param wekaFileName Path of the Weka file.
 * @param indexFile Path of the Lucene-based index file. The indexed
 * documents must have fields called "class" and "content". WARNING: the
 * fields must not contain any punctuation signs.
 *
 * @return Weka Instances. The instances are sparse, as is usual for
 * text data.
 *
 * @throws FileNotFoundException If the file does not exist.
 * @throws IOException If an error occurs while writing the file.
 */
public Instances convertLuceneToWekaClassification(String wekaFileName, String indexFile)
        throws FileNotFoundException, IOException {
    File nuevo = new File(wekaFileName);

    if (!verify(nuevo)) {
        return null;
    }

    FileUtil.writeFile(nuevo, "@RELATION " + nuevo.getName() + doubleLine);

    IndexSearcher searcher = new IndexSearcher(indexFile);

    IndexReader reader = searcher.getIndexReader();

    int total = reader.maxDoc();

    HashMap<String, Integer> terms = new HashMap<String, Integer>(total * 2);
    Set<String> labels = new HashSet<String>(total * 2);

    int i;
    for (int l = 0; l < total; l++) {
        if (!reader.isDeleted(l)) {
            TermFreqVector vector = reader.getTermFreqVector(l, content);

            Document doc = reader.document(l);

            String current = doc.getField(classF).stringValue();

            labels.add(current);

            if (vector != null) {
                String listosI[] = vector.getTerms();
                for (i = 0; i < listosI.length; i++) {
                    if (!terms.containsKey(listosI[i])) {
                        terms.put(listosI[i], terms.size());
                    }

                }
            }
        }
    }

    String[] labelReady = new String[labels.size()];
    int posLabel = 0;
    for (String string : labels) {
        labelReady[posLabel] = string;
        posLabel++;
    }

    Container[] terminos = convertir(terms);
    Arrays.sort(terminos);

    for (int j = 0; j < terminos.length; j++) {
        FileUtil.writeFile(nuevo, "@ATTRIBUTE " + (int) terminos[j].getKey() + " NUMERIC" + "\n");
    }

    FileUtil.writeFile(nuevo, "@ATTRIBUTE class {");
    for (int j = 0; j < labelReady.length - 1; j++) {
        FileUtil.writeFile(nuevo, labelReady[j] + ",");
    }
    FileUtil.writeFile(nuevo, labelReady[labelReady.length - 1] + "}" + doubleLine);

    FileUtil.writeFile(nuevo, "@DATA\n");

    for (int pos = 0; pos < searcher.maxDoc(); pos++) {

        if (!reader.isDeleted(pos)) {

            TermFreqVector vector = reader.getTermFreqVector(pos, content);

            if (vector != null) {
                int[] origen = vector.getTermFrequencies();
                String[] termsI = vector.getTerms();

                int[] positions = new int[origen.length];

                for (int k = 0; k < origen.length; k++) {
                    positions[k] = terms.get(termsI[k]);
                }

                Container[] escribir = convertir(positions, origen);
                Arrays.sort(escribir);

                FileUtil.writeFile(nuevo, "{");
                for (int j = 0; j < escribir.length; j++) {
                    FileUtil.writeFile(nuevo, (int) escribir[j].getKey() + " " + escribir[j].getValue() + ",");
                }

                FileUtil.writeFile(nuevo,
                        terms.size() + " " + searcher.doc(pos).getField(classF).stringValue() + "}\n");
            }

        }
    }

    //close files
    closeReaders(searcher, reader);

    //Test if the weka file works
    Instances test = testWekaFile(wekaFileName);

    return test;
}
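
For orientation, the output is a sparse ARFF file. Under the conventions of the code above (term attributes named by their integer ids, the class attribute written last at index terms.size()), a tiny hypothetical result with two terms and two labels might look like:

@RELATION out.arff

@ATTRIBUTE 0 NUMERIC
@ATTRIBUTE 1 NUMERIC
@ATTRIBUTE class {sport,politics}

@DATA
{0 2,1 1,2 sport}
{1 3,2 politics}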

From source file: net.sf.jclal.util.dataset.LuceneIndexToWekaDataSet.java

License: Open Source License

/**
 * It converts a Lucene index file to a Weka file for regression. The
 * Weka class attribute is real-valued, so the resulting file suits
 * classifiers that work with a numeric class.
 *
 * @param wekaFileName Path of the Weka file.
 * @param indexFile Path of the Lucene-based index file. The indexed
 * documents must have fields called "class" and "content". WARNING: the
 * fields must not contain any punctuation signs.
 *
 * @return Weka Instances. The instances are sparse, as is usual for
 * text data.
 *
 * @throws FileNotFoundException If the file does not exist.
 * @throws IOException If an error occurs while writing the file.
 */
public Instances convertLuceneToWekaRegression(String wekaFileName, String indexFile)
        throws FileNotFoundException, IOException {
    File nuevo = new File(wekaFileName);

    if (!verify(nuevo)) {
        return null;
    }

    FileUtil.writeFile(nuevo, "@RELATION " + nuevo.getName() + doubleLine);

    IndexSearcher searcher = new IndexSearcher(indexFile);

    IndexReader reader = searcher.getIndexReader();

    int total = reader.maxDoc();

    HashMap<String, Integer> terms = new HashMap<String, Integer>(total * 2);
    HashMap<String, Integer> labels = new HashMap<String, Integer>(total * 2);

    int i;
    for (int l = 0; l < total; l++) {
        if (!reader.isDeleted(l)) {
            TermFreqVector vector = reader.getTermFreqVector(l, content);

            Document doc = reader.document(l);

            String current = doc.getField(classF).stringValue();

            if (!labels.containsKey(current)) {
                labels.put(current, labels.size());
            }

            if (vector != null) {
                String listosI[] = vector.getTerms();
                for (i = 0; i < listosI.length; i++) {
                    if (!terms.containsKey(listosI[i])) {
                        terms.put(listosI[i], terms.size());
                    }

                }
            }
        }
    }

    Container[] terminos = convertir(terms);
    Arrays.sort(terminos);

    for (int j = 0; j < terminos.length; j++) {
        FileUtil.writeFile(nuevo, "@ATTRIBUTE " + (int) terminos[j].getKey() + " NUMERIC" + "\n");
    }

    FileUtil.writeFile(nuevo, "@ATTRIBUTE class REAL [0.0,");

    FileUtil.writeFile(nuevo, (labels.size() - 1) + ".0]" + doubleLine);

    FileUtil.writeFile(nuevo, "@DATA\n");

    for (int pos = 0; pos < searcher.maxDoc(); pos++) {

        if (!reader.isDeleted(pos)) {

            TermFreqVector vector = reader.getTermFreqVector(pos, content);

            if (vector != null) {
                int[] origen = vector.getTermFrequencies();
                String[] termsI = vector.getTerms();

                int[] positions = new int[origen.length];

                for (int k = 0; k < origen.length; k++) {
                    positions[k] = terms.get(termsI[k]);
                }

                Container[] escribir = convertir(positions, origen);
                Arrays.sort(escribir);

                FileUtil.writeFile(nuevo, "{");
                for (int j = 0; j < escribir.length; j++) {
                    FileUtil.writeFile(nuevo, (int) escribir[j].getKey() + " " + escribir[j].getValue() + ",");
                }

                FileUtil.writeFile(nuevo, terms.size() + " "
                        + labels.get(searcher.doc(pos).getField(classF).stringValue()) + ".0}\n");
            }

        }
    }

    //close files
    closeReaders(searcher, reader);

    //Test if the weka file works
    Instances test = testWekaFile(wekaFileName);

    return test;
}
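
The regression variant differs from the classification one only in the class attribute: each distinct label is mapped to a real number (0.0, 1.0, ...) and declared as a REAL range rather than a nominal set. A minimal, hypothetical usage sketch (the no-argument constructor and both paths are assumptions):

    LuceneIndexToWekaDataSet converter = new LuceneIndexToWekaDataSet();
    Instances nominal = converter.convertLuceneToWekaClassification("data/train.arff", "data/index");
    Instances numeric = converter.convertLuceneToWekaRegression("data/train-reg.arff", "data/index");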

From source file: net.sf.katta.lib.lucene.LuceneServer.java

License: Apache License

/**
 * Returns the number of documents a shard has.
 *
 * @param shardName
 * @return the number of documents in the shard.
 */
protected int shardSize(String shardName) {
    final SearcherHandle handle = getSearcherHandleByShard(shardName);
    IndexSearcher searcher = handle.getSearcher();
    try {
        if (searcher != null) {
            int size = searcher.getIndexReader().numDocs();
            if (LOG.isDebugEnabled()) {
                LOG.debug("Shard '" + shardName + "' has " + size + " docs.");
            }
            return size;
        }
        throw new IllegalArgumentException("Shard '" + shardName + "' unknown");
    } finally {
        handle.finishSearcher();
    }
}
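
numDocs() counts only live documents, while maxDoc() would also include deleted ones, so numDocs() is the right measure of a shard's visible size here.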

From source file: net.sf.katta.lib.lucene.LuceneServer.java

License: Apache License

@Override
public MapWritable getDetails(final String[] shards, final int docId, final String[] fieldNames)
        throws IOException {
    final SearcherHandle handle = getSearcherHandleByShard(shards[0]);
    IndexSearcher searcher = handle.getSearcher();
    IndexReader ir = searcher.getIndexReader();

    final MapWritable result = new MapWritable();
    final Document doc = doc(shards[0], docId, fieldNames);
    final List<Fieldable> fields = doc.getFields();
    for (final Fieldable field : fields) {
        final String name = field.name();
        if (field.isBinary()) {
            final byte[] binaryValue = field.getBinaryValue();
            result.put(new Text(name), new BytesWritable(binaryValue));
        } else {
            final String stringValue = field.stringValue();
            result.put(new Text(name), new Text(stringValue));
        }

        TermFreqVector tfv = ir.getTermFreqVector(docId, name);
        if (tfv == null) {
            continue; // the field may have been indexed without term vectors
        }
        String[] terms = tfv.getTerms();
        int[] freqs = tfv.getTermFrequencies();
        MapWritable returnTerms = new MapWritable();
        for (int t = 0; t < tfv.size(); t++) {
            returnTerms.put(new Text(terms[t]), new IntWritable(freqs[t]));
        }
        result.put(new Text(name + "_freqs"), returnTerms);
    }
    return result;
}