Example usage for org.apache.lucene.index IndexReader document

List of usage examples for org.apache.lucene.index IndexReader document

Introduction

On this page you can find example usage of org.apache.lucene.index.IndexReader#document.

Prototype




public final Document document(int docID) throws IOException 

Source Link

Document

Returns the stored fields of the nth Document in this index.

Usage

From source file:net.semanticmetadata.lire.TestImageSearcher.java

License:Open Source License

public void testCachingSearcherParallelWithBundling() throws IOException, InterruptedException {
    final IndexReader ir = DirectoryReader.open(FSDirectory.open(new File("C:\\Temp\\test-100k-cedd-idx")));

    // Pre-load every CEDD feature from the index so the timed section below
    // measures search throughput only, not index I/O.
    LinkedList<LireFeature> q = new LinkedList<LireFeature>();
    for (int i = 0; i < ir.maxDoc(); i++) {
        Document d = ir.document(i);
        CEDD cedd = new CEDD();
        BytesRef binaryValue = d.getBinaryValue(cedd.getFieldName());
        cedd.setByteArrayRepresentation(binaryValue.bytes, binaryValue.offset, binaryValue.length);
        q.add(cedd);
    }

    int count = 0;
    Thread[] searchers = new Thread[4];
    final LinkedBlockingQueue<WorkItem> queryQueue = new LinkedBlockingQueue<WorkItem>(100);
    for (int i = 0; i < searchers.length; i++) {
        searchers[i] = new Thread(new Runnable() {
            @Override
            public void run() {
                SingleNddCeddImageSearcher is = new SingleNddCeddImageSearcher(ir);
                try {
                    WorkItem item;
                    // take() blocks until work arrives; the original used remove(),
                    // which throws NoSuchElementException whenever the queue is
                    // momentarily empty.
                    while ((item = queryQueue.take()).features != null) {
                        try {
                            SimpleResult[] hits = is.findMostSimilar(item.features);
                            for (int j = 0; j < hits.length; j++) {
                                if (hits[j].getIndexNumber() != item.id[j])
                                    System.err.println("oops");
                            }
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                    }
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt(); // restore interrupt status
                }
            }

        });
        searchers[i].start();
    }
    long time = System.currentTimeMillis();
    LireFeature[] qarr = new LireFeature[10];
    int[] iarr = new int[10];
    int currentIndex = 0;
    int bundleCount = 0;
    Iterator<LireFeature> iterator = q.iterator();
    // Producer: group queries into bundles of ten and hand them to the workers.
    while (iterator.hasNext() && bundleCount < 200) {
        LireFeature next = iterator.next();
        try {
            iarr[currentIndex] = count;
            qarr[currentIndex++] = next;
            if (currentIndex >= qarr.length) { // bundle complete: enqueue a batched search
                currentIndex = 0;
                queryQueue.put(new WorkItem(qarr.clone(), iarr.clone()));
                bundleCount++;
            }
            count++;
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
    // One poison pill (features == null) per worker thread shuts them all down;
    // the original enqueued 8 pills for 4 threads.
    for (int i = 0; i < searchers.length; i++) {
        queryQueue.put(new WorkItem(null, null));
    }
    for (int i = 0; i < searchers.length; i++) {
        searchers[i].join();
    }
    long l = System.currentTimeMillis() - time;
    System.out.printf("Tested %d search requests on %d documents: overall time of %d:%02d, %.2f ms per search",
            count, ir.maxDoc(), l / (1000 * 60), (l / 1000) % 60, ((float) l / (float) count));
}

From source file:net.semanticmetadata.lire.TestImageSearcher.java

License:Open Source License

public void testCachingSearcherBundling() throws IOException {
    IndexReader ir = DirectoryReader.open(FSDirectory.open(new File("C:\\Temp\\test-100k-cedd-idx")));
    SingleNddCeddImageSearcher is = new SingleNddCeddImageSearcher(ir);

    // Load all CEDD features up front so only the searches are timed.
    LinkedList<LireFeature> q = new LinkedList<LireFeature>();
    for (int i = 0; i < ir.maxDoc(); i++) {
        Document d = ir.document(i);
        CEDD cedd = new CEDD();
        BytesRef binaryValue = d.getBinaryValue(cedd.getFieldName());
        cedd.setByteArrayRepresentation(binaryValue.bytes, binaryValue.offset, binaryValue.length);
        q.add(cedd);
    }

    long time = System.currentTimeMillis();
    int count = 0;
    LireFeature[] qarr = new LireFeature[10];
    int currentIndex = 0;
    for (Iterator<LireFeature> iterator = q.iterator(); iterator.hasNext();) {
        LireFeature next = iterator.next();
        qarr[currentIndex++] = next;
        if (currentIndex >= qarr.length) { // bundle complete: run one batched search
            currentIndex = 0;
            is.findMostSimilar(qarr);
        }
        count++;
        // Stop after at least 1000 queries, but only on a bundle boundary.
        // Logical '&&' replaces the original bitwise '&'.
        if (count > 999 && currentIndex == 0)
            break;
    }
    long l = System.currentTimeMillis() - time;
    System.out.printf("Tested %d search requests on %d documents: overall time of %d:%02d, %.2f ms per search",
            count, ir.maxDoc(), l / (1000 * 60), (l / 1000) % 60, ((float) l / (float) count));
}

From source file:net.semanticmetadata.lire.utils.FileUtils.java

License:Open Source License

/**
 * Writes the search results as an HTML page linking the result images.
 *
 * @param prefix     prefix for the generated file name
 * @param hits       the search hits to render
 * @param queryImage path of the query image shown at the top of the page
 * @param reader     index reader used to resolve the stored image paths
 * @return the name of the generated HTML file
 * @throws IOException if writing the file fails
 */
public static String saveImageResultsToHtml(String prefix, ImageSearchHits hits, String queryImage,
        IndexReader reader) throws IOException {
    long l = System.currentTimeMillis() / 1000;
    String fileName = "results-" + prefix + "-" + l + ".html";
    BufferedWriter bw = new BufferedWriter(new FileWriter(fileName));
    try {
        bw.write("<html>\n" + "<head><title>Search Results</title></head>\n" + "<body bgcolor=\"#FFFFFF\">\n");
        bw.write("<h3>query</h3>\n");
        bw.write("<a href=\"file://" + queryImage + "\"><img src=\"file://" + queryImage + "\"></a><p>\n");
        bw.write("<h3>results</h3>\n");
        for (int i = 0; i < hits.length(); i++) {
            // Fetch the stored document once per hit; the original retrieved it twice.
            String imagePath = reader.document(hits.documentID(i))
                    .getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
            bw.write(hits.score(i) + " - <a href=\"file://" + imagePath + "\"><img src=\"file://" + imagePath
                    + "\"></a><p>\n");
        }
        bw.write("</body>\n" + "</html>");
    } finally {
        bw.close(); // close the writer even if a document lookup throws
    }
    return fileName;
}

From source file:net.semanticmetadata.lire.utils.FileUtils.java

License:Open Source License

/**
 * Writes the search results as an HTML page linking the result images.
 *
 * @param prefix     prefix for the generated file name
 * @param hits       the Lucene top docs to render
 * @param reader     index reader used to resolve the stored image paths
 * @param queryImage path of the query image shown at the top of the page
 * @return the name of the generated HTML file
 * @throws IOException if writing the file fails
 */
public static String saveImageResultsToHtml(String prefix, TopDocs hits, IndexReader reader, String queryImage)
        throws IOException {
    long l = System.currentTimeMillis() / 1000;
    String fileName = "results-" + prefix + "-" + l + ".html";
    BufferedWriter bw = new BufferedWriter(new FileWriter(fileName));
    try {
        bw.write("<html>\n" + "<head><title>Search Results</title></head>\n" + "<body bgcolor=\"#FFFFFF\">\n");
        bw.write("<h3>query</h3>\n");
        bw.write("<a href=\"file://" + queryImage + "\"><img src=\"file://" + queryImage + "\"></a><p>\n");
        bw.write("<h3>results</h3>\n");
        for (int i = 0; i < hits.scoreDocs.length; i++) {
            // Fetch the stored document once per hit; the original retrieved it twice.
            String imagePath = reader.document(hits.scoreDocs[i].doc).get("descriptorImageIdentifier");
            bw.write(hits.scoreDocs[i].score + " - <a href=\"file://" + imagePath + "\"><img src=\"file://"
                    + imagePath + "\"></a><p>\n");
        }
        bw.write("</body>\n" + "</html>");
    } finally {
        bw.close(); // close the writer even if a document lookup throws
    }
    return fileName;
}

From source file:net.semanticmetadata.lire.utils.FileUtils.java

License:Open Source License

// Renders all hits as 200-px-high thumbnails in a single PNG strip, each
// annotated with its score. NOTE(review): queryImage is unused here — kept
// for signature compatibility; confirm whether it should be rendered too.
public static void saveImageResultsToPng(String prefix, ImageSearchHits hits, String queryImage,
        IndexReader reader) throws IOException {
    LinkedList<BufferedImage> results = new LinkedList<BufferedImage>();
    int width = 0;
    for (int i = 0; i < hits.length(); i++) {
        String imagePath = reader.document(hits.documentID(i))
                .getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        FileInputStream in = new FileInputStream(imagePath);
        BufferedImage tmp;
        try {
            tmp = ImageIO.read(in);
        } finally {
            in.close(); // ImageIO.read does not close the supplied stream
        }
        // Scale every thumbnail to a fixed height of 200 px, preserving aspect ratio.
        double factor = 200d / ((double) tmp.getHeight());
        tmp = ImageUtils.scaleImage(tmp, (int) (tmp.getWidth() * factor), 200);
        width += tmp.getWidth() + 5;
        results.add(tmp);
    }
    // Guard against zero hits: BufferedImage rejects a width <= 0.
    BufferedImage result = new BufferedImage(Math.max(width, 1), 220, BufferedImage.TYPE_INT_RGB);
    Graphics2D g2 = (Graphics2D) result.getGraphics();
    try {
        g2.setColor(Color.white);
        g2.setBackground(Color.white);
        g2.clearRect(0, 0, result.getWidth(), result.getHeight());
        g2.setColor(Color.black);
        g2.setFont(Font.decode("\"Arial\", Font.BOLD, 12"));
        int offset = 0;
        int count = 0;
        for (Iterator<BufferedImage> iterator = results.iterator(); iterator.hasNext();) {
            BufferedImage next = iterator.next();
            g2.drawImage(next, offset, 20, null);
            g2.drawString(hits.score(count) + "", offset + 5, 12); // score above each thumbnail
            offset += next.getWidth() + 5;
            count++;
        }
    } finally {
        g2.dispose(); // release the native graphics context
    }
    ImageIO.write(result, "PNG", new File(prefix + "_" + (System.currentTimeMillis() / 1000) + ".png"));
}

From source file:net.semanticmetadata.lire.utils.FileUtils.java

License:Open Source License

// Renders up to the first ten hits as thumbnails in a single PNG strip, each
// annotated with its score. NOTE(review): queryImage is unused here — kept
// for signature compatibility; confirm whether it should be rendered too.
public static void saveImageResultsToPng(String prefix, TopDocs hits, String queryImage, IndexReader ir)
        throws IOException {
    LinkedList<BufferedImage> results = new LinkedList<BufferedImage>();
    int width = 0;
    for (int i = 0; i < Math.min(hits.scoreDocs.length, 10); i++) {
        FileInputStream in = new FileInputStream(
                ir.document(hits.scoreDocs[i].doc).get("descriptorImageIdentifier"));
        BufferedImage tmp;
        try {
            tmp = ImageIO.read(in);
        } finally {
            in.close(); // ImageIO.read does not close the supplied stream
        }
        if (tmp.getHeight() > 200) {
            // Downscale tall images to 200 px height, preserving aspect ratio.
            double factor = 200d / ((double) tmp.getHeight());
            tmp = ImageUtils.scaleImage(tmp, (int) (tmp.getWidth() * factor), 200);
        }
        width += tmp.getWidth() + 5;
        results.add(tmp);
    }
    // Guard against zero hits: BufferedImage rejects a width <= 0.
    BufferedImage result = new BufferedImage(Math.max(width, 1), 220, BufferedImage.TYPE_INT_RGB);
    Graphics2D g2 = (Graphics2D) result.getGraphics();
    try {
        g2.setColor(Color.black);
        g2.clearRect(0, 0, result.getWidth(), result.getHeight());
        g2.setColor(Color.green);
        g2.setFont(Font.decode("\"Arial\", Font.BOLD, 12"));
        int offset = 0;
        int count = 0;
        for (Iterator<BufferedImage> iterator = results.iterator(); iterator.hasNext();) {
            BufferedImage next = iterator.next();
            g2.drawImage(next, offset, 20, null);
            g2.drawString(hits.scoreDocs[count].score + "", offset + 5, 12); // score above each thumbnail
            offset += next.getWidth() + 5;
            count++;
        }
    } finally {
        g2.dispose(); // release the native graphics context
    }
    ImageIO.write(result, "PNG", new File(prefix + "_" + (System.currentTimeMillis() / 1000) + ".png"));
}

From source file:net.semanticmetadata.lire.VisualWordsTest.java

License:Open Source License

// Queries the SURF bag-of-visual-words index with a fixed set of document ids
// and saves each result list as a PNG.
public void testSearchInIndexSurf() throws IOException {
    int[] docIDs = new int[] { 7886, 1600, 4611, 4833, 4260, 2044, 7658 };
    for (int i : docIDs) {
        IndexReader ir = DirectoryReader.open(FSDirectory.open(indexPath));
        try {
            // NOTE(review): sfh is constructed but never used afterwards —
            // confirm whether the BOVWBuilder is still needed here.
            BOVWBuilder sfh = new BOVWBuilder(ir, new SurfFeature());
            VisualWordsImageSearcher vis = new VisualWordsImageSearcher(10,
                    DocumentBuilder.FIELD_NAME_SURF + DocumentBuilder.FIELD_NAME_BOVW);
            // The stored document itself serves as the query.
            ImageSearchHits hits = vis.search(ir.document(i), ir);
            FileUtils.saveImageResultsToPng("results_bow_no_tf_" + i, hits, queryImage);
        } finally {
            ir.close(); // the original leaked one reader per iteration
        }
    }
}

From source file:net.semanticmetadata.lire.VisualWordsTest.java

License:Open Source License

// Queries the SIFT bag-of-visual-words index with a fixed set of document ids
// and saves each result list as a PNG.
public void testSearchInIndexSift() throws IOException {
    int[] docIDs = new int[] { 0, 10, 23, 35, 56, 77 };
    for (int i : docIDs) {
        System.out.println("i = " + i);
        IndexReader ir = DirectoryReader.open(FSDirectory.open(indexPath));
        try {
            VisualWordsImageSearcher vis = new VisualWordsImageSearcher(10,
                    DocumentBuilder.FIELD_NAME_SIFT + DocumentBuilder.FIELD_NAME_BOVW);
            // The stored document itself serves as the query.
            ImageSearchHits hits = vis.search(ir.document(i), ir);
            FileUtils.saveImageResultsToPng("results_bow_no_tf_sift_" + i, hits, queryImage);
        } finally {
            ir.close(); // the original leaked one reader per iteration
        }
    }
}

From source file:net.semanticmetadata.lire.VisualWordsTest.java

License:Open Source License

// Demonstrates a BoVW search against a local index and saves the hits as a PNG.
public void testWikiSearchIndex() throws IOException {
    String indexPath = "./bovw-test";
    VisualWordsImageSearcher searcher = new VisualWordsImageSearcher(10,
            DocumentBuilder.FIELD_NAME_SURF + DocumentBuilder.FIELD_NAME_BOVW);
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    try {
        // Use the stored document with id 2 as the query
        // (the original comment claimed "the first document").
        Document query = reader.document(2);
        ImageSearchHits hits = searcher.search(query, reader);
        // show or analyze your results ....
        FileUtils.saveImageResultsToPng("bovw", hits,
                query.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
    } finally {
        reader.close(); // the original never closed the reader
    }
}

From source file:net.sf.jclal.util.dataset.LuceneIndexToWekaDataSet.java

License:Open Source License

/**
 * Converts a Lucene index into a Weka ARFF file for classification. The
 * class attribute of the generated file is nominal, so classifiers that
 * require a nominal class will work with it.
 *
 * @param wekaFileName Path of the Weka file to create.
 * @param indexFile Path of the Lucene-based index file. The indexed documents
 * must have fields called "class" and "content". WARNING: The fields must
 * not contain any punctuation signs.
 *
 * @return Weka instances. The instances are sparse, since they represent
 * text information.
 *
 * @throws FileNotFoundException If the file does not exist.
 * @throws IOException If an error happens while writing the file.
 */
public Instances convertLuceneToWekaClassification(String wekaFileName, String indexFile)
        throws FileNotFoundException, IOException {
    File nuevo = new File(wekaFileName);

    // Abort if the target file cannot be used (verify() is a project helper).
    if (!verify(nuevo)) {
        return null;
    }

    // ARFF header: the relation name is taken from the output file name.
    FileUtil.writeFile(nuevo, "@RELATION " + nuevo.getName() + doubleLine);

    // NOTE(review): IndexSearcher(String), isDeleted() and getTermFreqVector()
    // are pre-4.0 Lucene APIs — this block targets an old Lucene version.
    IndexSearcher searcher = new IndexSearcher(indexFile);

    IndexReader reader = searcher.getIndexReader();

    int total = reader.maxDoc();

    // terms: term text -> attribute index; labels: distinct class values seen.
    HashMap<String, Integer> terms = new HashMap<String, Integer>(total * 2);
    Set<String> labels = new HashSet<String>(total * 2);

    int i;
    // First pass over the index: collect the vocabulary and the class labels.
    for (int l = 0; l < total; l++) {
        if (!reader.isDeleted(l)) {
            TermFreqVector vector = reader.getTermFreqVector(l, content);

            Document doc = reader.document(l);

            String current = doc.getField(classF).stringValue();

            labels.add(current);

            if (vector != null) {
                String listosI[] = vector.getTerms();
                for (i = 0; i < listosI.length; i++) {
                    // Assign each previously unseen term the next attribute index.
                    if (!terms.containsKey(listosI[i])) {
                        terms.put(listosI[i], terms.size());
                    }

                }
            }
        }
    }

    // Copy the label set into an array so the last element can be written
    // without a trailing comma below.
    String[] labelReady = new String[labels.size()];
    int posLabel = 0;
    for (String string : labels) {
        labelReady[posLabel] = string;
        posLabel++;
    }

    // convertir() (project helper) turns the map into sortable key/value pairs.
    Container[] terminos = convertir(terms);
    Arrays.sort(terminos);

    // One NUMERIC attribute per term, named by its attribute index.
    for (int j = 0; j < terminos.length; j++) {
        FileUtil.writeFile(nuevo, "@ATTRIBUTE " + (int) terminos[j].getKey() + " NUMERIC" + "\n");
    }

    // Nominal class attribute listing every label, comma separated.
    FileUtil.writeFile(nuevo, "@ATTRIBUTE class {");
    for (int j = 0; j < labelReady.length - 1; j++) {
        FileUtil.writeFile(nuevo, labelReady[j] + ",");
    }
    FileUtil.writeFile(nuevo, labelReady[labelReady.length - 1] + "}" + doubleLine);

    FileUtil.writeFile(nuevo, "@DATA\n");

    // Second pass: write one sparse ARFF row per live (non-deleted) document.
    for (int pos = 0; pos < searcher.maxDoc(); pos++) {

        if (!reader.isDeleted(pos)) {

            TermFreqVector vector = reader.getTermFreqVector(pos, content);

            if (vector != null) {
                int[] origen = vector.getTermFrequencies();
                String[] termsI = vector.getTerms();

                // Map each term of this document to its global attribute index.
                int[] positions = new int[origen.length];

                for (int k = 0; k < origen.length; k++) {
                    positions[k] = terms.get(termsI[k]);
                }

                Container[] escribir = convertir(positions, origen);
                Arrays.sort(escribir);

                // Sparse ARFF row: {attrIndex frequency, ..., classIndex classLabel}
                FileUtil.writeFile(nuevo, "{");
                for (int j = 0; j < escribir.length; j++) {
                    FileUtil.writeFile(nuevo, (int) escribir[j].getKey() + " " + escribir[j].getValue() + ",");
                }

                // The class value is written at index terms.size() (one past the
                // last term attribute).
                FileUtil.writeFile(nuevo,
                        terms.size() + " " + searcher.doc(pos).getField(classF).stringValue() + "}\n");
            }

        }
    }

    //close files
    closeReaders(searcher, reader);

    //Test if the weka file works
    Instances test = testWekaFile(wekaFileName);

    return test;
}