Example usage for org.apache.lucene.index IndexReader maxDoc

List of usage examples for org.apache.lucene.index IndexReader maxDoc

Introduction

On this page you can find example usages of org.apache.lucene.index IndexReader maxDoc.

Prototype

public abstract int maxDoc();

Document

Returns one greater than the largest possible document number.
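In other words, maxDoc() is an exclusive upper bound on document numbers, so the common pattern is to loop from 0 to maxDoc() - 1; in an index with deletions some of those IDs refer to deleted documents, while numDocs() counts only the live ones. Below is a minimal sketch; the index path is a placeholder, and the File-based FSDirectory.open call assumes the same Lucene 4.x-style API used in the examples that follow.

import java.io.File;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;

public class MaxDocExample {
    public static void main(String[] args) throws Exception {
        // Open a reader on an existing index; the path is a placeholder.
        IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("/tmp/example-index")));
        try {
            // maxDoc() is one greater than the largest document number,
            // so valid document IDs are 0 .. maxDoc() - 1.
            // numDocs() counts only live (non-deleted) documents.
            System.out.println("maxDoc = " + reader.maxDoc() + ", numDocs = " + reader.numDocs());
            for (int i = 0; i < reader.maxDoc(); i++) {
                Document doc = reader.document(i);
                // ... use the stored fields of doc ...
            }
        } finally {
            reader.close();
        }
    }
}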

Usage

From source file:net.semanticmetadata.lire.TestImageSearcher.java

License:Open Source License

public void testCachingSearcher() throws IOException {
    IndexReader ir = DirectoryReader.open(FSDirectory.open(new File("C:\\Temp\\test-100k-cedd-idx")));
    GenericFastImageSearcher is = new GenericFastImageSearcher(1, CEDD.class, true, ir);
    SingleNddCeddImageSearcher nis = new SingleNddCeddImageSearcher(ir);
    LinkedList<Document> q = new LinkedList<Document>();
    for (int i = 0; i < Math.min(1000, ir.maxDoc()); i++) {
        q.add(ir.document(i));
    }

    long time = System.currentTimeMillis();
    int count = 0;
    for (Iterator<Document> iterator = q.iterator(); iterator.hasNext();) {
        Document next = iterator.next();
        String id = is.search(next, ir).doc(0).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        CEDD cedd = new CEDD();
        BytesRef binaryValue = next.getBinaryValue(cedd.getFieldName());
        cedd.setByteArrayRepresentation(binaryValue.bytes, binaryValue.offset, binaryValue.length);

        String s = nis.findMostSimilar(cedd).getDocument().getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        String qID = next.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        System.out.println(s.equals(id) + " " + id.equals(qID) + " " + qID.equals(s));
        count++;
        if (count > 100)
            break;
    }
    long l = System.currentTimeMillis() - time;
    System.out.printf("Tested %d search requests on %d documents: overall time of %d:%02d, %.2f ms per search",
            count, ir.maxDoc(), l / (1000 * 60), (l / 1000) % 60, ((float) l / (float) count));

}

From source file:net.semanticmetadata.lire.TestImageSearcher.java

License:Open Source License

public void testCustomCachingSearcher() throws IOException {
    IndexReader ir = DirectoryReader.open(FSDirectory.open(new File("C:\\Temp\\test-100k-cedd-idx")));
    SingleNddCeddImageSearcher is = new SingleNddCeddImageSearcher(ir);

    LinkedList<LireFeature> q = new LinkedList<LireFeature>();
    for (int i = 0; i < ir.maxDoc(); i++) {
        Document d = ir.document(i);
        CEDD cedd = new CEDD();
        BytesRef binaryValue = d.getBinaryValue(cedd.getFieldName());
        cedd.setByteArrayRepresentation(binaryValue.bytes, binaryValue.offset, binaryValue.length);
        q.add(cedd);
    }

    long time = System.currentTimeMillis();
    int count = 0;
    for (Iterator<LireFeature> iterator = q.iterator(); iterator.hasNext();) {
        LireFeature next = iterator.next();
        is.findMostSimilar(next);
        count++;
        if (count > 100)
            break;
    }
    long l = System.currentTimeMillis() - time;
    System.out.printf("Tested %d search requests on %d documents: overall time of %d:%02d, %.2f ms per search",
            count, ir.maxDoc(), l / (1000 * 60), (l / 1000) % 60, ((float) l / (float) count));
}

From source file:net.semanticmetadata.lire.TestImageSearcher.java

License:Open Source License

public void testCachingSearcherParallel() throws IOException, InterruptedException {
    final IndexReader ir = DirectoryReader.open(FSDirectory.open(new File("C:\\Temp\\test-100k-cedd-idx")));
    SingleNddCeddImageSearcher is = new SingleNddCeddImageSearcher(ir);

    LinkedList<LireFeature> q = new LinkedList<LireFeature>();
    for (int i = 0; i < ir.maxDoc(); i++) {
        Document d = ir.document(i);
        CEDD cedd = new CEDD();
        BytesRef binaryValue = d.getBinaryValue(cedd.getFieldName());
        cedd.setByteArrayRepresentation(binaryValue.bytes, binaryValue.offset, binaryValue.length);
        q.add(cedd);
    }

    int count = 0;
    Thread[] searchers = new Thread[3];
    final LinkedBlockingQueue<LireFeature> queryQueue = new LinkedBlockingQueue<LireFeature>(1000);
    for (int i = 0; i < searchers.length; i++) {
        searchers[i] = new Thread(new Runnable() {
            @Override
            public void run() {
                SingleNddCeddImageSearcher is = new SingleNddCeddImageSearcher(ir);
                LireFeature remove;
                while ((remove = queryQueue.remove()) instanceof CEDD) {
                    try {
                        is.findMostSimilar(remove);
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }

        });
        searchers[i].start();
    }
    long time = System.currentTimeMillis();
    for (Iterator<LireFeature> iterator = q.iterator(); iterator.hasNext() && count < 1000;) {
        LireFeature next = iterator.next();
        try {
            queryQueue.put(next);
            count++;
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
    for (int i = 0; i < 8; i++) {
        queryQueue.put(new ScalableColor());
    }
    for (int i = 0; i < searchers.length; i++) {
        searchers[i].join();
    }
    long l = System.currentTimeMillis() - time;
    System.out.printf("Tested %d search requests on %d documents: overall time of %d:%02d, %.2f ms per search",
            count, ir.maxDoc(), l / (1000 * 60), (l / 1000) % 60, ((float) l / (float) count));
}

From source file:net.semanticmetadata.lire.TestImageSearcher.java

License:Open Source License

public void testCachingSearcherParallelWithBundling() throws IOException, InterruptedException {
    final IndexReader ir = DirectoryReader.open(FSDirectory.open(new File("C:\\Temp\\test-100k-cedd-idx")));

    LinkedList<LireFeature> q = new LinkedList<LireFeature>();
    for (int i = 0; i < ir.maxDoc(); i++) {
        Document d = ir.document(i);
        CEDD cedd = new CEDD();
        BytesRef binaryValue = d.getBinaryValue(cedd.getFieldName());
        cedd.setByteArrayRepresentation(binaryValue.bytes, binaryValue.offset, binaryValue.length);
        q.add(cedd);
    }

    int count = 0;
    Thread[] searchers = new Thread[4];
    final LinkedBlockingQueue<WorkItem> queryQueue = new LinkedBlockingQueue<WorkItem>(100);
    for (int i = 0; i < searchers.length; i++) {
        searchers[i] = new Thread(new Runnable() {
            @Override
            public void run() {
                SingleNddCeddImageSearcher is = new SingleNddCeddImageSearcher(ir);
                WorkItem remove;
                while ((remove = queryQueue.remove()).features != null) {
                    try {
                        SimpleResult[] hits = is.findMostSimilar(remove.features);
                        for (int j = 0; j < hits.length; j++) {
                            if (hits[j].getIndexNumber() != remove.id[j])
                                System.err.println("oops");
                        }
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }

        });
        searchers[i].start();
    }
    long time = System.currentTimeMillis();
    LireFeature[] qarr = new LireFeature[10];
    int[] iarr = new int[10];
    int currentIndex = 0;
    int bundleCount = 0;
    Iterator<LireFeature> iterator = q.iterator();
    while (iterator.hasNext() && bundleCount < 200) {
        LireFeature next = iterator.next();
        try {
            iarr[currentIndex] = count;
            qarr[currentIndex++] = next;
            if (currentIndex >= qarr.length) { // do bundled search
                currentIndex = 0;
                queryQueue.put(new WorkItem(qarr.clone(), iarr.clone()));
                bundleCount++;
            }
            count++;
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
    for (int i = 0; i < 8; i++) {
        queryQueue.put(new WorkItem(null, null));
    }
    for (int i = 0; i < searchers.length; i++) {
        searchers[i].join();
    }
    long l = System.currentTimeMillis() - time;
    System.out.printf("Tested %d search requests on %d documents: overall time of %d:%02d, %.2f ms per search",
            count, ir.maxDoc(), l / (1000 * 60), (l / 1000) % 60, ((float) l / (float) count));
}

From source file:net.semanticmetadata.lire.TestImageSearcher.java

License:Open Source License

public void testCachingSearcherBundling() throws IOException {
    IndexReader ir = DirectoryReader.open(FSDirectory.open(new File("C:\\Temp\\test-100k-cedd-idx")));
    SingleNddCeddImageSearcher is = new SingleNddCeddImageSearcher(ir);

    LinkedList<LireFeature> q = new LinkedList<LireFeature>();
    for (int i = 0; i < ir.maxDoc(); i++) {
        Document d = ir.document(i);
        CEDD cedd = new CEDD();
        BytesRef binaryValue = d.getBinaryValue(cedd.getFieldName());
        cedd.setByteArrayRepresentation(binaryValue.bytes, binaryValue.offset, binaryValue.length);
        q.add(cedd);
    }

    long time = System.currentTimeMillis();
    int count = 0;
    LireFeature[] qarr = new LireFeature[10];
    int currentIndex = 0;
    for (Iterator<LireFeature> iterator = q.iterator(); iterator.hasNext();) {
        LireFeature next = iterator.next();
        qarr[currentIndex++] = next;
        if (currentIndex >= qarr.length) { // do bundled search
            currentIndex = 0;
            is.findMostSimilar(qarr);
        }
        count++;
        if (count > 999 && currentIndex == 0)
            break;
    }
    long l = System.currentTimeMillis() - time;
    System.out.printf("Tested %d search requests on %d documents: overall time of %d:%02d, %.2f ms per search",
            count, ir.maxDoc(), l / (1000 * 60), (l / 1000) % 60, ((float) l / (float) count));
}

From source file:net.sf.jclal.util.dataset.LuceneIndexToWekaDataSet.java

License:Open Source License

/**
 * Converts a Lucene index file to a Weka file for classification. The
 * class attribute in the Weka file is nominal, so the classifiers will
 * work with a nominal class.
 *
 * @param wekaFileName Path of the Weka file.
 * @param indexFile Path of the Lucene-based index file. The indexed
 * documents must have fields called "class" and "content". WARNING: the
 * fields must not contain any punctuation signs.
 *
 * @return Weka instances. The instances are sparse since they represent
 * text information.
 *
 * @throws FileNotFoundException If the file does not exist.
 * @throws IOException If an error happens while writing the file.
 */
public Instances convertLuceneToWekaClassification(String wekaFileName, String indexFile)
        throws FileNotFoundException, IOException {
    File nuevo = new File(wekaFileName);

    if (!verify(nuevo)) {
        return null;
    }

    FileUtil.writeFile(nuevo, "@RELATION " + nuevo.getName() + doubleLine);

    IndexSearcher searcher = new IndexSearcher(indexFile);

    IndexReader reader = searcher.getIndexReader();

    int total = reader.maxDoc();

    HashMap<String, Integer> terms = new HashMap<String, Integer>(total * 2);
    Set<String> labels = new HashSet<String>(total * 2);

    int i;
    for (int l = 0; l < total; l++) {
        if (!reader.isDeleted(l)) {
            TermFreqVector vector = reader.getTermFreqVector(l, content);

            Document doc = reader.document(l);

            String current = doc.getField(classF).stringValue();

            labels.add(current);

            if (vector != null) {
                String listosI[] = vector.getTerms();
                for (i = 0; i < listosI.length; i++) {
                    if (!terms.containsKey(listosI[i])) {
                        terms.put(listosI[i], terms.size());
                    }

                }
            }
        }
    }

    String[] labelReady = new String[labels.size()];
    int posLabel = 0;
    for (String string : labels) {
        labelReady[posLabel] = string;
        posLabel++;
    }

    Container[] terminos = convertir(terms);
    Arrays.sort(terminos);

    for (int j = 0; j < terminos.length; j++) {
        FileUtil.writeFile(nuevo, "@ATTRIBUTE " + (int) terminos[j].getKey() + " NUMERIC" + "\n");
    }

    FileUtil.writeFile(nuevo, "@ATTRIBUTE class {");
    for (int j = 0; j < labelReady.length - 1; j++) {
        FileUtil.writeFile(nuevo, labelReady[j] + ",");
    }
    FileUtil.writeFile(nuevo, labelReady[labelReady.length - 1] + "}" + doubleLine);

    FileUtil.writeFile(nuevo, "@DATA\n");

    for (int pos = 0; pos < searcher.maxDoc(); pos++) {

        if (!reader.isDeleted(pos)) {

            TermFreqVector vector = reader.getTermFreqVector(pos, content);

            if (vector != null) {
                int[] origen = vector.getTermFrequencies();
                String[] termsI = vector.getTerms();

                int[] positions = new int[origen.length];

                for (int k = 0; k < origen.length; k++) {
                    positions[k] = terms.get(termsI[k]);
                }

                Container[] escribir = convertir(positions, origen);
                Arrays.sort(escribir);

                FileUtil.writeFile(nuevo, "{");
                for (int j = 0; j < escribir.length; j++) {
                    FileUtil.writeFile(nuevo, (int) escribir[j].getKey() + " " + escribir[j].getValue() + ",");
                }

                FileUtil.writeFile(nuevo,
                        terms.size() + " " + searcher.doc(pos).getField(classF).stringValue() + "}\n");
            }

        }
    }

    //close files
    closeReaders(searcher, reader);

    //Test if the weka file works
    Instances test = testWekaFile(wekaFileName);

    return test;
}

From source file:net.sf.jclal.util.dataset.LuceneIndexToWekaDataSet.java

License:Open Source License

/**
 * Converts a Lucene index file to a Weka file for regression. The class
 * attribute in the Weka file is real, so the classifiers will work with a
 * numeric real class.
 *
 * @param wekaFileName Path of the Weka file.
 * @param indexFile Path of the Lucene-based index file. The indexed
 * documents must have fields called "class" and "content". WARNING: the
 * fields must not contain any punctuation signs.
 *
 * @return Weka instances. The instances are sparse since they represent
 * text information.
 *
 * @throws FileNotFoundException If the file does not exist.
 * @throws IOException If an error happens while writing the file.
 */
public Instances convertLuceneToWekaRegression(String wekaFileName, String indexFile)
        throws FileNotFoundException, IOException {
    File nuevo = new File(wekaFileName);

    if (!verify(nuevo)) {
        return null;
    }

    FileUtil.writeFile(nuevo, "@RELATION " + nuevo.getName() + doubleLine);

    IndexSearcher searcher = new IndexSearcher(indexFile);

    IndexReader reader = searcher.getIndexReader();

    int total = reader.maxDoc();

    HashMap<String, Integer> terms = new HashMap<String, Integer>(total * 2);
    HashMap<String, Integer> labels = new HashMap<String, Integer>(total * 2);

    int i;
    for (int l = 0; l < total; l++) {
        if (!reader.isDeleted(l)) {
            TermFreqVector vector = reader.getTermFreqVector(l, content);

            Document doc = reader.document(l);

            String current = doc.getField(classF).stringValue();

            if (!labels.containsKey(current)) {
                labels.put(current, labels.size());
            }

            if (vector != null) {
                String listosI[] = vector.getTerms();
                for (i = 0; i < listosI.length; i++) {
                    if (!terms.containsKey(listosI[i])) {
                        terms.put(listosI[i], terms.size());
                    }

                }
            }
        }
    }

    Container[] terminos = convertir(terms);
    Arrays.sort(terminos);

    for (int j = 0; j < terminos.length; j++) {
        FileUtil.writeFile(nuevo, "@ATTRIBUTE " + (int) terminos[j].getKey() + " NUMERIC" + "\n");
    }

    FileUtil.writeFile(nuevo, "@ATTRIBUTE class REAL [0.0,");

    FileUtil.writeFile(nuevo, (labels.size() - 1) + ".0]" + doubleLine);

    FileUtil.writeFile(nuevo, "@DATA\n");

    for (int pos = 0; pos < searcher.maxDoc(); pos++) {

        if (!reader.isDeleted(pos)) {

            TermFreqVector vector = reader.getTermFreqVector(pos, content);

            if (vector != null) {
                int[] origen = vector.getTermFrequencies();
                String[] termsI = vector.getTerms();

                int[] positions = new int[origen.length];

                for (int k = 0; k < origen.length; k++) {
                    positions[k] = terms.get(termsI[k]);
                }

                Container[] escribir = convertir(positions, origen);
                Arrays.sort(escribir);

                FileUtil.writeFile(nuevo, "{");
                for (int j = 0; j < escribir.length; j++) {
                    FileUtil.writeFile(nuevo, (int) escribir[j].getKey() + " " + escribir[j].getValue() + ",");
                }

                FileUtil.writeFile(nuevo, terms.size() + " "
                        + labels.get(searcher.doc(pos).getField(classF).stringValue()) + ".0}\n");
            }

        }
    }

    //close files
    closeReaders(searcher, reader);

    //Test if the weka file works
    Instances test = testWekaFile(wekaFileName);

    return test;
}

From source file:net.sf.logsaw.index.internal.LuceneIndexServiceImpl.java

License:Open Source License

private Date getLatestEntryDate(ILogResource log) throws CoreException {
    if (!hasDateComponent(log)) {
        return null;
    }

    ARunWithIndexReader<Date> runnable = new ARunWithIndexReader<Date>() {

        /* (non-Javadoc)
         * @see net.sf.logsaw.index.impl.ARunWithIndexReader#doRunWithIndexReader(org.apache.lucene.index.IndexReader, net.sf.logsaw.core.framework.ILogResource)
         */
        @Override
        protected Date doRunWithIndexReader(IndexReader reader, ILogResource log) throws CoreException {
            if (reader == null) {
                // Index does not exist yet
                return null;
            }
            int i = reader.maxDoc();
            if (i > 0) {
                try {
                    Document doc = reader.document(i - 1);
                    String val = doc.get(log.getDialect().getFieldProvider().getTimestampField().getKey());
                    return log.getDialect().getFieldProvider().getTimestampField().fromIndexedValue(val);
                } catch (IOException e) {
                    // Unexpected exception; wrap with CoreException
                    throw new CoreException(new Status(IStatus.ERROR, IndexPlugin.PLUGIN_ID,
                            NLS.bind(Messages.LuceneIndexService_error_failedToReadIndex,
                                    new Object[] { log.getName(), e.getLocalizedMessage() }),
                            e));
                }
            }
            return null;
        }
    };
    return runnable.runWithIndexReader(log);
}

From source file:net.sf.zekr.engine.search.lucene.QuranTextSearcher.java

@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    BitSet bits = new BitSet(reader.maxDoc());
    for (int i = 0; i < reader.maxDoc(); i++) {
        Document doc = reader.document(i);
        IQuranLocation loc = new QuranLocation(doc.getField("location").stringValue());
        if (searchScope.includes(loc)) {
            bits.set(i);
        }
    }
    return new DocIdBitSet(bits);
}

From source file:nl.elucidator.maven.analyzer.indexer.IndexSearcher.java

License:Apache License

public Set<ArtifactInfo> getUniqueGAV() throws IOException, ComponentLookupException {
    IndexingContext centralContext = indexUpdater.getIndexContext();
    centralContext.lock();
    Set<ArtifactInfo> artifactInfoSet = new HashSet<ArtifactInfo>();

    try {
        final IndexReader ir = centralContext.getIndexReader();

        for (int i = 0; i < ir.maxDoc(); i++) {
            if (!ir.isDeleted(i)) {
                final Document doc = ir.document(i);

                final ArtifactInfo ai = IndexUtils.constructArtifactInfo(doc, centralContext);
                if (ai != null) {
                    artifactInfoSet.add(ai);
                }
            }
        }

    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        centralContext.unlock();
    }
    return artifactInfoSet;
}