Usage examples for org.apache.lucene.index.IndexReader.maxDoc()
public abstract int maxDoc();
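maxDoc() returns one greater than the largest document number in the index; deleted documents still count toward this total. Below is a minimal sketch (not taken from the examples that follow) of the typical "iterate all stored documents" pattern, written against the Lucene 4.x API used in the LIRE examples; older versions expose IndexReader.isDeleted(int) instead of live-docs bits, as the jclal and indexer examples show. The index path is a placeholder.

import java.io.File;
import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Bits;

public class MaxDocExample {
    public static void main(String[] args) throws IOException {
        IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("/path/to/index")));
        // null if the index contains no deletions
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        for (int i = 0; i < reader.maxDoc(); i++) {
            if (liveDocs != null && !liveDocs.get(i)) {
                continue; // skip deleted documents
            }
            Document doc = reader.document(i);
            System.out.println(doc);
        }
        reader.close();
    }
}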
From source file:net.semanticmetadata.lire.TestImageSearcher.java
License:Open Source License
public void testCachingSearcher() throws IOException {
    IndexReader ir = DirectoryReader.open(FSDirectory.open(new File("C:\\Temp\\test-100k-cedd-idx")));
    GenericFastImageSearcher is = new GenericFastImageSearcher(1, CEDD.class, true, ir);
    SingleNddCeddImageSearcher nis = new SingleNddCeddImageSearcher(ir);
    LinkedList<Document> q = new LinkedList<Document>();
    for (int i = 0; i < Math.min(1000, ir.maxDoc()); i++) {
        q.add(ir.document(i));
    }
    long time = System.currentTimeMillis();
    int count = 0;
    for (Iterator<Document> iterator = q.iterator(); iterator.hasNext();) {
        Document next = iterator.next();
        String id = is.search(next, ir).doc(0).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        CEDD cedd = new CEDD();
        BytesRef binaryValue = next.getBinaryValue(cedd.getFieldName());
        cedd.setByteArrayRepresentation(binaryValue.bytes, binaryValue.offset, binaryValue.length);
        String s = nis.findMostSimilar(cedd).getDocument().getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        String qID = next.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        System.out.println(s.equals(id) + " " + id.equals(qID) + " " + qID.equals(s));
        count++;
        if (count > 100)
            break;
    }
    long l = System.currentTimeMillis() - time;
    System.out.printf("Tested %d search requests on %d documents: overall time of %d:%02d, %.2f ms per search",
            count, ir.maxDoc(), l / (1000 * 60), (l / 1000) % 60, ((float) l / (float) count));
}
From source file:net.semanticmetadata.lire.TestImageSearcher.java
License:Open Source License
public void testCustomCachingSearcher() throws IOException {
    IndexReader ir = DirectoryReader.open(FSDirectory.open(new File("C:\\Temp\\test-100k-cedd-idx")));
    SingleNddCeddImageSearcher is = new SingleNddCeddImageSearcher(ir);
    LinkedList<LireFeature> q = new LinkedList<LireFeature>();
    for (int i = 0; i < ir.maxDoc(); i++) {
        Document d = ir.document(i);
        CEDD cedd = new CEDD();
        BytesRef binaryValue = d.getBinaryValue(cedd.getFieldName());
        cedd.setByteArrayRepresentation(binaryValue.bytes, binaryValue.offset, binaryValue.length);
        q.add(cedd);
    }
    long time = System.currentTimeMillis();
    int count = 0;
    for (Iterator<LireFeature> iterator = q.iterator(); iterator.hasNext();) {
        LireFeature next = iterator.next();
        is.findMostSimilar(next);
        count++;
        if (count > 100)
            break;
    }
    long l = System.currentTimeMillis() - time;
    System.out.printf("Tested %d search requests on %d documents: overall time of %d:%02d, %.2f ms per search",
            count, ir.maxDoc(), l / (1000 * 60), (l / 1000) % 60, ((float) l / (float) count));
}
From source file:net.semanticmetadata.lire.TestImageSearcher.java
License:Open Source License
public void testCachingSearcherParallel() throws IOException, InterruptedException {
    final IndexReader ir = DirectoryReader.open(FSDirectory.open(new File("C:\\Temp\\test-100k-cedd-idx")));
    SingleNddCeddImageSearcher is = new SingleNddCeddImageSearcher(ir);
    LinkedList<LireFeature> q = new LinkedList<LireFeature>();
    for (int i = 0; i < ir.maxDoc(); i++) {
        Document d = ir.document(i);
        CEDD cedd = new CEDD();
        BytesRef binaryValue = d.getBinaryValue(cedd.getFieldName());
        cedd.setByteArrayRepresentation(binaryValue.bytes, binaryValue.offset, binaryValue.length);
        q.add(cedd);
    }
    int count = 0;
    Thread[] searchers = new Thread[3];
    final LinkedBlockingQueue<LireFeature> queryQueue = new LinkedBlockingQueue<LireFeature>(1000);
    for (int i = 0; i < searchers.length; i++) {
        searchers[i] = new Thread(new Runnable() {
            @Override
            public void run() {
                SingleNddCeddImageSearcher is = new SingleNddCeddImageSearcher(ir);
                LireFeature remove;
                while ((remove = queryQueue.remove()) instanceof CEDD) {
                    try {
                        is.findMostSimilar(remove);
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }
        });
        searchers[i].start();
    }
    long time = System.currentTimeMillis();
    for (Iterator<LireFeature> iterator = q.iterator(); iterator.hasNext() && count < 1000;) {
        LireFeature next = iterator.next();
        try {
            queryQueue.put(next);
            count++;
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
    for (int i = 0; i < 8; i++) {
        queryQueue.put(new ScalableColor());
    }
    for (int i = 0; i < searchers.length; i++) {
        searchers[i].join();
    }
    long l = System.currentTimeMillis() - time;
    System.out.printf("Tested %d search requests on %d documents: overall time of %d:%02d, %.2f ms per search",
            count, ir.maxDoc(), l / (1000 * 60), (l / 1000) % 60, ((float) l / (float) count));
}
From source file:net.semanticmetadata.lire.TestImageSearcher.java
License:Open Source License
public void testCachingSearcherParallelWithBundling() throws IOException, InterruptedException {
    final IndexReader ir = DirectoryReader.open(FSDirectory.open(new File("C:\\Temp\\test-100k-cedd-idx")));
    LinkedList<LireFeature> q = new LinkedList<LireFeature>();
    for (int i = 0; i < ir.maxDoc(); i++) {
        Document d = ir.document(i);
        CEDD cedd = new CEDD();
        BytesRef binaryValue = d.getBinaryValue(cedd.getFieldName());
        cedd.setByteArrayRepresentation(binaryValue.bytes, binaryValue.offset, binaryValue.length);
        q.add(cedd);
    }
    int count = 0;
    Thread[] searchers = new Thread[4];
    final LinkedBlockingQueue<WorkItem> queryQueue = new LinkedBlockingQueue<WorkItem>(100);
    for (int i = 0; i < searchers.length; i++) {
        searchers[i] = new Thread(new Runnable() {
            @Override
            public void run() {
                SingleNddCeddImageSearcher is = new SingleNddCeddImageSearcher(ir);
                WorkItem remove;
                while ((remove = queryQueue.remove()).features != null) {
                    try {
                        SimpleResult[] hits = is.findMostSimilar(remove.features);
                        for (int j = 0; j < hits.length; j++) {
                            if (hits[j].getIndexNumber() != remove.id[j])
                                System.err.println("oops");
                        }
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }
        });
        searchers[i].start();
    }
    long time = System.currentTimeMillis();
    LireFeature[] qarr = new LireFeature[10];
    int[] iarr = new int[10];
    int currentIndex = 0;
    int bundleCount = 0;
    Iterator<LireFeature> iterator = q.iterator();
    while (iterator.hasNext() && bundleCount < 200) {
        LireFeature next = iterator.next();
        try {
            iarr[currentIndex] = count;
            qarr[currentIndex++] = next;
            if (currentIndex >= qarr.length) { // do bundled search
                currentIndex = 0;
                queryQueue.put(new WorkItem(qarr.clone(), iarr.clone()));
                bundleCount++;
            }
            count++;
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
    for (int i = 0; i < 8; i++) {
        queryQueue.put(new WorkItem(null, null));
    }
    for (int i = 0; i < searchers.length; i++) {
        searchers[i].join();
    }
    long l = System.currentTimeMillis() - time;
    System.out.printf("Tested %d search requests on %d documents: overall time of %d:%02d, %.2f ms per search",
            count, ir.maxDoc(), l / (1000 * 60), (l / 1000) % 60, ((float) l / (float) count));
}
From source file:net.semanticmetadata.lire.TestImageSearcher.java
License:Open Source License
public void testCachingSearcherBundling() throws IOException {
    IndexReader ir = DirectoryReader.open(FSDirectory.open(new File("C:\\Temp\\test-100k-cedd-idx")));
    SingleNddCeddImageSearcher is = new SingleNddCeddImageSearcher(ir);
    LinkedList<LireFeature> q = new LinkedList<LireFeature>();
    for (int i = 0; i < ir.maxDoc(); i++) {
        Document d = ir.document(i);
        CEDD cedd = new CEDD();
        BytesRef binaryValue = d.getBinaryValue(cedd.getFieldName());
        cedd.setByteArrayRepresentation(binaryValue.bytes, binaryValue.offset, binaryValue.length);
        q.add(cedd);
    }
    long time = System.currentTimeMillis();
    int count = 0;
    LireFeature[] qarr = new LireFeature[10];
    int currentIndex = 0;
    for (Iterator<LireFeature> iterator = q.iterator(); iterator.hasNext();) {
        LireFeature next = iterator.next();
        qarr[currentIndex++] = next;
        if (currentIndex >= qarr.length) { // do bundled search
            currentIndex = 0;
            is.findMostSimilar(qarr);
        }
        count++;
        if (count > 999 & currentIndex == 0)
            break;
    }
    long l = System.currentTimeMillis() - time;
    System.out.printf("Tested %d search requests on %d documents: overall time of %d:%02d, %.2f ms per search",
            count, ir.maxDoc(), l / (1000 * 60), (l / 1000) % 60, ((float) l / (float) count));
}
From source file:net.sf.jclal.util.dataset.LuceneIndexToWekaDataSet.java
License:Open Source License
/**
 * Converts a Lucene index file to a Weka file for classification. The Weka
 * class attribute is nominal, so the classifiers will work with a nominal
 * class.
 *
 * @param wekaFileName Path of the Weka file.
 * @param indexFile    Path of the Lucene-based index file. The indexed documents
 *                     must have fields called "class" and "content". WARNING: the
 *                     fields must not contain any punctuation signs.
 *
 * @return Weka instances. The instances are sparse since they hold text information.
 *
 * @throws FileNotFoundException If the file does not exist.
 * @throws IOException           If an error happens while writing the file.
 */
public Instances convertLuceneToWekaClassification(String wekaFileName, String indexFile)
        throws FileNotFoundException, IOException {
    File nuevo = new File(wekaFileName);
    if (!verify(nuevo)) {
        return null;
    }
    FileUtil.writeFile(nuevo, "@RELATION " + nuevo.getName() + doubleLine);

    IndexSearcher searcher = new IndexSearcher(indexFile);
    IndexReader reader = searcher.getIndexReader();

    int total = reader.maxDoc();
    HashMap<String, Integer> terms = new HashMap<String, Integer>(total * 2);
    Set<String> labels = new HashSet<String>(total * 2);
    int i;
    for (int l = 0; l < total; l++) {
        if (!reader.isDeleted(l)) {
            TermFreqVector vector = reader.getTermFreqVector(l, content);
            Document doc = reader.document(l);
            String current = doc.getField(classF).stringValue();
            labels.add(current);
            if (vector != null) {
                String listosI[] = vector.getTerms();
                for (i = 0; i < listosI.length; i++) {
                    if (!terms.containsKey(listosI[i])) {
                        terms.put(listosI[i], terms.size());
                    }
                }
            }
        }
    }

    String[] labelReady = new String[labels.size()];
    int posLabel = 0;
    for (String string : labels) {
        labelReady[posLabel] = string;
        posLabel++;
    }

    Container[] terminos = convertir(terms);
    Arrays.sort(terminos);
    for (int j = 0; j < terminos.length; j++) {
        FileUtil.writeFile(nuevo, "@ATTRIBUTE " + (int) terminos[j].getKey() + " NUMERIC" + "\n");
    }
    FileUtil.writeFile(nuevo, "@ATTRIBUTE class {");
    for (int j = 0; j < labelReady.length - 1; j++) {
        FileUtil.writeFile(nuevo, labelReady[j] + ",");
    }
    FileUtil.writeFile(nuevo, labelReady[labelReady.length - 1] + "}" + doubleLine);
    FileUtil.writeFile(nuevo, "@DATA\n");

    for (int pos = 0; pos < searcher.maxDoc(); pos++) {
        if (!reader.isDeleted(pos)) {
            TermFreqVector vector = reader.getTermFreqVector(pos, content);
            if (vector != null) {
                int[] origen = vector.getTermFrequencies();
                String[] termsI = vector.getTerms();
                int[] positions = new int[origen.length];
                for (int k = 0; k < origen.length; k++) {
                    positions[k] = terms.get(termsI[k]);
                }
                Container[] escribir = convertir(positions, origen);
                Arrays.sort(escribir);
                FileUtil.writeFile(nuevo, "{");
                for (int j = 0; j < escribir.length; j++) {
                    FileUtil.writeFile(nuevo, (int) escribir[j].getKey() + " " + escribir[j].getValue() + ",");
                }
                FileUtil.writeFile(nuevo,
                        terms.size() + " " + searcher.doc(pos).getField(classF).stringValue() + "}\n");
            }
        }
    }

    // close files
    closeReaders(searcher, reader);

    // Test if the weka file works
    Instances test = testWekaFile(wekaFileName);

    return test;
}
From source file:net.sf.jclal.util.dataset.LuceneIndexToWekaDataSet.java
License:Open Source License
/**
 * Converts a Lucene index file to a Weka file for regression. The Weka
 * class attribute is real, so the classifiers used will work with a
 * numeric real class.
 *
 * @param wekaFileName Path of the Weka file.
 * @param indexFile    Path of the Lucene-based index file. The indexed documents
 *                     must have fields called "class" and "content". WARNING: the
 *                     fields must not contain any punctuation signs.
 *
 * @return Weka instances. The instances are sparse since they hold text information.
 *
 * @throws FileNotFoundException If the file does not exist.
 * @throws IOException           If an error happens while writing the file.
 */
public Instances convertLuceneToWekaRegression(String wekaFileName, String indexFile)
        throws FileNotFoundException, IOException {
    File nuevo = new File(wekaFileName);
    if (!verify(nuevo)) {
        return null;
    }
    FileUtil.writeFile(nuevo, "@RELATION " + nuevo.getName() + doubleLine);

    IndexSearcher searcher = new IndexSearcher(indexFile);
    IndexReader reader = searcher.getIndexReader();

    int total = reader.maxDoc();
    HashMap<String, Integer> terms = new HashMap<String, Integer>(total * 2);
    HashMap<String, Integer> labels = new HashMap<String, Integer>(total * 2);
    int i;
    for (int l = 0; l < total; l++) {
        if (!reader.isDeleted(l)) {
            TermFreqVector vector = reader.getTermFreqVector(l, content);
            Document doc = reader.document(l);
            String current = doc.getField(classF).stringValue();
            if (!labels.containsKey(current)) {
                labels.put(current, labels.size());
            }
            if (vector != null) {
                String listosI[] = vector.getTerms();
                for (i = 0; i < listosI.length; i++) {
                    if (!terms.containsKey(listosI[i])) {
                        terms.put(listosI[i], terms.size());
                    }
                }
            }
        }
    }

    Container[] terminos = convertir(terms);
    Arrays.sort(terminos);
    for (int j = 0; j < terminos.length; j++) {
        FileUtil.writeFile(nuevo, "@ATTRIBUTE " + (int) terminos[j].getKey() + " NUMERIC" + "\n");
    }
    FileUtil.writeFile(nuevo, "@ATTRIBUTE class REAL [0.0,");
    FileUtil.writeFile(nuevo, (labels.size() - 1) + ".0]" + doubleLine);
    FileUtil.writeFile(nuevo, "@DATA\n");

    for (int pos = 0; pos < searcher.maxDoc(); pos++) {
        if (!reader.isDeleted(pos)) {
            TermFreqVector vector = reader.getTermFreqVector(pos, content);
            if (vector != null) {
                int[] origen = vector.getTermFrequencies();
                String[] termsI = vector.getTerms();
                int[] positions = new int[origen.length];
                for (int k = 0; k < origen.length; k++) {
                    positions[k] = terms.get(termsI[k]);
                }
                Container[] escribir = convertir(positions, origen);
                Arrays.sort(escribir);
                FileUtil.writeFile(nuevo, "{");
                for (int j = 0; j < escribir.length; j++) {
                    FileUtil.writeFile(nuevo, (int) escribir[j].getKey() + " " + escribir[j].getValue() + ",");
                }
                FileUtil.writeFile(nuevo, terms.size() + " "
                        + labels.get(searcher.doc(pos).getField(classF).stringValue()) + ".0}\n");
            }
        }
    }

    // close files
    closeReaders(searcher, reader);

    // Test if the weka file works
    Instances test = testWekaFile(wekaFileName);

    return test;
}
From source file:net.sf.logsaw.index.internal.LuceneIndexServiceImpl.java
License:Open Source License
private Date getLatestEntryDate(ILogResource log) throws CoreException {
    if (!hasDateComponent(log)) {
        return null;
    }
    ARunWithIndexReader<Date> runnable = new ARunWithIndexReader<Date>() {

        /* (non-Javadoc)
         * @see net.sf.logsaw.index.impl.ARunWithIndexReader#doRunWithIndexReader(org.apache.lucene.index.IndexReader, net.sf.logsaw.core.framework.ILogResource)
         */
        @Override
        protected Date doRunWithIndexReader(IndexReader reader, ILogResource log) throws CoreException {
            if (reader == null) {
                // Index does not exist yet
                return null;
            }
            int i = reader.maxDoc();
            if (i > 0) {
                try {
                    Document doc = reader.document(i - 1);
                    String val = doc.get(log.getDialect().getFieldProvider().getTimestampField().getKey());
                    return log.getDialect().getFieldProvider().getTimestampField().fromIndexedValue(val);
                } catch (IOException e) {
                    // Unexpected exception; wrap with CoreException
                    throw new CoreException(new Status(IStatus.ERROR, IndexPlugin.PLUGIN_ID,
                            NLS.bind(Messages.LuceneIndexService_error_failedToReadIndex,
                                    new Object[] { log.getName(), e.getLocalizedMessage() }), e));
                }
            }
            return null;
        }
    };
    return runnable.runWithIndexReader(log);
}
From source file:net.sf.zekr.engine.search.lucene.QuranTextSearcher.java
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    BitSet bits = new BitSet(reader.maxDoc());
    for (int i = 0; i < reader.maxDoc(); i++) {
        Document doc = reader.document(i);
        IQuranLocation loc = new QuranLocation(doc.getField("location").stringValue());
        if (searchScope.includes(loc)) {
            bits.set(i);
        }
    }
    return new DocIdBitSet(bits);
}
From source file:nl.elucidator.maven.analyzer.indexer.IndexSearcher.java
License:Apache License
public Set<ArtifactInfo> getUniqueGAV() throws IOException, ComponentLookupException {
    IndexingContext centralContext = indexUpdater.getIndexContext();
    centralContext.lock();

    Set<ArtifactInfo> artifactInfoSet = new HashSet<ArtifactInfo>();
    try {
        final IndexReader ir = centralContext.getIndexReader();
        for (int i = 0; i < ir.maxDoc(); i++) {
            if (!ir.isDeleted(i)) {
                final Document doc = ir.document(i);
                final ArtifactInfo ai = IndexUtils.constructArtifactInfo(doc, centralContext);
                if (ai != null) {
                    artifactInfoSet.add(ai);
                }
            }
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        centralContext.unlock();
    }
    return artifactInfoSet;
}