Example usage for org.apache.lucene.index IndexReader hasDeletions

Introduction

On this page you can find example usages of org.apache.lucene.index.IndexReader.hasDeletions().

Prototype

public boolean hasDeletions() 

Document

Returns true if any documents have been deleted.
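
Most of the usage examples below follow the same pattern: call hasDeletions() once and, if the index has deletions, consult the live-docs bitset so that deleted documents are skipped while iterating over document IDs (which run up to maxDoc() and may contain gaps). Here is a minimal, self-contained sketch of that pattern; it assumes the Lucene 4.x-7.x API used by the examples (MultiFields was removed in later versions), and the class and method names are placeholders.

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.Bits;

public class LiveDocsScan {

    /** Visits every live (non-deleted) document of the given reader. */
    static void visitLiveDocs(IndexReader reader) throws IOException {
        // getLiveDocs returns null when the index has no deletions.
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        for (int i = 0; i < reader.maxDoc(); i++) {
            if (reader.hasDeletions() && !liveDocs.get(i))
                continue; // deleted, just ignore it
            Document d = reader.document(i);
            // ... process d ...
        }
    }
}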

Usage

From source file:net.semanticmetadata.lire.searchers.custom.SingleNddCeddImageSearcher.java

License: Open Source License

protected void init(IndexReader reader) {
    this.reader = reader;
    if (reader.hasDeletions()) {
        throw new UnsupportedOperationException(
                "The index has to be optimized first to be cached! Use IndexWriter.forceMerge(1) to do this.");
    }
    docs = new TreeSet<SimpleResult>();
    try {
        this.cachedInstance = (GlobalFeature) this.descriptorClass.newInstance();
        if (fieldName == null)
            fieldName = this.cachedInstance.getFieldName();
    } catch (InstantiationException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher ("
                + descriptorClass.getName() + "): " + e.getMessage());
    } catch (IllegalAccessException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher ("
                + descriptorClass.getName() + "): " + e.getMessage());
    }
    // put all respective features into an in-memory cache ...
    if (isCaching && reader != null) {
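        // numDocs() is a safe loop bound here: the check above rejected indexes with
        // deletions, so document IDs 0 .. numDocs()-1 are all live.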
        int docs = reader.numDocs();
        featureCache = new ArrayList<double[]>(docs);
        try {
            Document d;
            for (int i = 0; i < docs; i++) {
                d = reader.document(i);
                cachedInstance.setByteArrayRepresentation(d.getField(fieldName).binaryValue().bytes,
                        d.getField(fieldName).binaryValue().offset, d.getField(fieldName).binaryValue().length);
                // normalize features, so we can use L1
                if (!halfDimensions) {
                    featureCache.add(normalize(cachedInstance.getFeatureVector()));
                } else {
                    featureCache.add(crunch(cachedInstance.getFeatureVector()));
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

From source file:net.semanticmetadata.lire.searchers.custom.TopDocsImageSearcher.java

License: Open Source License

/**
 * @param results
 * @param reader
 * @param globalFeature
 * @return the maximum distance found for normalizing.
 * @throws java.io.IOException
 */
protected double findSimilar(TopDocs results, IndexReader reader, GlobalFeature globalFeature)
        throws IOException {
    double maxDistance = -1d, overallMaxDistance = -1d;
    boolean hasDeletions = reader.hasDeletions();

    // clear result set ...
    docs.clear();
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    int docs = results.scoreDocs.length; // scoreDocs can hold fewer entries than totalHits
    for (int i = 0; i < docs; i++) {
        if (hasDeletions && !liveDocs.get(results.scoreDocs[i].doc))
            continue; // if it is deleted, just ignore it.

        Document d = reader.document(results.scoreDocs[i].doc);
        double distance = getDistance(d, globalFeature);
        assert (distance >= 0);
        // calculate the overall max distance to normalize score afterwards
        if (overallMaxDistance < distance) {
            overallMaxDistance = distance;
        }
        // if it is the first document:
        if (maxDistance < 0) {
            maxDistance = distance;
        }
        // if the array is not full yet:
        if (this.docs.size() < maxHits) {
            this.docs.add(new SimpleResult(distance, results.scoreDocs[i].doc));
            if (distance > maxDistance)
                maxDistance = distance;
        } else if (distance < maxDistance) {
            // if it is nearer to the sample than at least one of the current set:
            // remove the last one ...
            this.docs.remove(this.docs.last());
            // add the new one ...
            this.docs.add(new SimpleResult(distance, results.scoreDocs[i].doc));
            // and set our new distance border ...
            maxDistance = this.docs.last().getDistance();
        }
    }
    return maxDistance;
}

From source file:net.semanticmetadata.lire.searchers.FastOpponentImageSearcher.java

License: Open Source License

/**
 * @param reader
 * @param globalFeature
 * @return the maximum distance found for normalizing.
 * @throws java.io.IOException
 */
protected double findSimilar(IndexReader reader, GlobalFeature globalFeature) throws IOException {
    maxDistance = -1f;
    // clear result set ...
    docs.clear();
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    Document d;
    double tmpDistance;
    int docs = reader.maxDoc(); // doc IDs can contain gaps, so iterate up to maxDoc() and skip deleted ones below
    byte[] histogram = globalFeature.getByteArrayRepresentation();
    for (int i = 0; i < docs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.

        d = reader.document(i);
        tmpDistance = getDistance(d, histogram);
        assert (tmpDistance >= 0);
        // if it is the first document:
        if (maxDistance < 0) {
            maxDistance = tmpDistance;
        }
        // if the array is not full yet:
        if (this.docs.size() < maxHits) {
            this.docs.add(new SimpleResult(tmpDistance, i));
            if (tmpDistance > maxDistance)
                maxDistance = tmpDistance;
        } else if (tmpDistance < maxDistance) {
            // if it is nearer to the sample than at least one of the current set:
            // remove the last one ...
            this.docs.remove(this.docs.last());
            // add the new one ...
            this.docs.add(new SimpleResult(tmpDistance, i));
            // and set our new distance border ...
            maxDistance = this.docs.last().getDistance();
        }
    }
    return maxDistance;
}

From source file:net.semanticmetadata.lire.searchers.forevaluations.GenericFastImageSearcherForEvaluation.java

License: Open Source License

/**
 * @param reader
 * @param lireFeature
 * @return the maximum distance found for normalizing.
 * @throws IOException
 */
protected double findSimilar(IndexReader reader, LireFeature lireFeature) throws IOException {
    maxDistance = -1d;

    // clear result set ...
    docs.clear();
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    Document d;
    double tmpDistance;
    int docs = reader.maxDoc(); // doc IDs can contain gaps, so iterate up to maxDoc() and skip deleted ones below
    if (!isCaching) {
        // we read each and every document from the index and then we compare it to the query.
        for (int i = 0; i < docs; i++) {
            if (reader.hasDeletions() && !liveDocs.get(i))
                continue; // if it is deleted, just ignore it.

            d = reader.document(i);
            tmpDistance = getDistance(d, lireFeature);
            assert (tmpDistance >= 0);
            // if the array is not full yet:
            if (this.docs.size() < maxHits) {
                this.docs.add(new SimpleResultForEvaluation(tmpDistance, i,
                        d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]));
                if (tmpDistance > maxDistance)
                    maxDistance = tmpDistance;
            } else if (tmpDistance < maxDistance) {
                // if it is nearer to the sample than at least one of the current set:
                // remove the last one ...
                this.docs.remove(this.docs.last());
                // add the new one ...
                this.docs.add(new SimpleResultForEvaluation(tmpDistance, i,
                        d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]));
                // and set our new distance border ...
                maxDistance = this.docs.last().getDistance();
            }
        }
    } else {
        LinkedList<Consumer> tasks = new LinkedList<Consumer>();
        LinkedList<Thread> threads = new LinkedList<Thread>();
        Consumer consumer;
        Thread thread;
        Thread p = new Thread(new Producer());
        p.start();
        for (int i = 0; i < numThreads; i++) {
            consumer = new Consumer(lireFeature);
            thread = new Thread(consumer);
            thread.start();
            tasks.add(consumer);
            threads.add(thread);
        }
        for (Thread next : threads) {
            try {
                next.join();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
        TreeSet<SimpleResultForEvaluation> tmpDocs;
        boolean flag;
        SimpleResultForEvaluation simpleResult;
        for (Consumer task : tasks) {
            tmpDocs = task.getResult();
            flag = true;
            while (flag && (tmpDocs.size() > 0)) {
                simpleResult = tmpDocs.pollFirst();
                if (this.docs.size() < maxHits) {
                    this.docs.add(simpleResult);
                    if (simpleResult.getDistance() > maxDistance)
                        maxDistance = simpleResult.getDistance();
                } else if (simpleResult.getDistance() < maxDistance) {
                    this.docs.pollLast();
                    this.docs.add(simpleResult);
                    maxDistance = this.docs.last().getDistance();
                } else
                    flag = false;
            }
        }
    }
    return maxDistance;
}

From source file:net.semanticmetadata.lire.searchers.forevaluations.GenericFastImageSearcherForEvaluation.java

License: Open Source License

public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
    // get the first document:
    SimpleImageDuplicates simpleImageDuplicates = null;
    Document doc = reader.document(0);

    LireFeature lireFeature = extractorItem.getFeatureInstance();
    if (doc.getField(fieldName).binaryValue() != null && doc.getField(fieldName).binaryValue().length > 0)
        lireFeature.setByteArrayRepresentation(doc.getField(fieldName).binaryValue().bytes,
                doc.getField(fieldName).binaryValue().offset, doc.getField(fieldName).binaryValue().length);

    HashMap<Double, List<String>> duplicates = new HashMap<Double, List<String>>();

    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    int docs = reader.maxDoc(); // doc IDs can contain gaps, so iterate up to maxDoc() and skip deleted ones below
    int numDuplicates = 0;
    for (int i = 0; i < docs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.

        Document d = reader.document(i);
        double distance = getDistance(d, lireFeature);

        if (!duplicates.containsKey(distance)) {
            duplicates.put(distance, new LinkedList<String>());
        } else {
            numDuplicates++;
        }
        duplicates.get(distance).add(d.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
    }

    if (numDuplicates == 0)
        return null;

    LinkedList<List<String>> results = new LinkedList<List<String>>();
    for (double d : duplicates.keySet()) {
        if (duplicates.get(d).size() > 1) {
            results.add(duplicates.get(d));
        }
    }
    simpleImageDuplicates = new SimpleImageDuplicates(results);
    return simpleImageDuplicates;
}

From source file:net.semanticmetadata.lire.searchers.GenericFastImageSearcher.java

License: Open Source License

/**
 * @param reader
 * @param lireFeature
 * @return the maximum distance found for normalizing.
 * @throws IOException
 */
protected double findSimilar(IndexReader reader, LireFeature lireFeature) throws IOException {
    maxDistance = -1d;

    // clear result set ...
    docs.clear();
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    Document d;
    double tmpDistance;
    int docs = reader.maxDoc(); // doc IDs can contain gaps, so iterate up to maxDoc() and skip deleted ones below
    if (!isCaching) {
        // we read each and every document from the index and then we compare it to the query.
        for (int i = 0; i < docs; i++) {
            if (reader.hasDeletions() && !liveDocs.get(i))
                continue; // if it is deleted, just ignore it.

            d = reader.document(i);
            tmpDistance = getDistance(d, lireFeature);
            assert (tmpDistance >= 0);
            // if the array is not full yet:
            if (this.docs.size() < maxHits) {
                this.docs.add(new SimpleResult(tmpDistance, i));
                if (tmpDistance > maxDistance)
                    maxDistance = tmpDistance;
            } else if (tmpDistance < maxDistance) {
                // if it is nearer to the sample than at least one of the current set:
                // remove the last one ...
                this.docs.remove(this.docs.last());
                // add the new one ...
                this.docs.add(new SimpleResult(tmpDistance, i));
                // and set our new distance border ...
                maxDistance = this.docs.last().getDistance();
            }
        }
    } else {
        LinkedList<Consumer> tasks = new LinkedList<Consumer>();
        LinkedList<Thread> threads = new LinkedList<Thread>();
        Consumer consumer;
        Thread thread;
        Thread p = new Thread(new Producer());
        p.start();
        for (int i = 0; i < numThreads; i++) {
            consumer = new Consumer(lireFeature);
            thread = new Thread(consumer);
            thread.start();
            tasks.add(consumer);
            threads.add(thread);
        }
        for (Thread next : threads) {
            try {
                next.join();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
        TreeSet<SimpleResult> tmpDocs;
        boolean flag;
        SimpleResult simpleResult;
        for (Consumer task : tasks) {
            tmpDocs = task.getResult();
            flag = true;
            while (flag && (tmpDocs.size() > 0)) {
                simpleResult = tmpDocs.pollFirst();
                if (this.docs.size() < maxHits) {
                    this.docs.add(simpleResult);
                    if (simpleResult.getDistance() > maxDistance)
                        maxDistance = simpleResult.getDistance();
                } else if (simpleResult.getDistance() < maxDistance) {
                    this.docs.pollLast();
                    this.docs.add(simpleResult);
                    maxDistance = this.docs.last().getDistance();
                } else
                    flag = false;
            }
        }
    }
    return maxDistance;
}

From source file:net.semanticmetadata.lire.searchers.TestSearching.java

License: Open Source License

public void testSeparateIndex() throws IOException, IllegalAccessException, InstantiationException {
    Cluster[] cvsurf512 = Cluster.readClusters(codebookPath + "CvSURF512");
    Cluster[] simpleceddcvsurf512 = Cluster.readClusters(codebookPath + "SIMPLEdetCVSURFCEDD512");

    ParallelIndexer parallelIndexer = new ParallelIndexer(DocumentBuilder.NUM_OF_THREADS, indexPath,
            testExtensiveRed, numOfClusters, numOfDocsForVocabulary, aggregatorClass);
    parallelIndexer.addExtractor(globalFeatureClass);
    parallelIndexer.addExtractor(localFeatureClass, cvsurf512);
    parallelIndexer.addExtractor(globalFeatureClass, keypointDetector, simpleceddcvsurf512);
    parallelIndexer.run();

    ParallelIndexer parallelIndexerSeparate = new ParallelIndexer(DocumentBuilder.NUM_OF_THREADS,
            indexPathSeparate, testExtensiveBlack, indexPath);
    parallelIndexerSeparate.run();

    IndexReader readerIndex = DirectoryReader
            .open(new RAMDirectory(FSDirectory.open(Paths.get(indexPath)), IOContext.READONCE));
    System.out.println("Documents in the index reader: " + readerIndex.maxDoc());

    IndexReader readerQueries = DirectoryReader
            .open(new RAMDirectory(FSDirectory.open(Paths.get(indexPathSeparate)), IOContext.READONCE));
    System.out.println("Documents in the query reader: " + readerQueries.maxDoc());

    GenericFastImageSearcher ceddSearcher = new GenericFastImageSearcher(5, globalFeatureClass, true,
            readerIndex);
    GenericFastImageSearcher cvsurfsearcher = new GenericFastImageSearcher(5, localFeatureClass,
            aggregatorClass.newInstance(), 512, true, readerIndex, indexPath + ".config");
    GenericFastImageSearcher simpleceddcvsurfsearcher = new GenericFastImageSearcher(5, globalFeatureClass,
            keypointDetector, aggregatorClass.newInstance(), 512, true, readerIndex, indexPath + ".config");

    Bits liveDocs = MultiFields.getLiveDocs(readerQueries);

    ImageSearchHits ceddhits, cvsurfhits, simpleceddcvsurfhits;
    Document queryDoc;
    String queryfile, hitFile;
    int counter = 0;
    for (int i = 0; i < readerQueries.maxDoc(); i++) {
        if (readerQueries.hasDeletions() && !liveDocs.get(i))
            continue;

        queryDoc = readerQueries.document(i);
        queryfile = queryDoc.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        queryfile = queryfile.substring(queryfile.lastIndexOf('\\') + 1);
        System.out.println(counter + ". Query image: " + queryfile);
        ceddhits = ceddSearcher.search(queryDoc, readerIndex);
        cvsurfhits = cvsurfsearcher.search(queryDoc, readerIndex);
        simpleceddcvsurfhits = simpleceddcvsurfsearcher.search(queryDoc, readerIndex);

        System.out.println("Global:");
        for (int y = 0; y < ceddhits.length(); y++) {
            hitFile = readerIndex.document(ceddhits.documentID(y))
                    .getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
            hitFile = hitFile.substring(hitFile.lastIndexOf('\\') + 1);
            System.out.println(y + ". " + hitFile + " " + ceddhits.score(y));
        }

        System.out.println("Local:");
        for (int y = 0; y < cvsurfhits.length(); y++) {
            hitFile = readerIndex.document(cvsurfhits.documentID(y))
                    .getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
            hitFile = hitFile.substring(hitFile.lastIndexOf('\\') + 1);
            System.out.println(y + ". " + hitFile + " " + cvsurfhits.score(y));
        }

        System.out.println("Simple:");
        for (int y = 0; y < simpleceddcvsurfhits.length(); y++) {
            hitFile = readerIndex.document(simpleceddcvsurfhits.documentID(y))
                    .getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
            hitFile = hitFile.substring(hitFile.lastIndexOf('\\') + 1);
            System.out.println(y + ". " + hitFile + " " + simpleceddcvsurfhits.score(y));
        }
        System.out.println();
        counter++;
    }
}

From source file:org.apache.gaelucene.tools.LuceneIndexPushUtil.java

License: Apache License

public static void main(String[] args) throws IOException {
    for (int i = 0; i < args.length; i++) {
        if ("-app-url".equals(args[i])) {
            gaeAppURL = args[++i];
        } else if ("-auth-cookie".equals(args[i])) {
            authCookie = args[++i];
        } else if ("-src".equals(args[i])) {
            sourceDirName = args[++i];
        } else if ("-cat".equals(args[i])) {
            category = args[++i];
        } else if ("-rec-file".equals(args[i])) {
            jobRecFileName = args[++i];
        }
    }

    if (gaeAppURL == null || authCookie == null || sourceDirName == null || category == null
            || jobRecFileName == null) {
        System.err.println(USAGE);
        System.exit(-1);
    }

    File sourceDir = new File(sourceDirName);
    if (!sourceDir.exists()) {
        System.err.println("'" + sourceDir.getAbsolutePath() + "' DOES NOT EXIST!");
        System.exit(-1);
    }
    sourceDirName = sourceDir.getAbsolutePath();

    // load filenames that have been uploaded successfully last time.
    HashSet<String> uploadedRec = new HashSet<String>();
    File jobRecFile = new File(jobRecFileName);
    if (jobRecFile.exists()) {
        LineNumberReader reader = new LineNumberReader(new FileReader(jobRecFile));
        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
            if (line.indexOf(" OK") > -1) {
                line = line.substring(0, line.indexOf(" ")).trim();
            }
            uploadedRec.add(line);
        }
        reader.close();
    }

    System.out.println("[INFO ] - trying to open index under " + sourceDirName);
    IndexReader indexReader = IndexReader.open(sourceDir);
    int maxDoc = indexReader.maxDoc();
    int numDocs = indexReader.numDocs();
    long version = indexReader.getVersion();
    boolean hasDeletions = indexReader.hasDeletions();
    boolean isOptimized = indexReader.isOptimized();

    System.out.println("maxDoc:" + maxDoc);
    System.out.println("numDocs:" + numDocs);
    System.out.println("version:" + version);
    System.out.println("hasDeletions:" + hasDeletions);
    System.out.println("isOptimized:" + isOptimized);

    // record filenames that were uploaded successfully
    BufferedWriter dataWriter = new BufferedWriter(
            new OutputStreamWriter(new FileOutputStream(jobRecFile, true)));
    System.out.println("[INFO ] - trying to synchronize the index files onto gae...");
    File[] files = sourceDir.listFiles();
    for (int i = 0; i < files.length; i++) {
        File file = files[i];
        if (uploadedRec.contains(file.getName())) {
            System.out.println("[INFO ] - skip file '" + file.getName() + "'");
            continue;
        }
        try {
            commitFile(file, category, version, i);
            dataWriter.write(file.getName() + " OK\n");
        } catch (IOException ioe) {
            System.out.println("[WARN ] - failed to upload '" + file.getName() + "', because:" + ioe);
        }
    }
    dataWriter.flush();
    dataWriter.close();

    System.out.println("[INFO ] - trying to activate the index...");
    try {
        activateIndex(category, version);
    } catch (IOException ioe) {
        System.out.println("[WARN ] - failed to activate the index, because:" + ioe);
    }
}
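
The example above targets the pre-4.0 Lucene API: IndexReader.open(File) and isOptimized() were removed in later releases. For comparison, here is a rough sketch of the same statistics block against a modern API (assuming Lucene 5+), where "optimized" is approximated as a single segment with no deletions; the class name is a placeholder.

import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.FSDirectory;

public class IndexStats {
    public static void main(String[] args) throws Exception {
        // Open the index via DirectoryReader; IndexReader.open() no longer exists.
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(args[0])))) {
            System.out.println("maxDoc:" + reader.maxDoc());
            System.out.println("numDocs:" + reader.numDocs());
            System.out.println("version:" + reader.getVersion());
            System.out.println("hasDeletions:" + reader.hasDeletions());
            // isOptimized() is gone; a single segment with no deletions is the closest equivalent.
            System.out.println("isOptimized:" + (reader.leaves().size() == 1 && !reader.hasDeletions()));
        }
    }
}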

From source file:org.dyndns.andreasbaumann.LuceneAnalyzer.java

License: Open Source License

private static void printGlobalInfo(IndexReader indexReader, boolean printHeaders, boolean isSolr,
        SolrIndexSearcher solrSearch) throws IOException {
    if (printHeaders) {
        System.out.println("Global Information:");
        System.out.println("===================");
    }

    System.out.println("\tnumber of documents: " + indexReader.numDocs());

    // we should get the number of features differently; this is inefficient, but Lucene
    // has no notion of global statistics (because the default weighting scheme doesn't
    // make use of them!)
    int nofFeatures = 0;
    int nofTokens = 0;
    TermEnum terms = indexReader.terms();
    while (terms.next()) {
        Term term = terms.term();
        int df = terms.docFreq();
        nofFeatures++;
        nofTokens += df;
    }
    System.out.println("\ttotal number of features: " + nofFeatures);
    System.out.println("\ttotal number of tokens: " + nofTokens);

    System.out.println("\tversion: " + indexReader.getVersion());
    System.out.println("\tstill current: " + indexReader.isCurrent());

    //TODO: we don't get segment information!
    //System.out.println( "is optimized:" + segmentInfos.size( ) == 1 && !indexReader.hasDeletions( ) );
    System.out.println("\tmaximal document number: " + indexReader.maxDoc());
    System.out.println("\thas deletions: " + indexReader.hasDeletions());

    if (isSolr) {
        System.out.println("\tSolr version: " + solrSearch.getVersion());
    }

    System.out.println("");
}

From source file:org.eclipse.che.api.search.server.impl.LuceneSearcher.java

License: Open Source License

private void printStatistic() throws IOException {
    if (LOG.isDebugEnabled()) {
        IndexSearcher luceneSearcher = null;
        try {
            searcherManager.maybeRefresh();
            luceneSearcher = searcherManager.acquire();
            IndexReader reader = luceneSearcher.getIndexReader();
            LOG.debug(
                    "IndexReader numDocs={} numDeletedDocs={} maxDoc={} hasDeletions={}. Writer numDocs={} numRamDocs={} hasPendingMerges={}  hasUncommittedChanges={} hasDeletions={}",
                    reader.numDocs(), reader.numDeletedDocs(), reader.maxDoc(), reader.hasDeletions(),
                    luceneIndexWriter.numDocs(), luceneIndexWriter.numRamDocs(),
                    luceneIndexWriter.hasPendingMerges(), luceneIndexWriter.hasUncommittedChanges(),
                    luceneIndexWriter.hasDeletions());
        } finally {
            // acquire() may have failed, leaving luceneSearcher null.
            if (luceneSearcher != null) {
                searcherManager.release(luceneSearcher);
            }
        }
    }
}