Example usage for org.apache.lucene.search IndexSearcher getIndexReader

Introduction

On this page you can find example usages of org.apache.lucene.search IndexSearcher getIndexReader.

Prototype

public IndexReader getIndexReader() 

Document

Return the IndexReader this searches.
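
For orientation, here is a minimal, self-contained sketch of the call before the real-world examples below. The index path is hypothetical; the API shown matches the Lucene 6.x era used by these examples:

import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class GetIndexReaderExample {
    public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(Paths.get("/path/to/index")); // hypothetical index path
        IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));

        // getIndexReader() returns the same reader the searcher was built on;
        // the searcher does not own or close it:
        IndexReader reader = searcher.getIndexReader();
        System.out.println("maxDoc=" + reader.maxDoc() + " numDocs=" + reader.numDocs());

        // Closing remains the caller's responsibility:
        reader.close();
        dir.close();
    }
}

As the examples below show, the common pattern is to acquire a searcher (often from a ReferenceManager), call getIndexReader() for per-index statistics or its leaves(), and release or close through whatever owner opened the reader.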

Usage

From source file: perf.IndexAndSearchOpenStreetMaps.java

License: Apache License

private static void queryIndex(String queryClass, int gons, int nearestTopN, String polyFile,
        boolean preBuildQueries, Double filterPercent, boolean doDistanceSort) throws IOException {
    IndexSearcher[] searchers = new IndexSearcher[NUM_PARTS];
    Directory[] dirs = new Directory[NUM_PARTS];
    long sizeOnDisk = 0;
    for (int part = 0; part < NUM_PARTS; part++) {
        dirs[part] = FSDirectory.open(Paths.get(getName(part, doDistanceSort)));
        searchers[part] = new IndexSearcher(DirectoryReader.open(dirs[part]));
        searchers[part].setQueryCache(null);
        for (String name : dirs[part].listAll()) {
            sizeOnDisk += dirs[part].fileLength(name);
        }
    }
    //plotBKD(searchers[0].getIndexReader());
    System.out.println("INDEX SIZE: " + (sizeOnDisk / 1024. / 1024. / 1024.) + " GB");
    long bytes = 0;
    long maxDoc = 0;
    for (IndexSearcher s : searchers) {
        IndexReader r = s.getIndexReader();
        maxDoc += r.maxDoc();
        for (LeafReaderContext ctx : r.leaves()) {
            CodecReader cr = (CodecReader) ctx.reader();
            /*
            for(Accountable acc : cr.getChildResources()) {
              System.out.println("  " + Accountables.toString(acc));
            }
            */
            bytes += cr.ramBytesUsed();
        }
    }
    System.out.println("READER MB: " + (bytes / 1024. / 1024.));
    System.out.println("maxDoc=" + maxDoc);

    double bestQPS = Double.NEGATIVE_INFINITY;

    // million hits per second:
    double bestMHPS = Double.NEGATIVE_INFINITY;

    if (queryClass.equals("polyFile")) {

        // TODO: only load the double[][] here, so that we include the cost of making Polygon and Query in each iteration!!
        List<Polygon[]> polygons = readPolygons(polyFile);

        // Uncomment to find the lost points!!

        /*
        BooleanQuery.Builder b = new BooleanQuery.Builder();
        b.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
        for(Query q : queries) {
          b.add(q, BooleanClause.Occur.MUST_NOT);
        }
        searchers[0].search(b.build(), new SimpleCollector() {
            private int markerCount;
            private SortedNumericDocValues docValues;
                
            @Override
            protected void doSetNextReader(LeafReaderContext context) throws IOException {
              docValues = context.reader().getSortedNumericDocValues("point");
            }
                
            @Override
            public boolean needsScores() {
              return false;
            }
                
            @Override
            public void collect(int doc) {
              docValues.setDocument(doc);
              int count = docValues.count();
              for (int i = 0; i < count; i++) {
                long encoded = docValues.valueAt(i);
                double docLatitude = LatLonPoint.decodeLatitude((int)(encoded >> 32));
                double docLongitude = LatLonPoint.decodeLongitude((int)(encoded & 0xFFFFFFFF));
                System.out.println("        WE.marker([" + docLatitude + ", " + docLongitude + "]).addTo(earth);");
              }
            }
          });
        */

        /*
        {
          Query q = LatLonPoint.newBoxQuery("point", minLat, maxLat, minLon, maxLon);
          int totHits = 0;
                           
          for(IndexSearcher s : searchers) {
            int hitCount = s.count(q);
            totHits += hitCount;
          }
                
          System.out.println("Poly file bbox total hits: " + totHits);
        }
        */

        if (preBuildQueries) {
            System.out.println("\nUsing pre-built polygon queries, loaded from file " + polyFile);
            List<Query> queries = new ArrayList<>();
            for (Polygon[] multiPolygon : polygons) {
                Query q = null;
                if (useLatLonPoint) {
                    q = LatLonPoint.newPolygonQuery("point", multiPolygon);
                } else if (useGeoPoint) {
                    q = new GeoPointInPolygonQuery("point", multiPolygon);
                } else if (useGeo3DLarge) {
                    q = Geo3DPoint.newLargePolygonQuery("point", multiPolygon);
                } else if (useGeo3D) {
                    q = Geo3DPoint.newPolygonQuery("point", multiPolygon);
                }
                queries.add(q);
            }

            double[] result = runQueries(searchers, queries);
            bestQPS = result[0];
            bestMHPS = result[1];

        } else {

            System.out.println("\nUsing on-the-fly polygon queries, loaded from file " + polyFile);

            for (int iter = 0; iter < ITERS; iter++) {
                long tStart = System.nanoTime();
                long totHits = 0;
                int queryCount = 0;
                for (Polygon[] multiPolygon : polygons) {

                    // We do this to keep the benchmark honest, so any construction cost of a polygon is included in our run time measure:
                    multiPolygon = clonePolygon(multiPolygon);

                    Query q;
                    if (useLatLonPoint) {
                        q = LatLonPoint.newPolygonQuery("point", multiPolygon);
                    } else if (useGeoPoint) {
                        q = new GeoPointInPolygonQuery("point", multiPolygon);
                    } else {
                        q = Geo3DPoint.newLargePolygonQuery("point", multiPolygon);
                    }

                    for (IndexSearcher s : searchers) {
                        int hitCount = s.count(q);
                        totHits += hitCount;
                    }
                    queryCount++;
                }

                long tEnd = System.nanoTime();
                double elapsedSec = (tEnd - tStart) / 1000000000.0;
                double qps = queryCount / elapsedSec;
                double mhps = (totHits / 1000000.0) / elapsedSec;
                System.out.println(String.format(Locale.ROOT,
                        "ITER %d: %.2f M hits/sec, %.2f QPS (%.2f sec for %d queries), totHits=%d", iter, mhps,
                        qps, elapsedSec, queryCount, totHits));
                if (qps > bestQPS) {
                    System.out.println("  ***");
                    bestQPS = qps;
                    bestMHPS = mhps;
                }
            }
        }

    } else if (preBuildQueries) {
        System.out.println("\nUsing pre-built queries");

        double[] result = runQueries(searchers, makeQueries(queryClass, gons));
        bestQPS = result[0];
        bestMHPS = result[1];

    } else {
        System.out.println("\nUsing on-the-fly queries");

        // Create regularly spaced shapes in a grid around London, UK:
        int STEPS = 5;
        double MIN_LAT = 51.0919106;
        double MAX_LAT = 51.6542719;
        double MIN_LON = -0.3867282;
        double MAX_LON = 0.8492337;

        // makeRegularPoly has insanely slow math, so make the double[]'s here.
        // we still form the query inside the benchmark loop (e.g. to account for preprocessing)
        ArrayList<double[][]> polys = new ArrayList<double[][]>(225);
        if ("poly".equals(queryClass)) {
            for (int latStep = 0; latStep < STEPS; latStep++) {
                double lat = MIN_LAT + latStep * (MAX_LAT - MIN_LAT) / STEPS;
                for (int lonStep = 0; lonStep < STEPS; lonStep++) {
                    double lon = MIN_LON + lonStep * (MAX_LON - MIN_LON) / STEPS;
                    for (int latStepEnd = latStep + 1; latStepEnd <= STEPS; latStepEnd++) {
                        double latEnd = MIN_LAT + latStepEnd * (MAX_LAT - MIN_LAT) / STEPS;
                        for (int lonStepEnd = lonStep + 1; lonStepEnd <= STEPS; lonStepEnd++) {
                            double lonEnd = MIN_LON + lonStepEnd * (MAX_LON - MIN_LON) / STEPS;
                            double distanceMeters = SloppyMath.haversinMeters(lat, lon, latEnd, lonEnd) / 2.0;
                            double centerLat = (lat + latEnd) / 2.0;
                            double centerLon = (lon + lonEnd) / 2.0;
                            polys.add(makeRegularPoly(centerLat, centerLon, distanceMeters, gons));
                        }
                    }
                }
            }
        }

        for (int iter = 0; iter < ITERS; iter++) {
            long tStart = System.nanoTime();
            long totHits = 0;
            double totNearestDistance = 0.0;
            int queryCount = 0;

            for (int latStep = 0; latStep < STEPS; latStep++) {
                double lat = MIN_LAT + latStep * (MAX_LAT - MIN_LAT) / STEPS;
                for (int lonStep = 0; lonStep < STEPS; lonStep++) {
                    double lon = MIN_LON + lonStep * (MAX_LON - MIN_LON) / STEPS;
                    for (int latStepEnd = latStep + 1; latStepEnd <= STEPS; latStepEnd++) {
                        double latEnd = MIN_LAT + latStepEnd * (MAX_LAT - MIN_LAT) / STEPS;
                        for (int lonStepEnd = lonStep + 1; lonStepEnd <= STEPS; lonStepEnd++) {
                            double lonEnd = MIN_LON + lonStepEnd * (MAX_LON - MIN_LON) / STEPS;

                            double distanceMeters = SloppyMath.haversinMeters(lat, lon, latEnd, lonEnd) / 2.0;
                            double centerLat = (lat + latEnd) / 2.0;
                            double centerLon = (lon + lonEnd) / 2.0;
                            ScoreDoc[] nearestHits = null;
                            Query q = null;

                            switch (queryClass) {
                            case "distance":
                                if (useGeo3D || useGeo3DLarge) {
                                    q = Geo3DPoint.newDistanceQuery("point", centerLat, centerLon,
                                            distanceMeters);
                                } else if (useLatLonPoint) {
                                    q = LatLonPoint.newDistanceQuery("point", centerLat, centerLon,
                                            distanceMeters);
                                } else if (useGeoPoint) {
                                    q = new GeoPointDistanceQuery("point", centerLat, centerLon,
                                            distanceMeters);
                                } else {
                                    throw new AssertionError();
                                }
                                break;
                            case "poly":
                                double[][] poly = polys.get(queryCount);
                                //System.out.println("poly lats: " + Arrays.toString(poly[0]));
                                //System.out.println("poly lons: " + Arrays.toString(poly[1]));
                                if (useGeo3DLarge) {
                                    //System.out.println("POLY:\n  lats=" + Arrays.toString(poly[0]) + "\n  lons=" + Arrays.toString(poly[1]));
                                    q = Geo3DPoint.newLargePolygonQuery("point", new Polygon(poly[0], poly[1]));
                                } else if (useGeo3D) {
                                    q = Geo3DPoint.newPolygonQuery("point", new Polygon(poly[0], poly[1]));
                                } else if (useLatLonPoint) {
                                    q = LatLonPoint.newPolygonQuery("point", new Polygon(poly[0], poly[1]));
                                } else if (useGeoPoint) {
                                    q = new GeoPointInPolygonQuery("point", new Polygon(poly[0], poly[1]));
                                } else {
                                    throw new AssertionError();
                                }
                                break;
                            case "box":
                                if (useGeo3D || useGeo3DLarge) {
                                    q = Geo3DPoint.newBoxQuery("point", lat, latEnd, lon, lonEnd);
                                } else if (useLatLonPoint) {
                                    q = LatLonPoint.newBoxQuery("point", lat, latEnd, lon, lonEnd);
                                } else if (useGeoPoint) {
                                    q = new GeoPointInBBoxQuery("point", lat, latEnd, lon, lonEnd);
                                } else {
                                    throw new AssertionError();
                                }
                                break;
                            case "nearest":
                                if (useLatLonPoint) {
                                    if (searchers.length != 1) {
                                        // TODO
                                        throw new AssertionError();
                                    }
                                    nearestHits = LatLonPoint.nearest(searchers[0], "point",
                                            (lat + latEnd) / 2.0, (lon + lonEnd) / 2.0, nearestTopN).scoreDocs;
                                    if (false && iter == 0) {
                                        System.out.println("\n" + nearestHits.length + " nearest:");
                                        for (ScoreDoc hit : nearestHits) {
                                            System.out.println("  " + ((FieldDoc) hit).fields[0]);
                                        }
                                    }
                                    for (ScoreDoc hit : nearestHits) {
                                        totNearestDistance += (Double) ((FieldDoc) hit).fields[0];
                                    }
                                } else {
                                    throw new AssertionError();
                                }
                                break;
                            default:
                                throw new AssertionError("unknown queryClass " + queryClass);
                            }

                            // TODO: do this somewhere else?
                            if (filterPercent != null) {
                                BooleanQuery.Builder builder = new BooleanQuery.Builder();
                                builder.add(q, BooleanClause.Occur.MUST);
                                builder.add(new RandomQuery(filterPercent), BooleanClause.Occur.FILTER);
                                q = builder.build();
                            }

                            if (q != null) {
                                if (doDistanceSort) {
                                    Sort sort = new Sort(LatLonDocValuesField.newDistanceSort("point",
                                            centerLat, centerLon));
                                    for (IndexSearcher s : searchers) {
                                        TopFieldDocs hits = s.search(q, 10, sort);
                                        totHits += hits.totalHits;
                                    }
                                } else {
                                    //System.out.println("\nRUN QUERY " + q);
                                    //long t0 = System.nanoTime();
                                    for (IndexSearcher s : searchers) {
                                        int hitCount = s.count(q);
                                        totHits += hitCount;
                                        if (false && iter == 0) {
                                            System.out.println("q=" + q + " lat=" + centerLat + " lon="
                                                    + centerLon + " distanceMeters=" + distanceMeters
                                                    + " hits: " + hitCount);
                                        }
                                    }
                                }
                            } else {
                                assert nearestHits != null;
                                totHits += nearestHits.length;
                            }
                            queryCount++;
                            //throw new RuntimeException("now stop");
                        }
                    }
                }
            }

            long tEnd = System.nanoTime();
            double elapsedSec = (tEnd - tStart) / 1000000000.0;
            double qps = queryCount / elapsedSec;
            double mhps = (totHits / 1000000.0) / elapsedSec;
            if (queryClass.equals("nearest")) {
                System.out.println(String.format(Locale.ROOT,
                        "ITER %d: %.2f QPS (%.2f sec for %d queries), totNearestDistance=%.10f, totHits=%d",
                        iter, qps, elapsedSec, queryCount, totNearestDistance, totHits));
            } else {
                System.out.println(String.format(Locale.ROOT,
                        "ITER %d: %.2f M hits/sec, %.2f QPS (%.2f sec for %d queries), totHits=%d", iter, mhps,
                        qps, elapsedSec, queryCount, totHits));
            }
            if (qps > bestQPS) {
                System.out.println("  ***");
                bestQPS = qps;
                bestMHPS = mhps;
            }
        }
    }
    System.out.println("BEST M hits/sec: " + bestMHPS);
    System.out.println("BEST QPS: " + bestQPS);

    for (IndexSearcher s : searchers) {
        s.getIndexReader().close();
    }
    IOUtils.close(dirs);
}

From source file: perf.IndexState.java

License: Apache License

public IndexState(ReferenceManager<IndexSearcher> mgr, TaxonomyReader taxoReader, String textFieldName,
        DirectSpellChecker spellChecker, String hiliteImpl, FacetsConfig facetsConfig) throws IOException {
    this.mgr = mgr;
    this.spellChecker = spellChecker;
    this.textFieldName = textFieldName;
    this.taxoReader = taxoReader;
    this.facetsConfig = facetsConfig;

    groupEndQuery = new TermQuery(new Term("groupend", "x"));
    if (hiliteImpl.equals("FastVectorHighlighter")) {
        fastHighlighter = new FastVectorHighlighter(true, true);
        useHighlighter = false;
        postingsHighlighter = null;
    } else if (hiliteImpl.equals("PostingsHighlighter")) {
        fastHighlighter = null;
        useHighlighter = false;
        postingsHighlighter = new PostingsHighlighter();
    } else if (hiliteImpl.equals("Highlighter")) {
        fastHighlighter = null;
        useHighlighter = true;
        postingsHighlighter = null;
    } else {
        throw new IllegalArgumentException("unrecognized -hiliteImpl \"" + hiliteImpl + "\"");
    }
    IndexSearcher searcher = mgr.acquire();
    try {
        hasDeletions = searcher.getIndexReader().hasDeletions();

        for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
            pkLookupStates.put(ctx.reader().getCoreCacheKey(), new ThreadLocal<PKLookupState>());
            pointsPKLookupStates.put(ctx.reader().getCoreCacheKey(), new ThreadLocal<PointsPKLookupState>());
        }
    } finally {
        mgr.release(searcher);
    }
}

From source file: perf.LocalTaskSource.java

License: Apache License

public LocalTaskSource(IndexState indexState, TaskParser taskParser, String tasksFile, Random staticRandom,
        Random random, int numTaskPerCat, int taskRepeatCount, boolean doPKLookup)
        throws IOException, ParseException {

    final List<Task> loadedTasks = loadTasks(taskParser, tasksFile);
    Collections.shuffle(loadedTasks, staticRandom);
    final List<Task> prunedTasks = pruneTasks(loadedTasks, numTaskPerCat);

    final IndexSearcher searcher = indexState.mgr.acquire();
    final int maxDoc;
    try {
        maxDoc = searcher.getIndexReader().maxDoc();
    } finally {
        indexState.mgr.release(searcher);
    }

    // Add PK tasks
    //System.out.println("WARNING: skip PK tasks");
    if (doPKLookup) {
        final int numPKTasks = (int) Math.min(maxDoc / 6000., numTaskPerCat);
        final Set<BytesRef> pkSeenIDs = new HashSet<BytesRef>();
        final Set<Integer> pkSeenIntIDs = new HashSet<Integer>();
        for (int idx = 0; idx < numPKTasks; idx++) {
            prunedTasks.add(new PKLookupTask(maxDoc, staticRandom, 4000, pkSeenIDs, idx));
            //prunedTasks.add(new PointsPKLookupTask(maxDoc, staticRandom, 4000, pkSeenIntIDs, idx));
        }
        /*
        final Set<BytesRef> pkSeenSingleIDs = new HashSet<BytesRef>();
        for(int idx=0;idx<numPKTasks*100;idx++) {
          prunedTasks.add(new SinglePKLookupTask(maxDoc, staticRandom, pkSeenSingleIDs, idx));
        }
        */
    }

    tasks = new ArrayList<Task>();

    // Copy the pruned tasks multiple times, shuffling the order each time:
    for (int iter = 0; iter < taskRepeatCount; iter++) {
        Collections.shuffle(prunedTasks, random);
        for (Task task : prunedTasks) {
            tasks.add(task.clone());
        }
    }
    System.out.println("TASK LEN=" + tasks.size());
}

From source file: perf.NRTPerfTest.java

License: Apache License

public static void main(String[] args) throws Exception {

    final String dirImpl = args[0];
    final String dirPath = args[1];
    final String commit = args[2];
    final String lineDocFile = args[3];
    final long seed = Long.parseLong(args[4]);
    final double docsPerSec = Double.parseDouble(args[5]);
    final double runTimeSec = Double.parseDouble(args[6]);
    final int numSearchThreads = Integer.parseInt(args[7]);
    int numIndexThreads = Integer.parseInt(args[8]);
    if (numIndexThreads > docsPerSec) {
        System.out.println("INFO: numIndexThreads higher than docsPerSec, adjusting numIndexThreads");
        numIndexThreads = (int) Math.max(1, docsPerSec);
    }
    final double reopenPerSec = Double.parseDouble(args[9]);
    final Mode mode = Mode.valueOf(args[10].toUpperCase(Locale.ROOT));
    statsEverySec = Integer.parseInt(args[11]);
    final boolean doCommit = args[12].equals("yes");
    final double mergeMaxWriteMBPerSec = Double.parseDouble(args[13]);
    if (mergeMaxWriteMBPerSec != 0.0) {
        throw new IllegalArgumentException("mergeMaxWriteMBPerSec must be 0.0 until LUCENE-3202 is done");
    }
    final String tasksFile = args[14];
    if (Files.notExists(Paths.get(tasksFile))) {
        throw new FileNotFoundException("tasks file not found " + tasksFile);
    }

    final boolean hasProcMemInfo = Files.exists(Paths.get("/proc/meminfo"));

    System.out.println("DIR=" + dirImpl);
    System.out.println("Index=" + dirPath);
    System.out.println("Commit=" + commit);
    System.out.println("LineDocs=" + lineDocFile);
    System.out.println("Docs/sec=" + docsPerSec);
    System.out.println("Run time sec=" + runTimeSec);
    System.out.println("NumSearchThreads=" + numSearchThreads);
    System.out.println("NumIndexThreads=" + numIndexThreads);
    System.out.println("Reopen/sec=" + reopenPerSec);
    System.out.println("Mode=" + mode);
    System.out.println("tasksFile=" + tasksFile);

    System.out.println("Record stats every " + statsEverySec + " seconds");
    final int count = (int) ((runTimeSec / statsEverySec) + 2);
    docsIndexedByTime = new AtomicInteger[count];
    searchesByTime = new AtomicInteger[count];
    totalUpdateTimeByTime = new AtomicLong[count];
    final AtomicInteger reopensByTime[] = new AtomicInteger[count];
    for (int i = 0; i < count; i++) {
        docsIndexedByTime[i] = new AtomicInteger();
        searchesByTime[i] = new AtomicInteger();
        totalUpdateTimeByTime[i] = new AtomicLong();
        reopensByTime[i] = new AtomicInteger();
    }

    System.out.println(
            "Max merge MB/sec = " + (mergeMaxWriteMBPerSec <= 0.0 ? "unlimited" : mergeMaxWriteMBPerSec));
    final Random random = new Random(seed);

    final LineFileDocs docs = new LineFileDocs(lineDocFile, true, false, false, false, false, null,
            new HashSet<String>(), null, true);

    final Directory dir0;
    if (dirImpl.equals("MMapDirectory")) {
        dir0 = new MMapDirectory(Paths.get(dirPath));
    } else if (dirImpl.equals("NIOFSDirectory")) {
        dir0 = new NIOFSDirectory(Paths.get(dirPath));
    } else if (dirImpl.equals("SimpleFSDirectory")) {
        dir0 = new SimpleFSDirectory(Paths.get(dirPath));
    } else {
        docs.close();
        throw new RuntimeException("unknown directory impl \"" + dirImpl + "\"");
    }
    //final NRTCachingDirectory dir = new NRTCachingDirectory(dir0, 10, 200.0, mergeMaxWriteMBPerSec);
    final NRTCachingDirectory dir = new NRTCachingDirectory(dir0, 20, 400.0);
    //final MergeScheduler ms = dir.getMergeScheduler();
    //final Directory dir = dir0;
    //final MergeScheduler ms = new ConcurrentMergeScheduler();

    final String field = "body";

    // Open an IW on the requested commit point, but, don't
    // delete other (past or future) commit points:
    // TODO take Analyzer as parameter
    StandardAnalyzer analyzer = new StandardAnalyzer(CharArraySet.EMPTY_SET);
    final IndexWriterConfig conf = new IndexWriterConfig(analyzer);
    conf.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    conf.setRAMBufferSizeMB(256.0);
    //iwc.setMergeScheduler(ms);

    final Codec codec = new Lucene62Codec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            if (field.equals("id")) {
                return PostingsFormat.forName("Memory");
            } else {
                return PostingsFormat.forName("Lucene50");
            }
        }

        private final DocValuesFormat direct = DocValuesFormat.forName("Direct");

        @Override
        public DocValuesFormat getDocValuesFormatForField(String field) {
            return direct;
        }
    };

    conf.setCodec(codec);

    /*
    iwc.setMergePolicy(new LogByteSizeMergePolicy());
    ((LogMergePolicy) iwc.getMergePolicy()).setUseCompoundFile(false);
    ((LogMergePolicy) iwc.getMergePolicy()).setMergeFactor(30);
    ((LogByteSizeMergePolicy) iwc.getMergePolicy()).setMaxMergeMB(10000.0);
    System.out.println("USING LOG BS MP");
     */

    TieredMergePolicy tmp = new TieredMergePolicy();
    tmp.setNoCFSRatio(0.0);
    tmp.setMaxMergedSegmentMB(1000000.0);
    //tmp.setReclaimDeletesWeight(3.0);
    //tmp.setMaxMergedSegmentMB(7000.0);
    conf.setMergePolicy(tmp);

    if (!commit.equals("none")) {
        conf.setIndexCommit(PerfUtils.findCommitPoint(commit, dir));
    }

    // Make sure merges run @ higher prio than indexing:
    final ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) conf.getMergeScheduler();
    cms.setMaxMergesAndThreads(4, 1);

    conf.setMergedSegmentWarmer(new MergedReaderWarmer(field));

    final IndexWriter w = new IndexWriter(dir, conf);
    // w.setInfoStream(System.out);

    IndexThreads.UpdatesListener updatesListener = new IndexThreads.UpdatesListener() {
        long startTimeNS;

        @Override
        public void beforeUpdate() {
            startTimeNS = System.nanoTime();
        }

        @Override
        public void afterUpdate() {
            int idx = currentQT.get();
            totalUpdateTimeByTime[idx].addAndGet(System.nanoTime() - startTimeNS);
            docsIndexedByTime[idx].incrementAndGet();
        }
    };
    IndexThreads indexThreads = new IndexThreads(random, w, new AtomicBoolean(false), docs, numIndexThreads, -1,
            false, false, mode, (float) (docsPerSec / numIndexThreads), updatesListener, -1.0, w.maxDoc());

    // NativePosixUtil.mlockTermsDict(startR, "id");
    final SearcherManager manager = new SearcherManager(w, null);
    IndexSearcher s = manager.acquire();
    try {
        System.out.println("Reader=" + s.getIndexReader());
    } finally {
        manager.release(s);
    }

    final DirectSpellChecker spellChecker = new DirectSpellChecker();
    final IndexState indexState = new IndexState(manager, null, field, spellChecker, "PostingsHighlighter",
            null);
    final QueryParser qp = new QueryParser(field, analyzer);
    TaskParser taskParser = new TaskParser(indexState, qp, field, 10, random, true);
    final TaskSource tasks = new RandomTaskSource(taskParser, tasksFile, random) {
        @Override
        public void taskDone(Task task, long queueTimeNS, int totalHitCount) {
            searchesByTime[currentQT.get()].incrementAndGet();
        }
    };
    System.out.println("Task repeat count 1");
    System.out.println("Tasks file " + tasksFile);
    System.out.println("Num task per cat 20");
    final TaskThreads taskThreads = new TaskThreads(tasks, indexState, numSearchThreads);

    final ReopenThread reopenThread = new ReopenThread(reopenPerSec, manager, reopensByTime, runTimeSec);
    reopenThread.setName("ReopenThread");
    reopenThread.setPriority(4 + Thread.currentThread().getPriority());
    System.out.println("REOPEN PRI " + reopenThread.getPriority());

    indexThreads.start();
    reopenThread.start();
    taskThreads.start();

    Thread.currentThread().setPriority(5 + Thread.currentThread().getPriority());
    System.out.println("TIMER PRI " + Thread.currentThread().getPriority());

    //System.out.println("Start: " + new Date());

    final long startMS = System.currentTimeMillis();
    final long stopMS = startMS + (long) (runTimeSec * 1000);
    int lastQT = -1;
    while (true) {
        final long t = System.currentTimeMillis();
        if (t >= stopMS) {
            break;
        }
        final int qt = (int) ((t - startMS) / statsEverySec / 1000);
        currentQT.set(qt);
        if (qt != lastQT) {
            final int prevQT = lastQT;
            lastQT = qt;
            if (prevQT > 0) {
                final String other;
                if (hasProcMemInfo) {
                    other = " D=" + getLinuxDirtyBytes();
                } else {
                    other = "";
                }
                int prev = prevQT - 1;
                System.out.println(String.format("QT %d searches=%d docs=%d reopens=%s totUpdateTime=%d", prev,
                        searchesByTime[prev].get(), docsIndexedByTime[prev].get(),
                        reopensByTime[prev].get() + other,
                        TimeUnit.NANOSECONDS.toMillis(totalUpdateTimeByTime[prev].get())));
            }
        }
        Thread.sleep(25);
    }

    taskThreads.stop();
    reopenThread.join();
    indexThreads.stop();

    System.out.println("By time:");
    for (int i = 0; i < searchesByTime.length - 2; i++) {
        System.out.println(String.format("  %d searches=%d docs=%d reopens=%d totUpdateTime=%d",
                i * statsEverySec, searchesByTime[i].get(), docsIndexedByTime[i].get(), reopensByTime[i].get(),
                TimeUnit.NANOSECONDS.toMillis(totalUpdateTimeByTime[i].get())));
    }

    manager.close();
    if (doCommit) {
        w.close();
    } else {
        w.rollback();
    }
}

From source file: perf.PKLookupTask.java

License: Apache License

@Override
public void go(IndexState state) throws IOException {

    final IndexSearcher searcher = state.mgr.acquire();
    try {
        final List<LeafReaderContext> subReaders = searcher.getIndexReader().leaves();
        IndexState.PKLookupState[] pkStates = new IndexState.PKLookupState[subReaders.size()];
        for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
            LeafReaderContext ctx = subReaders.get(subIDX);
            ThreadLocal<IndexState.PKLookupState> states = state.pkLookupStates
                    .get(ctx.reader().getCoreCacheKey());
            // NPE here means you are trying to use this task on a newly refreshed NRT reader!
            IndexState.PKLookupState pkState = states.get();
            if (pkState == null) {
                pkState = new IndexState.PKLookupState(ctx.reader(), "id");
                states.set(pkState);
            }
            pkStates[subIDX] = pkState;
        }

        for (int idx = 0; idx < ids.length; idx++) {
            int base = 0;
            final BytesRef id = ids[idx];
            for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
                IndexState.PKLookupState pkState = pkStates[subIDX];
                //System.out.println("\nTASK: sub=" + sub);
                //System.out.println("TEST: lookup " + ids[idx].utf8ToString());
                if (pkState.termsEnum.seekExact(id)) {
                    //System.out.println("  found!");
                    PostingsEnum docs = pkState.termsEnum.postings(pkState.postingsEnum, 0);
                    assert docs != null;
                    int docID = DocIdSetIterator.NO_MORE_DOCS;
                    for (int d = docs.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = docs.nextDoc()) {
                        if (pkState.liveDocs == null || pkState.liveDocs.get(d)) {
                            docID = d;
                            break;
                        }
                    }
                    if (docID != DocIdSetIterator.NO_MORE_DOCS) {
                        answers[idx] = base + docID;
                        break;
                    }
                }
                base += subReaders.get(subIDX).reader().maxDoc();
            }
        }
    } finally {
        state.mgr.release(searcher);
    }
}

From source file: perf.PointsPKLookupTask.java

License: Apache License

@Override
public void go(IndexState state) throws IOException {

    final IndexSearcher searcher = state.mgr.acquire();
    try {
        final List<LeafReaderContext> subReaders = searcher.getIndexReader().leaves();
        IndexState.PointsPKLookupState[] pkStates = new IndexState.PointsPKLookupState[subReaders.size()];
        for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
            LeafReaderContext ctx = subReaders.get(subIDX);
            ThreadLocal<IndexState.PointsPKLookupState> states = state.pointsPKLookupStates
                    .get(ctx.reader().getCoreCacheKey());
            // NPE here means you are trying to use this task on a newly refreshed NRT reader!
            IndexState.PointsPKLookupState pkState = states.get();
            if (pkState == null) {
                pkState = new IndexState.PointsPKLookupState(ctx.reader(), "id");
                states.set(pkState);
            }
            pkStates[subIDX] = pkState;
        }
        for (int idx = 0; idx < ids.length; idx++) {
            /*
            int base = 0;
            final int id = ids[idx];
            for(int subIDX=0;subIDX<subReaders.size();subIDX++) {
              IndexState.PointsPKLookupState pkState = pkStates[subIDX];
              pkState.visitor.reset(id);
              pkState.bkdReader.intersect(pkState.state);
              if (pkState.visitor.answer != -1) {
                answers[idx] = base + pkState.visitor.answer;
                //System.out.println(id + " -> " + answers[idx]);
                break;
              }
              base += subReaders.get(subIDX).reader().maxDoc();
            }
            */

            // this approach works, uses public APIs, but is slowish:
            /*
            Query q = IntPoint.newExactQuery("id", ids[idx]);
            TopDocs hits = searcher.search(q, 1);
            if (hits.totalHits == 1) {
              answers[idx] = hits.scoreDocs[0].doc;
            }
            */
        }
    } finally {
        state.mgr.release(searcher);
    }
}

From source file: perf.RespellTask.java

License: Apache License

@Override
public void go(IndexState state) throws IOException {
    final IndexSearcher searcher = state.mgr.acquire();
    try {
        answers = state.spellChecker.suggestSimilar(term, 10, searcher.getIndexReader(),
                SuggestMode.SUGGEST_MORE_POPULAR);
    } finally {
        state.mgr.release(searcher);
    }
    //System.out.println("term=" + term); 
    //printResults(System.out, state);
}

From source file: perf.SearchPerfTest.java

License: Apache License

private static void _main(String[] clArgs) throws Exception {

    // args: dirImpl indexPath numThread numIterPerThread
    // eg java SearchPerfTest /path/to/index 4 100
    final Args args = new Args(clArgs);

    Directory dir0;
    final String dirPath = args.getString("-indexPath") + "/index";
    final String dirImpl = args.getString("-dirImpl");

    OpenDirectory od = OpenDirectory.get(dirImpl);

    /*
    } else if (dirImpl.equals("NativePosixMMapDirectory")) {
      dir0 = new NativePosixMMapDirectory(new File(dirPath));
      ramDir = null;
      if (doFacets) {
        facetsDir = new NativePosixMMapDirectory(new File(facetsDirPath));
      }
    } else if (dirImpl.equals("CachingDirWrapper")) {
      dir0 = new CachingRAMDirectory(new MMapDirectory(new File(dirPath)));
      ramDir = null;
    } else if (dirImpl.equals("RAMExceptDirectPostingsDirectory")) {
      // Load only non-postings files into RAMDir (assumes
      // Lucene40PF is the wrapped PF):
      Set<String> postingsExtensions = new HashSet<String>();
      postingsExtensions.add("frq");
      postingsExtensions.add("prx");
      postingsExtensions.add("tip");
      postingsExtensions.add("tim");
              
      ramDir =  new RAMDirectory();
      Directory fsDir = new MMapDirectory(new File(dirPath));
      for (String file : fsDir.listAll()) {
        int idx = file.indexOf('.');
        if (idx != -1 && postingsExtensions.contains(file.substring(idx+1, file.length()))) {
          continue;
        }
            
        fsDir.copy(ramDir, file, file, IOContext.READ);
      }
      dir0 = new FileSwitchDirectory(postingsExtensions,
                             fsDir,
                             ramDir,
                             true);
      if (doFacets) {
        facetsDir = new RAMDirectory(new SimpleFSDirectory(new File(facetsDirPath)), IOContext.READ);
      }
      */

    final RAMDirectory ramDir;
    dir0 = od.open(Paths.get(dirPath));
    if (dir0 instanceof RAMDirectory) {
        ramDir = (RAMDirectory) dir0;
    } else {
        ramDir = null;
    }

    // TODO: NativeUnixDir?

    final String analyzer = args.getString("-analyzer");
    final String tasksFile = args.getString("-taskSource");
    final int searchThreadCount = args.getInt("-searchThreadCount");
    final String fieldName = args.getString("-field");
    final boolean printHeap = args.getFlag("-printHeap");
    final boolean doPKLookup = args.getFlag("-pk");
    final int topN = args.getInt("-topN");
    final boolean doStoredLoads = args.getFlag("-loadStoredFields");

    // Used to choose which random subset of tasks we will
    // run, to generate the PKLookup tasks, and to generate
    // any random pct filters:
    final long staticRandomSeed = args.getLong("-staticSeed");

    // Used to shuffle the random subset of tasks:
    final long randomSeed = args.getLong("-seed");

    // TODO: this could be way better.
    final String similarity = args.getString("-similarity");
    // now reflect
    final Class<? extends Similarity> simClazz = Class
            .forName("org.apache.lucene.search.similarities." + similarity).asSubclass(Similarity.class);
    final Similarity sim = simClazz.newInstance();

    System.out.println("Using dir impl " + dir0.getClass().getName());
    System.out.println("Analyzer " + analyzer);
    System.out.println("Similarity " + similarity);
    System.out.println("Search thread count " + searchThreadCount);
    System.out.println("topN " + topN);
    System.out.println("JVM " + (Constants.JRE_IS_64BIT ? "is" : "is not") + " 64bit");
    System.out.println("Pointer is " + RamUsageEstimator.NUM_BYTES_OBJECT_REF + " bytes");

    final Analyzer a;
    if (analyzer.equals("EnglishAnalyzer")) {
        a = new EnglishAnalyzer();
    } else if (analyzer.equals("ClassicAnalyzer")) {
        a = new ClassicAnalyzer();
    } else if (analyzer.equals("StandardAnalyzer")) {
        a = new StandardAnalyzer();
    } else if (analyzer.equals("StandardAnalyzerNoStopWords")) {
        a = new StandardAnalyzer(CharArraySet.EMPTY_SET);
    } else if (analyzer.equals("ShingleStandardAnalyzer")) {
        a = new ShingleAnalyzerWrapper(new StandardAnalyzer(CharArraySet.EMPTY_SET), 2, 2,
                ShingleFilter.DEFAULT_TOKEN_SEPARATOR, true, true, ShingleFilter.DEFAULT_FILLER_TOKEN);
    } else {
        throw new RuntimeException("unknown analyzer " + analyzer);
    }

    final ReferenceManager<IndexSearcher> mgr;
    final IndexWriter writer;
    final Directory dir;

    final String commit = args.getString("-commit");
    final String hiliteImpl = args.getString("-hiliteImpl");

    final String logFile = args.getString("-log");

    final long tSearcherStart = System.currentTimeMillis();

    final boolean verifyCheckSum = !args.getFlag("-skipVerifyChecksum");
    final boolean recacheFilterDeletes = args.getFlag("-recacheFilterDeletes");

    if (recacheFilterDeletes) {
        throw new UnsupportedOperationException("recacheFilterDeletes was deprecated");
    }

    if (args.getFlag("-nrt")) {
        // TODO: get taxoReader working here too
        // TODO: factor out & share this CL processing w/ Indexer
        final int indexThreadCount = args.getInt("-indexThreadCount");
        final String lineDocsFile = args.getString("-lineDocsFile");
        final float docsPerSecPerThread = args.getFloat("-docsPerSecPerThread");
        final float reopenEverySec = args.getFloat("-reopenEverySec");
        final boolean storeBody = args.getFlag("-store");
        final boolean tvsBody = args.getFlag("-tvs");
        final boolean useCFS = args.getFlag("-cfs");
        final String defaultPostingsFormat = args.getString("-postingsFormat");
        final String idFieldPostingsFormat = args.getString("-idFieldPostingsFormat");
        final boolean verbose = args.getFlag("-verbose");
        final boolean cloneDocs = args.getFlag("-cloneDocs");
        final Mode mode = Mode.valueOf(args.getString("-mode", "update").toUpperCase(Locale.ROOT));

        final long reopenEveryMS = (long) (1000 * reopenEverySec);

        if (verbose) {
            InfoStream.setDefault(new PrintStreamInfoStream(System.out));
        }

        if (!dirImpl.equals("RAMDirectory") && !dirImpl.equals("RAMExceptDirectPostingsDirectory")) {
            System.out.println("Wrap NRTCachingDirectory");
            dir0 = new NRTCachingDirectory(dir0, 20, 400.0);
        }

        dir = dir0;

        final IndexWriterConfig iwc = new IndexWriterConfig(a);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
        iwc.setRAMBufferSizeMB(256.0);
        iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);

        // TODO: also RAMDirExceptDirect...?  need to
        // ... block deletes against wrapped FSDir?
        if (dirImpl.equals("RAMDirectory")) {
            // Let IW remove files only referenced by starting commit:
            iwc.setIndexDeletionPolicy(new KeepNoCommitsDeletionPolicy());
        }

        if (commit != null && commit.length() > 0) {
            System.out.println("Opening writer on commit=" + commit);
            iwc.setIndexCommit(PerfUtils.findCommitPoint(commit, dir));
        }

        ((TieredMergePolicy) iwc.getMergePolicy()).setNoCFSRatio(useCFS ? 1.0 : 0.0);
        //((TieredMergePolicy) iwc.getMergePolicy()).setMaxMergedSegmentMB(1024);
        //((TieredMergePolicy) iwc.getMergePolicy()).setReclaimDeletesWeight(3.0);
        //((TieredMergePolicy) iwc.getMergePolicy()).setMaxMergeAtOnce(4);

        final Codec codec = new Lucene62Codec() {
            @Override
            public PostingsFormat getPostingsFormatForField(String field) {
                return PostingsFormat
                        .forName(field.equals("id") ? idFieldPostingsFormat : defaultPostingsFormat);
            }
        };
        iwc.setCodec(codec);

        final ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) iwc.getMergeScheduler();
        // Only let one merge run at a time...
        // ... but queue up to 4, before the indexing threads are stalled:
        cms.setMaxMergesAndThreads(4, 1);

        iwc.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
            @Override
            public void warm(LeafReader reader) throws IOException {
                final long t0 = System.currentTimeMillis();
                //System.out.println("DO WARM: " + reader);
                IndexSearcher s = new IndexSearcher(reader);
                s.setQueryCache(null); // don't bench the cache
                s.search(new TermQuery(new Term(fieldName, "united")), 10);
                final long t1 = System.currentTimeMillis();
                System.out.println("warm segment=" + reader + " numDocs=" + reader.numDocs() + ": took "
                        + (t1 - t0) + " msec");
            }
        });

        writer = new IndexWriter(dir, iwc);
        System.out.println("Initial writer.maxDoc()=" + writer.maxDoc());

        // TODO: add -nrtBodyPostingsOffsets instead of
        // hardwired false:
        boolean addDVFields = mode == Mode.BDV_UPDATE || mode == Mode.NDV_UPDATE;
        LineFileDocs lineFileDocs = new LineFileDocs(lineDocsFile, false, storeBody, tvsBody, false, cloneDocs,
                null, null, null, addDVFields);
        IndexThreads threads = new IndexThreads(new Random(17), writer, new AtomicBoolean(false), lineFileDocs,
                indexThreadCount, -1, false, false, mode, docsPerSecPerThread, null, -1.0, -1);
        threads.start();

        mgr = new SearcherManager(writer, new SearcherFactory() {
            @Override
            public IndexSearcher newSearcher(IndexReader reader, IndexReader previous) {
                IndexSearcher s = new IndexSearcher(reader);
                s.setQueryCache(null); // don't bench the cache
                s.setSimilarity(sim);
                return s;
            }
        });

        System.out.println("reopen every " + reopenEverySec);

        Thread reopenThread = new Thread() {
            @Override
            public void run() {
                try {
                    final long startMS = System.currentTimeMillis();

                    int reopenCount = 1;
                    while (true) {
                        final long sleepMS = startMS + (reopenCount * reopenEveryMS)
                                - System.currentTimeMillis();
                        if (sleepMS < 0) {
                            System.out.println("WARNING: reopen fell behind by " + Math.abs(sleepMS) + " ms");
                        } else {
                            Thread.sleep(sleepMS);
                        }

                        mgr.maybeRefresh();
                        reopenCount++;
                        IndexSearcher s = mgr.acquire();
                        try {
                            if (ramDir != null) {
                                System.out.println(String.format(Locale.ENGLISH,
                                        "%.1fs: index: %d bytes in RAMDir; writer.maxDoc()=%d; searcher.maxDoc()=%d; searcher.numDocs()=%d",
                                        (System.currentTimeMillis() - startMS) / 1000.0, ramDir.ramBytesUsed(),
                                        writer.maxDoc(), s.getIndexReader().maxDoc(),
                                        s.getIndexReader().numDocs()));
                                //String[] l = ramDir.listAll();
                                //Arrays.sort(l);
                                //for(String f : l) {
                                //System.out.println("  " + f + ": " + ramDir.fileLength(f));
                                //}
                            } else {
                                System.out.println(String.format(Locale.ENGLISH,
                                        "%.1fs: done reopen; writer.maxDoc()=%d; searcher.maxDoc()=%d; searcher.numDocs()=%d",
                                        (System.currentTimeMillis() - startMS) / 1000.0, writer.maxDoc(),
                                        s.getIndexReader().maxDoc(), s.getIndexReader().numDocs()));
                            }
                        } finally {
                            mgr.release(s);
                        }
                    }
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
        };
        reopenThread.setName("ReopenThread");
        reopenThread.setPriority(4 + Thread.currentThread().getPriority());
        reopenThread.start();

    } else {
        dir = dir0;
        writer = null;
        final DirectoryReader reader;
        if (commit != null && commit.length() > 0) {
            System.out.println("Opening searcher on commit=" + commit);
            reader = DirectoryReader.open(PerfUtils.findCommitPoint(commit, dir));
        } else {
            // open last commit
            reader = DirectoryReader.open(dir);
        }
        IndexSearcher s = new IndexSearcher(reader);
        s.setQueryCache(null); // don't bench the cache
        s.setSimilarity(sim);
        System.out.println("maxDoc=" + reader.maxDoc() + " numDocs=" + reader.numDocs() + " %tg deletes="
                + (100. * reader.maxDoc() / reader.numDocs()));

        mgr = new SingleIndexSearcher(s);
    }

    System.out.println((System.currentTimeMillis() - tSearcherStart) + " msec to init searcher/NRT");

    {
        IndexSearcher s = mgr.acquire();
        try {
            System.out.println("Searcher: numDocs=" + s.getIndexReader().numDocs() + " maxDoc="
                    + s.getIndexReader().maxDoc() + ": " + s);
        } finally {
            mgr.release(s);
        }
    }

    //System.out.println("searcher=" + searcher);

    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.setHierarchical("Date", true);

    TaxonomyReader taxoReader;
    Path taxoPath = Paths.get(args.getString("-indexPath"), "facets");
    Directory taxoDir = od.open(taxoPath);
    if (DirectoryReader.indexExists(taxoDir)) {
        taxoReader = new DirectoryTaxonomyReader(taxoDir);
        System.out.println("Taxonomy has " + taxoReader.getSize() + " ords");
    } else {
        taxoReader = null;
    }

    final Random staticRandom = new Random(staticRandomSeed);
    final Random random = new Random(randomSeed);

    final DirectSpellChecker spellChecker = new DirectSpellChecker();
    final IndexState indexState = new IndexState(mgr, taxoReader, fieldName, spellChecker, hiliteImpl,
            facetsConfig);

    final QueryParser queryParser = new QueryParser("body", a);
    TaskParser taskParser = new TaskParser(indexState, queryParser, fieldName, topN, staticRandom,
            doStoredLoads);

    final TaskSource tasks;

    if (tasksFile.startsWith("server:")) {
        int idx = tasksFile.indexOf(':', 8);
        if (idx == -1) {
            throw new RuntimeException(
                    "server is missing the port; should be server:interface:port (got: " + tasksFile + ")");
        }
        String iface = tasksFile.substring(7, idx);
        int port = Integer.valueOf(tasksFile.substring(1 + idx));
        RemoteTaskSource remoteTasks = new RemoteTaskSource(iface, port, searchThreadCount, taskParser);

        // nocommit must stop thread?
        tasks = remoteTasks;
    } else {
        // Load the tasks from a file:
        final int taskRepeatCount = args.getInt("-taskRepeatCount");
        final int numTaskPerCat = args.getInt("-tasksPerCat");
        tasks = new LocalTaskSource(indexState, taskParser, tasksFile, staticRandom, random, numTaskPerCat,
                taskRepeatCount, doPKLookup);
        System.out.println("Task repeat count " + taskRepeatCount);
        System.out.println("Tasks file " + tasksFile);
        System.out.println("Num task per cat " + numTaskPerCat);
    }

    args.check();

    // Evil respeller:
    //spellChecker.setMinPrefix(0);
    //spellChecker.setMaxInspections(1024);
    final TaskThreads taskThreads = new TaskThreads(tasks, indexState, searchThreadCount);
    Thread.sleep(10);

    final long startNanos = System.nanoTime();
    taskThreads.start();
    taskThreads.finish();
    final long endNanos = System.nanoTime();

    System.out.println("\n" + ((endNanos - startNanos) / 1000000.0) + " msec total");

    final List<Task> allTasks = tasks.getAllTasks();

    PrintStream out = new PrintStream(logFile);

    if (allTasks != null) {
        // Tasks were local: verify checksums:

        // indexState.setDocIDToID();

        final Map<Task, Task> tasksSeen = new HashMap<Task, Task>();

        out.println("\nResults for " + allTasks.size() + " tasks:");

        boolean fail = false;
        for (final Task task : allTasks) {
            if (verifyCheckSum) {
                final Task other = tasksSeen.get(task);
                if (other != null) {
                    if (task.checksum() != other.checksum()) {
                        System.out.println("\nTASK:");
                        task.printResults(System.out, indexState);
                        System.out.println("\nOTHER TASK:");
                        other.printResults(System.out, indexState);
                        fail = true;
                        //throw new RuntimeException("task " + task + " hit different checksums: " + task.checksum() + " vs " + other.checksum() + " other=" + other);
                    }
                } else {
                    tasksSeen.put(task, task);
                }
            }
            out.println("\nTASK: " + task);
            out.println("  " + (task.runTimeNanos / 1000000.0) + " msec");
            out.println("  thread " + task.threadID);
            task.printResults(out, indexState);
        }
        if (fail) {
            throw new RuntimeException("some tasks got different results across different threads");
        }

        allTasks.clear();
    }

    mgr.close();

    if (taxoReader != null) {
        taxoReader.close();
    }

    if (writer != null) {
        // Don't actually commit any index changes:
        writer.rollback();
    }

    dir.close();

    if (printHeap) {

        // Try to get RAM usage -- some ideas poached from http://www.javaworld.com/javaworld/javatips/jw-javatip130.html
        final Runtime runtime = Runtime.getRuntime();
        long usedMem1 = PerfUtils.usedMemory(runtime);
        long usedMem2 = Long.MAX_VALUE;
        for (int iter = 0; iter < 10; iter++) {
            runtime.runFinalization();
            runtime.gc();
            Thread.yield();
            Thread.sleep(100);
            usedMem2 = usedMem1;
            usedMem1 = PerfUtils.usedMemory(runtime);
        }
        out.println("\nHEAP: " + PerfUtils.usedMemory(runtime));
    }
    out.close();
}

From source file: perf.SearchTask.java

License: Apache License

@Override
public void go(IndexState state) throws IOException {
    //System.out.println("go group=" + this.group + " single=" + singlePassGroup + " xxx=" + xxx + " this=" + this);
    final IndexSearcher searcher = state.mgr.acquire();

    //System.out.println("GO query=" + q);

    try {
        if (doHilite) {
            if (state.fastHighlighter != null) {
                fieldQuery = state.fastHighlighter.getFieldQuery(q, searcher.getIndexReader());
            } else if (state.useHighlighter) {
                highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(q));
            } else {
                // no setup for postingshighlighter
            }
        }

        if (group != null) {
            if (singlePassGroup) {
                final BlockGroupingCollector c = new BlockGroupingCollector(Sort.RELEVANCE, 10, true,
                        searcher.createNormalizedWeight(state.groupEndQuery, false));
                searcher.search(q, c);
                groupsResultBlock = c.getTopGroups(Sort.RELEVANCE, 0, 0, 10, true);

                if (doHilite) {
                    hilite(groupsResultBlock, state, searcher);
                }

            } else {
                //System.out.println("GB: " + group);
                final TermFirstPassGroupingCollector c1 = new TermFirstPassGroupingCollector(group,
                        Sort.RELEVANCE, 10);

                final Collector c;
                final TermAllGroupsCollector allGroupsCollector;
                // Turn off AllGroupsCollector for now -- it's very slow:
                if (false && doCountGroups) {
                    allGroupsCollector = new TermAllGroupsCollector(group);
                    //c = MultiCollector.wrap(allGroupsCollector, c1);
                    c = c1;
                } else {
                    allGroupsCollector = null;
                    c = c1;
                }

                searcher.search(q, c);

                final Collection<SearchGroup<BytesRef>> topGroups = c1.getTopGroups(0, true);
                if (topGroups != null) {
                    final TermSecondPassGroupingCollector c2 = new TermSecondPassGroupingCollector(group,
                            topGroups, Sort.RELEVANCE, Sort.RELEVANCE, 10, true, true, true);
                    searcher.search(q, c2);
                    groupsResultTerms = c2.getTopGroups(0);
                    if (allGroupsCollector != null) {
                        groupsResultTerms = new TopGroups<BytesRef>(groupsResultTerms,
                                allGroupsCollector.getGroupCount());
                    }
                    if (doHilite) {
                        hilite(groupsResultTerms, state, searcher);
                    }
                }
            }
        } else if (!facetRequests.isEmpty()) {
            // TODO: support sort, filter too!!
            // TODO: support other facet methods
            if (doDrillSideways) {
                // nocommit todo
                hits = null;
                facetResults = null;
            } else {
                facetResults = new ArrayList<FacetResult>();
                FacetsCollector fc = new FacetsCollector();
                hits = FacetsCollector.search(searcher, q, 10, fc);
                long t0 = System.nanoTime();

                Facets mainFacets = null;
                for (String request : facetRequests) {
                    if (request.startsWith("range:")) {
                        int i = request.indexOf(':', 6);
                        if (i == -1) {
                            throw new IllegalArgumentException("range facets request \"" + request
                                    + "\" is missing field; should be range:field:0-10,10-20");
                        }
                        String field = request.substring(6, i);
                        String[] rangeStrings = request.substring(i + 1, request.length()).split(",");
                        LongRange[] ranges = new LongRange[rangeStrings.length];
                        for (int rangeIDX = 0; rangeIDX < ranges.length; rangeIDX++) {
                            String rangeString = rangeStrings[rangeIDX];
                            int j = rangeString.indexOf('-');
                            if (j == -1) {
                                throw new IllegalArgumentException(
                                        "range facets request should be X-Y; got: " + rangeString);
                            }
                            long start = Long.parseLong(rangeString.substring(0, j));
                            long end = Long.parseLong(rangeString.substring(j + 1));
                            ranges[rangeIDX] = new LongRange(rangeString, start, true, end, true);
                        }
                        LongRangeFacetCounts facets = new LongRangeFacetCounts(field, fc, ranges);
                        facetResults.add(facets.getTopChildren(ranges.length, field));
                    } else {
                        Facets facets = new FastTaxonomyFacetCounts(state.taxoReader, state.facetsConfig, fc);
                        facetResults.add(facets.getTopChildren(10, request));
                    }
                }
                getFacetResultsMsec = (System.nanoTime() - t0) / 1000000.0;
            }
        } else if (s == null) {
            hits = searcher.search(q, topN);
            if (doHilite) {
                hilite(hits, state, searcher, q);
            }
        } else {
            hits = searcher.search(q, topN, s);
            if (doHilite) {
                hilite(hits, state, searcher, q);
            }
            /*
              final boolean fillFields = true;
              final boolean fieldSortDoTrackScores = true;
              final boolean fieldSortDoMaxScore = true;
              final TopFieldCollector c = TopFieldCollector.create(s, topN,
              fillFields,
              fieldSortDoTrackScores,
              fieldSortDoMaxScore,
              false);
              searcher.search(q, c);
              hits = c.topDocs();
            */
        }
        if (hits != null) {
            totalHitCount = hits.totalHits;

            if (doStoredLoads) {
                for (int i = 0; i < hits.scoreDocs.length; i++) {
                    ScoreDoc scoreDoc = hits.scoreDocs[i];
                    searcher.doc(scoreDoc.doc);
                }
            }

        } else if (groupsResultBlock != null) {
            totalHitCount = groupsResultBlock.totalHitCount;
        }
    } catch (Throwable t) {
        System.out.println("EXC: " + q);
        throw new RuntimeException(t);
        //System.out.println("TE: " + TermsEnum.getStats());
    } finally {
        state.mgr.release(searcher);
        fieldQuery = null;
        highlighter = null;
    }
}

From source file: perf.SearchTask.java

License: Apache License

private void hilite(int docID, IndexState indexState, IndexSearcher searcher) throws IOException {
    //System.out.println("  title=" + searcher.doc(docID).get("titleTokenized"));
    if (indexState.fastHighlighter != null) {
        for (String h : indexState.fastHighlighter.getBestFragments(fieldQuery, searcher.getIndexReader(),
                docID, indexState.textFieldName, 100, 2)) {
            totHiliteHash += h.hashCode();
            //System.out.println("    frag: " + h);
        }
    } else {
        Document doc = searcher.doc(docID);
        String text = doc.get(indexState.textFieldName);
        // NOTE: passing null for analyzer: TermVectors must
        // be indexed!
        TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), docID,
                indexState.textFieldName, null);
        TextFragment[] frags;
        try {
            frags = highlighter.getBestTextFragments(tokenStream, text, false, 2);
        } catch (InvalidTokenOffsetsException ioe) {
            throw new RuntimeException(ioe);
        }

        for (int j = 0; j < frags.length; j++) {
            if (frags[j] != null && frags[j].getScore() > 0) {
                //System.out.println("    frag " + j + ": " + frags[j].toString());
                totHiliteHash += frags[j].toString().hashCode();
            }
        }
    }
}