Example usage for org.apache.lucene.util ArrayUtil timSort

List of usage examples for org.apache.lucene.util ArrayUtil timSort

Introduction

In this page you can find the example usage for org.apache.lucene.util ArrayUtil timSort.

Prototype

public static <T> void timSort(T[] a, Comparator<? super T> comp) 

Source Link

Document

Sorts the given array using the given Comparator.

Usage

From source file:com.sindicetech.siren.search.node.NodeConjunctionScorer.java

License:Open Source License

/**
 * Initializes this conjunction scorer: advances every sub-scorer to its first
 * candidate document, sorts them by that document, and then positions all of
 * them on the first document they agree on.
 * <p>
 * If any sub-scorer is exhausted, or the scorers never agree on a document,
 * {@code lastDocument}/{@code lastNode} are set to the sentinel
 * {@code NO_MORE_DOC}/{@code NO_MORE_NOD} values and the scorer does no
 * further work.
 *
 * @throws IOException if a sub-scorer fails while advancing
 */
private void init() throws IOException {
    for (final NodeScorer scorer : scorers) {
        if (!scorer.nextCandidateDocument()) {
            // If even one of the sub-scorers does not have any documents, this
            // scorer should not attempt to do any more work.
            lastDocument = DocsAndNodesIterator.NO_MORE_DOC;
            lastNode = DocsAndNodesIterator.NO_MORE_NOD;
            return;
        }
    }

    // Sort the array the first time...
    // We don't need to sort the array in any future calls because we know
    // it will already start off sorted (all scorers on same candidate doc).

    // Note that this comparator is not consistent with equals!
    // Also we use timSort here to be stable (so order of Scorers that
    // match on first document keeps preserved):
    ArrayUtil.timSort(scorers, new Comparator<NodeScorer>() { // sort the array
        public int compare(final NodeScorer o1, final NodeScorer o2) {
            // Integer.compare avoids the integer-overflow bug of the
            // subtraction idiom "o1.doc() - o2.doc()" on large doc IDs.
            return Integer.compare(o1.doc(), o2.doc());
        }
    });

    // NOTE: doNext() must be called before the re-sorting of the array later on.
    // The reason is this: assume there are 5 scorers, whose first docs are 1,
    // 2, 3, 5, 5 respectively. Sorting (above) leaves the array as is. Calling
    // doNext() here advances all the first scorers to 5 (or a larger doc ID
    // they all agree on).
    // However, if we re-sort before doNext() is called, the order will be 5, 3,
    // 2, 1, 5 and then doNext() will stop immediately, since the first scorer's
    // docs equals the last one. So the invariant that after calling doNext()
    // all scorers are on the same doc ID is broken.
    if (!this.doNext()) {
        // The scorers did not agree on any document.
        lastDocument = DocsAndNodesIterator.NO_MORE_DOC;
        lastNode = DocsAndNodesIterator.NO_MORE_NOD;
        return;
    }

    // If first-time skip distance is any predictor of
    // scorer sparseness, then we should always try to skip first on
    // those scorers.
    // Keep last scorer in it's last place (it will be the first
    // to be skipped on), but reverse all of the others so that
    // they will be skipped on in order of original high skip.
    final int end = (scorers.length - 1);
    for (int i = 0; i < (end >> 1); i++) {
        final NodeScorer tmp = scorers[i];
        scorers[i] = scorers[end - i - 1];
        scorers[end - i - 1] = tmp;
    }
}

From source file:com.sindicetech.siren.search.node.TopNodeTermsRewrite.java

License:Open Source License

/**
 * Rewrites a {@code MultiNodeTermQuery} into a top-level query containing at
 * most {@code maxSize} of the highest-boosted matching terms. Terms are
 * gathered into a bounded priority queue while collecting, then sorted by the
 * terms' natural comparator before being added as clauses.
 */
@Override
public Q rewrite(final IndexReader reader, final MultiNodeTermQuery query) throws IOException {
    final int maxSize = Math.min(size, this.getMaxSize());
    final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();
    this.collectTerms(reader, query, new TermCollector() {
        private final MaxNonCompetitiveBoostAttribute maxBoostAtt = attributes
                .addAttribute(MaxNonCompetitiveBoostAttribute.class);

        // Maps term bytes -> ScoreTerm so duplicates across segments only
        // update the existing queue entry instead of adding a new one.
        private final Map<BytesRef, ScoreTerm> visitedTerms = new HashMap<BytesRef, ScoreTerm>();

        private TermsEnum termsEnum;
        private Comparator<BytesRef> termComp;
        private BoostAttribute boostAtt;
        // Reusable "scratch" entry; replaced with a fresh instance whenever it
        // is actually inserted into the queue (see collect()).
        private ScoreTerm st;

        @Override
        public void setNextEnum(final TermsEnum termsEnum) throws IOException {
            this.termsEnum = termsEnum;
            this.termComp = termsEnum.getComparator();

            // null resets the per-segment last-term tracking used by the
            // in-order collection assertion below.
            assert this.compareToLastTerm(null);

            // lazy init the initial ScoreTerm because comparator is not known on ctor:
            if (st == null)
                st = new ScoreTerm(this.termComp, new TermContext(topReaderContext));
            boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class);
        }

        // for assert:
        private BytesRef lastTerm;

        // Assertion helper: verifies terms arrive in strictly increasing order
        // within a segment. Always returns true so it can live inside `assert`.
        private boolean compareToLastTerm(final BytesRef t) throws IOException {
            if (lastTerm == null && t != null) {
                lastTerm = BytesRef.deepCopyOf(t);
            } else if (t == null) {
                lastTerm = null;
            } else {
                assert termsEnum.getComparator().compare(lastTerm, t) < 0 : "lastTerm=" + lastTerm + " t=" + t;
                lastTerm.copyBytes(t);
            }
            return true;
        }

        @Override
        public boolean collect(final BytesRef bytes) throws IOException {
            final float boost = boostAtt.getBoost();

            // make sure within a single seg we always collect
            // terms in order
            assert this.compareToLastTerm(bytes);

            //System.out.println("TTR.collect term=" + bytes.utf8ToString() + " boost=" + boost + " ord=" + readerContext.ord);
            // ignore uncompetitive hits
            if (stQueue.size() == maxSize) {
                final ScoreTerm t = stQueue.peek();
                if (boost < t.boost)
                    return true;
                if (boost == t.boost && termComp.compare(bytes, t.bytes) > 0)
                    return true;
            }
            ScoreTerm t = visitedTerms.get(bytes);
            final TermState state = termsEnum.termState();
            assert state != null;
            if (t != null) {
                // if the term is already in the PQ, only update docFreq of term in PQ
                assert t.boost == boost : "boost should be equal in all segment TermsEnums";
                t.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
            } else {
                // add new entry in PQ, we must clone the term, else it may get overwritten!
                st.bytes.copyBytes(bytes);
                st.boost = boost;
                visitedTerms.put(st.bytes, st);
                assert st.termState.docFreq() == 0;
                st.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
                stQueue.offer(st);
                // possibly drop entries from queue
                if (stQueue.size() > maxSize) {
                    // recycle the evicted entry as the next scratch ScoreTerm
                    st = stQueue.poll();
                    visitedTerms.remove(st.bytes);
                    st.termState.clear(); // reset the termstate!
                } else {
                    st = new ScoreTerm(termComp, new TermContext(topReaderContext));
                }
                assert stQueue.size() <= maxSize : "the PQ size must be limited to maxSize";
                // set maxBoostAtt with values to help FuzzyTermsEnum to optimize
                if (stQueue.size() == maxSize) {
                    t = stQueue.peek();
                    maxBoostAtt.setMaxNonCompetitiveBoost(t.boost);
                    maxBoostAtt.setCompetitiveTerm(t.bytes);
                }
            }

            return true;
        }
    });

    final Q q = this.getTopLevelQuery(query);
    final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
    // PQ iteration order is unspecified, so sort terms before adding clauses.
    ArrayUtil.timSort(scoreTerms, scoreTermSortByTermComp);

    for (final ScoreTerm st : scoreTerms) {
        final Term term = new Term(query.field, st.bytes);
        assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs "
                + st.termState.docFreq() + " term=" + term;
        this.addClause(q, term, st.termState.docFreq(), query.getBoost() * st.boost, st.termState); // add to query
    }
    return q;
}

From source file:org.apache.solr.handler.AnalysisRequestHandlerBase.java

License:Apache License

/**
 * Converts the list of Tokens to a list of NamedLists representing the tokens.
 *
 * @param tokenList  Tokens to convert/* w ww.j  a  v  a2  s .  c  om*/
 * @param context The analysis context
 *
 * @return List of NamedLists containing the relevant information taken from the tokens
 */
private List<NamedList> convertTokensToNamedLists(final List<AttributeSource> tokenList,
        AnalysisContext context) {
    final List<NamedList> tokensNamedLists = new ArrayList<NamedList>();
    final FieldType fieldType = context.getFieldType();
    final AttributeSource[] tokens = tokenList.toArray(new AttributeSource[tokenList.size()]);

    // sort the tokens by absoulte position
    ArrayUtil.timSort(tokens, new Comparator<AttributeSource>() {
        @Override
        public int compare(AttributeSource a, AttributeSource b) {
            return arrayCompare(a.getAttribute(TokenTrackingAttribute.class).getPositions(),
                    b.getAttribute(TokenTrackingAttribute.class).getPositions());
        }

        private int arrayCompare(int[] a, int[] b) {
            int p = 0;
            final int stop = Math.min(a.length, b.length);
            while (p < stop) {
                int diff = a[p] - b[p];
                if (diff != 0)
                    return diff;
                p++;
            }
            // One is a prefix of the other, or, they are equal:
            return a.length - b.length;
        }
    });

    for (int i = 0; i < tokens.length; i++) {
        AttributeSource token = tokens[i];
        final NamedList<Object> tokenNamedList = new SimpleOrderedMap<Object>();
        final TermToBytesRefAttribute termAtt = token.getAttribute(TermToBytesRefAttribute.class);
        BytesRef rawBytes = termAtt.getBytesRef();
        termAtt.fillBytesRef();
        final String text = fieldType.indexedToReadable(rawBytes, new CharsRef(rawBytes.length)).toString();
        tokenNamedList.add("text", text);

        if (token.hasAttribute(CharTermAttribute.class)) {
            final String rawText = token.getAttribute(CharTermAttribute.class).toString();
            if (!rawText.equals(text)) {
                tokenNamedList.add("raw_text", rawText);
            }
        }

        tokenNamedList.add("raw_bytes", rawBytes.toString());

        if (context.getTermsToMatch().contains(rawBytes)) {
            tokenNamedList.add("match", true);
        }

        token.reflectWith(new AttributeReflector() {
            @Override
            public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
                // leave out position and bytes term
                if (TermToBytesRefAttribute.class.isAssignableFrom(attClass))
                    return;
                if (CharTermAttribute.class.isAssignableFrom(attClass))
                    return;
                if (PositionIncrementAttribute.class.isAssignableFrom(attClass))
                    return;

                String k = attClass.getName() + '#' + key;

                // map keys for "standard attributes":
                if (ATTRIBUTE_MAPPING.containsKey(k)) {
                    k = ATTRIBUTE_MAPPING.get(k);
                }

                if (value instanceof BytesRef) {
                    final BytesRef p = (BytesRef) value;
                    value = p.toString();
                }

                tokenNamedList.add(k, value);
            }
        });

        tokensNamedLists.add(tokenNamedList);
    }

    return tokensNamedLists;
}

From source file:org.elasticsearch.cluster.routing.allocation.BalanceConfigurationTests.java

License:Apache License

/**
 * Verifies that the balancer does not rebalance shards when only the primary
 * distribution is skewed: a custom allocator builds a deliberately
 * primary-overloaded layout, and after starting all shards and rerouting,
 * every shard must still be STARTED (i.e. no relocations were triggered).
 */
public void testNoRebalanceOnPrimaryOverload() {
    Settings.Builder settings = Settings.builder();
    AllocationService strategy = new AllocationService(settings.build(),
            randomAllocationDeciders(settings.build(),
                    new ClusterSettings(Settings.Builder.EMPTY_SETTINGS,
                            ClusterSettings.BUILT_IN_CLUSTER_SETTINGS),
                    random()),
            NoopGatewayAllocator.INSTANCE, new ShardsAllocator() {

                public Map<DiscoveryNode, Float> weighShard(RoutingAllocation allocation, ShardRouting shard) {
                    return new HashMap<DiscoveryNode, Float>();
                }

                /*
                 *  // this allocator tries to rebuild this scenario where a rebalance is
                 *  // triggered solely by the primary overload on node [1] where a shard
                 *  // is rebalanced to node 0
                routing_nodes:
                -----node_id[0][V]
                --------[test][0], node[0], [R], s[STARTED]
                --------[test][4], node[0], [R], s[STARTED]
                -----node_id[1][V]
                --------[test][0], node[1], [P], s[STARTED]
                --------[test][1], node[1], [P], s[STARTED]
                --------[test][3], node[1], [R], s[STARTED]
                -----node_id[2][V]
                --------[test][1], node[2], [R], s[STARTED]
                --------[test][2], node[2], [R], s[STARTED]
                --------[test][4], node[2], [P], s[STARTED]
                -----node_id[3][V]
                --------[test][2], node[3], [P], s[STARTED]
                --------[test][3], node[3], [P], s[STARTED]
                ---- unassigned
                */
                public void allocate(RoutingAllocation allocation) {
                    RoutingNodes.UnassignedShards unassigned = allocation.routingNodes().unassigned();
                    ShardRouting[] drain = unassigned.drain();
                    // We have to allocate primaries first. Boolean.compare is a
                    // contract-compliant comparator (the previous
                    // "a.primary() ? -1 : 1" was not antisymmetric and could make
                    // TimSort throw "Comparison method violates its general
                    // contract!"); being stable, it still puts primaries first.
                    ArrayUtil.timSort(drain, (a, b) -> Boolean.compare(b.primary(), a.primary()));
                    for (ShardRouting sr : drain) {
                        switch (sr.id()) {
                        case 0:
                            if (sr.primary()) {
                                allocation.routingNodes().initializeShard(sr, "node1", null, -1,
                                        allocation.changes());
                            } else {
                                allocation.routingNodes().initializeShard(sr, "node0", null, -1,
                                        allocation.changes());
                            }
                            break;
                        case 1:
                            if (sr.primary()) {
                                allocation.routingNodes().initializeShard(sr, "node1", null, -1,
                                        allocation.changes());
                            } else {
                                allocation.routingNodes().initializeShard(sr, "node2", null, -1,
                                        allocation.changes());
                            }
                            break;
                        case 2:
                            if (sr.primary()) {
                                allocation.routingNodes().initializeShard(sr, "node3", null, -1,
                                        allocation.changes());
                            } else {
                                allocation.routingNodes().initializeShard(sr, "node2", null, -1,
                                        allocation.changes());
                            }
                            break;
                        case 3:
                            if (sr.primary()) {
                                allocation.routingNodes().initializeShard(sr, "node3", null, -1,
                                        allocation.changes());
                            } else {
                                allocation.routingNodes().initializeShard(sr, "node1", null, -1,
                                        allocation.changes());
                            }
                            break;
                        case 4:
                            if (sr.primary()) {
                                allocation.routingNodes().initializeShard(sr, "node2", null, -1,
                                        allocation.changes());
                            } else {
                                allocation.routingNodes().initializeShard(sr, "node0", null, -1,
                                        allocation.changes());
                            }
                            break;
                        }

                    }
                }
            }, EmptyClusterInfoService.INSTANCE);
    MetaData.Builder metaDataBuilder = MetaData.builder();
    RoutingTable.Builder routingTableBuilder = RoutingTable.builder();
    IndexMetaData.Builder indexMeta = IndexMetaData.builder("test").settings(settings(Version.CURRENT))
            .numberOfShards(5).numberOfReplicas(1);
    metaDataBuilder = metaDataBuilder.put(indexMeta);
    MetaData metaData = metaDataBuilder.build();
    for (ObjectCursor<IndexMetaData> cursor : metaData.indices().values()) {
        routingTableBuilder.addAsNew(cursor.value);
    }
    RoutingTable routingTable = routingTableBuilder.build();
    DiscoveryNodes.Builder nodes = DiscoveryNodes.builder();
    for (int i = 0; i < 4; i++) {
        DiscoveryNode node = newNode("node" + i);
        nodes.add(node);
    }

    ClusterState clusterState = ClusterState
            .builder(org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
            .nodes(nodes).metaData(metaData).routingTable(routingTable).build();
    routingTable = strategy.reroute(clusterState, "reroute").routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    RoutingNodes routingNodes = clusterState.getRoutingNodes();

    // After the custom allocator ran, every shard should be initializing.
    for (RoutingNode routingNode : routingNodes) {
        for (ShardRouting shardRouting : routingNode) {
            assertThat(shardRouting.state(), Matchers.equalTo(ShardRoutingState.INITIALIZING));
        }
    }
    strategy = createAllocationService(settings.build());

    logger.info("use the new allocator and check if it moves shards");
    routingNodes = clusterState.getRoutingNodes();
    routingTable = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    routingNodes = clusterState.getRoutingNodes();
    for (RoutingNode routingNode : routingNodes) {
        for (ShardRouting shardRouting : routingNode) {
            assertThat(shardRouting.state(), Matchers.equalTo(ShardRoutingState.STARTED));
        }
    }

    logger.info("start the replica shards");
    routingTable = strategy.applyStartedShards(clusterState, routingNodes.shardsWithState(INITIALIZING))
            .routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    routingNodes = clusterState.getRoutingNodes();

    for (RoutingNode routingNode : routingNodes) {
        for (ShardRouting shardRouting : routingNode) {
            assertThat(shardRouting.state(), Matchers.equalTo(ShardRoutingState.STARTED));
        }
    }

    logger.info("rebalancing");
    routingTable = strategy.reroute(clusterState, "reroute").routingTable();
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    routingNodes = clusterState.getRoutingNodes();

    // No shard may have left STARTED: the primary skew alone must not rebalance.
    for (RoutingNode routingNode : routingNodes) {
        for (ShardRouting shardRouting : routingNode) {
            assertThat(shardRouting.state(), Matchers.equalTo(ShardRoutingState.STARTED));
        }
    }

}

From source file:org.elasticsearch.indices.recovery.BlobRecoverySourceHandler.java

License:Apache License

/**
 * Perform phase1 of the recovery operations. Once this {@link SnapshotIndexCommit}
 * snapshot has been performed no commit operations (files being fsync'd)
 * are effectively allowed on this index until all recovery phases are done
 *
 * Phase1 examines the segment files on the target node and copies over the
 * segments that are missing. Only segments that have the same size and
 * checksum can be reused/*from w w w .ja  va  2  s  . c o m*/
 *
 * {@code InternalEngine#recover} is responsible for snapshotting the index
 * and releasing the snapshot once all 3 phases of recovery are complete
 */
@Override
public void phase1(final SnapshotIndexCommit snapshot) throws ElasticsearchException {
    cancellableThreads.checkForCancel();
    // Total size of segment files that are recovered
    long totalSize = 0;
    // Total size of segment files that were able to be re-used
    long existingTotalSize = 0;
    final Store store = shard.store();
    store.incRef();
    try {
        if (blobRecoveryHandler != null) {
            blobRecoveryHandler.phase1();
        }
        StopWatch stopWatch = new StopWatch().start();
        final Store.MetadataSnapshot recoverySourceMetadata = store.getMetadata(snapshot);
        for (String name : snapshot.getFiles()) {
            final StoreFileMetaData md = recoverySourceMetadata.get(name);
            if (md == null) {
                logger.info("Snapshot differs from actual index for file: {} meta: {}", name,
                        recoverySourceMetadata.asMap());
                throw new CorruptIndexException(
                        "Snapshot differs from actual index - maybe index was removed metadata has "
                                + recoverySourceMetadata.asMap().size() + " files");
            }
        }
        String recoverySourceSyncId = recoverySourceMetadata.getSyncId();
        String recoveryTargetSyncId = request.metadataSnapshot().getSyncId();
        final boolean recoverWithSyncId = recoverySourceSyncId != null
                && recoverySourceSyncId.equals(recoveryTargetSyncId);
        if (recoverWithSyncId) {
            final long numDocsTarget = request.metadataSnapshot().getNumDocs();
            final long numDocsSource = recoverySourceMetadata.getNumDocs();
            if (numDocsTarget != numDocsSource) {
                throw new IllegalStateException("try to recover " + request.shardId()
                        + " from primary shard with sync id but number of docs differ: " + numDocsTarget + " ("
                        + request.sourceNode().getName() + ", primary) vs " + numDocsSource + "("
                        + request.targetNode().getName() + ")");
            }
            // we shortcut recovery here because we have nothing to copy. but we must still start the engine on the target.
            // so we don't return here
            logger.trace(
                    "[{}][{}] skipping [phase1] to {} - identical sync id [{}] found on both source and target",
                    indexName, shardId, request.targetNode(), recoverySourceSyncId);
        } else {

            // Generate a "diff" of all the identical, different, and missing
            // segment files on the target node, using the existing files on
            // the source node
            final Store.RecoveryDiff diff = recoverySourceMetadata.recoveryDiff(request.metadataSnapshot());
            for (StoreFileMetaData md : diff.identical) {
                response.phase1ExistingFileNames.add(md.name());
                response.phase1ExistingFileSizes.add(md.length());
                existingTotalSize += md.length();
                if (logger.isTraceEnabled()) {
                    logger.trace(
                            "[{}][{}] recovery [phase1] to {}: not recovering [{}], exists in local store and has checksum [{}], size [{}]",
                            indexName, shardId, request.targetNode(), md.name(), md.checksum(), md.length());
                }
                totalSize += md.length();
            }
            for (StoreFileMetaData md : Iterables.concat(diff.different, diff.missing)) {
                if (request.metadataSnapshot().asMap().containsKey(md.name())) {
                    logger.trace(
                            "[{}][{}] recovery [phase1] to {}: recovering [{}], exists in local store, but is different: remote [{}], local [{}]",
                            indexName, shardId, request.targetNode(), md.name(),
                            request.metadataSnapshot().get(md.name()), md);
                } else {
                    logger.trace("[{}][{}] recovery [phase1] to {}: recovering [{}], does not exists in remote",
                            indexName, shardId, request.targetNode(), md.name());
                }
                response.phase1FileNames.add(md.name());
                response.phase1FileSizes.add(md.length());
                totalSize += md.length();
            }
            response.phase1TotalSize = totalSize;
            response.phase1ExistingTotalSize = existingTotalSize;

            logger.trace(
                    "[{}][{}] recovery [phase1] to {}: recovering_files [{}] with total_size [{}], reusing_files [{}] with total_size [{}]",
                    indexName, shardId, request.targetNode(), response.phase1FileNames.size(),
                    new ByteSizeValue(totalSize), response.phase1ExistingFileNames.size(),
                    new ByteSizeValue(existingTotalSize));
            cancellableThreads.execute(new Interruptable() {
                @Override
                public void run() throws InterruptedException {
                    RecoveryFilesInfoRequest recoveryInfoFilesRequest = new RecoveryFilesInfoRequest(
                            request.recoveryId(), request.shardId(), response.phase1FileNames,
                            response.phase1FileSizes, response.phase1ExistingFileNames,
                            response.phase1ExistingFileSizes, shard.translog().estimatedNumberOfOperations(),
                            response.phase1TotalSize, response.phase1ExistingTotalSize);
                    transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.FILES_INFO,
                            recoveryInfoFilesRequest,
                            TransportRequestOptions.options()
                                    .withTimeout(recoverySettings.internalActionTimeout()),
                            EmptyTransportResponseHandler.INSTANCE_SAME).txGet();
                }
            });

            // This latch will be used to wait until all files have been transferred to the target node
            final CountDownLatch latch = new CountDownLatch(response.phase1FileNames.size());
            final CopyOnWriteArrayList<Throwable> exceptions = new CopyOnWriteArrayList<>();
            final AtomicReference<Throwable> corruptedEngine = new AtomicReference<>();
            int fileIndex = 0;
            ThreadPoolExecutor pool;

            // How many bytes we've copied since we last called RateLimiter.pause
            final AtomicLong bytesSinceLastPause = new AtomicLong();

            for (final String name : response.phase1FileNames) {
                long fileSize = response.phase1FileSizes.get(fileIndex);

                // Files are split into two categories, files that are "small"
                // (under 5mb) and other files. Small files are transferred
                // using a separate thread pool dedicated to small files.
                //
                // The idea behind this is that while we are transferring an
                // older, large index, a user may create a new index, but that
                // index will not be able to recover until the large index
                // finishes, by using two different thread pools we can allow
                // tiny files (like segments for a brand new index) to be
                // recovered while ongoing large segment recoveries are
                // happening. It also allows these pools to be configured
                // separately.
                if (fileSize > RecoverySettings.SMALL_FILE_CUTOFF_BYTES) {
                    pool = recoverySettings.concurrentStreamPool();
                } else {
                    pool = recoverySettings.concurrentSmallFileStreamPool();
                }

                pool.execute(new AbstractRunnable() {
                    @Override
                    public void onFailure(Throwable t) {
                        // we either got rejected or the store can't be incremented / we are canceled
                        logger.debug("Failed to transfer file [" + name + "] on recovery");
                    }

                    public void onAfter() {
                        // Signify this file has completed by decrementing the latch
                        latch.countDown();
                    }

                    @Override
                    protected void doRun() {
                        cancellableThreads.checkForCancel();
                        store.incRef();
                        final StoreFileMetaData md = recoverySourceMetadata.get(name);
                        try (final IndexInput indexInput = store.directory().openInput(name,
                                IOContext.READONCE)) {
                            final int BUFFER_SIZE = (int) recoverySettings.fileChunkSize().bytes();
                            final byte[] buf = new byte[BUFFER_SIZE];
                            boolean shouldCompressRequest = recoverySettings.compress();
                            if (CompressorFactory.isCompressed(indexInput)) {
                                shouldCompressRequest = false;
                            }

                            final long len = indexInput.length();
                            long readCount = 0;
                            final TransportRequestOptions requestOptions = TransportRequestOptions.options()
                                    .withCompress(shouldCompressRequest)
                                    .withType(TransportRequestOptions.Type.RECOVERY)
                                    .withTimeout(recoverySettings.internalActionTimeout());

                            while (readCount < len) {
                                if (shard.state() == IndexShardState.CLOSED) { // check if the shard got closed on us
                                    throw new IndexShardClosedException(shard.shardId());
                                }
                                int toRead = readCount + BUFFER_SIZE > len ? (int) (len - readCount)
                                        : BUFFER_SIZE;
                                final long position = indexInput.getFilePointer();

                                // Pause using the rate limiter, if desired, to throttle the recovery
                                RateLimiter rl = recoverySettings.rateLimiter();
                                long throttleTimeInNanos = 0;
                                if (rl != null) {
                                    long bytes = bytesSinceLastPause.addAndGet(toRead);
                                    if (bytes > rl.getMinPauseCheckBytes()) {
                                        // Time to pause
                                        bytesSinceLastPause.addAndGet(-bytes);
                                        throttleTimeInNanos = rl.pause(bytes);
                                        shard.recoveryStats().addThrottleTime(throttleTimeInNanos);
                                    }
                                }
                                indexInput.readBytes(buf, 0, toRead, false);
                                final BytesArray content = new BytesArray(buf, 0, toRead);
                                readCount += toRead;
                                final boolean lastChunk = readCount == len;
                                final RecoveryFileChunkRequest fileChunkRequest = new RecoveryFileChunkRequest(
                                        request.recoveryId(), request.shardId(), md, position, content,
                                        lastChunk, shard.translog().estimatedNumberOfOperations(),
                                        throttleTimeInNanos);
                                cancellableThreads.execute(new Interruptable() {
                                    @Override
                                    public void run() throws InterruptedException {
                                        // Actually send the file chunk to the target node, waiting for it to complete
                                        transportService.submitRequest(request.targetNode(),
                                                RecoveryTarget.Actions.FILE_CHUNK, fileChunkRequest,
                                                requestOptions, EmptyTransportResponseHandler.INSTANCE_SAME)
                                                .txGet();
                                    }
                                });

                            }
                        } catch (Throwable e) {
                            final Throwable corruptIndexException;
                            if ((corruptIndexException = ExceptionsHelper.unwrapCorruption(e)) != null) {
                                if (store.checkIntegrity(md) == false) { // we are corrupted on the primary -- fail!
                                    logger.warn("{} Corrupted file detected {} checksum mismatch",
                                            shard.shardId(), md);
                                    if (corruptedEngine.compareAndSet(null, corruptIndexException) == false) {
                                        // if we are not the first exception, add ourselves as suppressed to the main one:
                                        corruptedEngine.get().addSuppressed(e);
                                    }
                                } else { // corruption has happened on the way to replica
                                    RemoteTransportException exception = new RemoteTransportException(
                                            "File corruption occurred on recovery but checksums are ok", null);
                                    exception.addSuppressed(e);
                                    exceptions.add(0, exception); // last exception first
                                    logger.warn(
                                            "{} Remote file corruption on node {}, recovering {}. local checksum OK",
                                            corruptIndexException, shard.shardId(), request.targetNode(), md);

                                }
                            } else {
                                exceptions.add(0, e); // last exceptions first
                            }
                        } finally {
                            store.decRef();

                        }
                    }
                });
                fileIndex++;
            }

            cancellableThreads.execute(new Interruptable() {
                @Override
                public void run() throws InterruptedException {
                    // Wait for all files that need to be transferred to finish transferring
                    latch.await();
                }
            });

            if (corruptedEngine.get() != null) {
                throw corruptedEngine.get();
            } else {
                ExceptionsHelper.rethrowAndSuppress(exceptions);
            }

            cancellableThreads.execute(new Interruptable() {
                @Override
                public void run() throws InterruptedException {
                    // Send the CLEAN_FILES request, which takes all of the files that
                    // were transferred and renames them from their temporary file
                    // names to the actual file names. It also writes checksums for
                    // the files after they have been renamed.
                    //
                    // Once the files have been renamed, any other files that are not
                    // related to this recovery (out of date segments, for example)
                    // are deleted
                    try {
                        transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.CLEAN_FILES,
                                new RecoveryCleanFilesRequest(request.recoveryId(), shard.shardId(),
                                        recoverySourceMetadata, shard.translog().estimatedNumberOfOperations()),
                                TransportRequestOptions.options()
                                        .withTimeout(recoverySettings.internalActionTimeout()),
                                EmptyTransportResponseHandler.INSTANCE_SAME).txGet();
                    } catch (RemoteTransportException remoteException) {
                        final IOException corruptIndexException;
                        // we realized that after the index was copied and we wanted to finalize the recovery
                        // the index was corrupted:
                        //   - maybe due to a broken segments file on an empty index (transferred with no checksum)
                        //   - maybe due to old segments without checksums or length only checks
                        if ((corruptIndexException = ExceptionsHelper
                                .unwrapCorruption(remoteException)) != null) {
                            try {
                                final Store.MetadataSnapshot recoverySourceMetadata = store
                                        .getMetadata(snapshot);
                                StoreFileMetaData[] metadata = Iterables.toArray(recoverySourceMetadata,
                                        StoreFileMetaData.class);
                                ArrayUtil.timSort(metadata, new Comparator<StoreFileMetaData>() {
                                    @Override
                                    public int compare(StoreFileMetaData o1, StoreFileMetaData o2) {
                                        return Long.compare(o1.length(), o2.length()); // check small files first
                                    }
                                });
                                for (StoreFileMetaData md : metadata) {
                                    logger.debug(
                                            "{} checking integrity for file {} after remove corruption exception",
                                            shard.shardId(), md);
                                    if (store.checkIntegrity(md) == false) { // we are corrupted on the primary -- fail!
                                        logger.warn("{} Corrupted file detected {} checksum mismatch",
                                                shard.shardId(), md);
                                        throw corruptIndexException;
                                    }
                                }
                            } catch (IOException ex) {
                                remoteException.addSuppressed(ex);
                                throw remoteException;
                            }
                            // corruption has happened on the way to replica
                            RemoteTransportException exception = new RemoteTransportException(
                                    "File corruption occurred on recovery but checksums are ok", null);
                            exception.addSuppressed(remoteException);
                            logger.warn(
                                    "{} Remote file corruption during finalization on node {}, recovering {}. local checksum OK",
                                    corruptIndexException, shard.shardId(), request.targetNode());
                        } else {
                            throw remoteException;
                        }
                    }
                }
            });
        }
        stopWatch.stop();
        logger.trace("[{}][{}] recovery [phase1] to {}: took [{}]", indexName, shardId, request.targetNode(),
                stopWatch.totalTime());
        response.phase1Time = stopWatch.totalTime().millis();
    } catch (Throwable e) {
        throw new RecoverFilesRecoveryException(request.shardId(), response.phase1FileNames.size(),
                new ByteSizeValue(totalSize), e);
    } finally {
        store.decRef();
    }
}

From source file:org.elasticsearch.indices.recovery.RecoverySourceHandler.java

License:Apache License

/**
 * Perform phase1 of the recovery operations. Once this {@link SnapshotIndexCommit}
 * snapshot has been performed no commit operations (files being fsync'd)
 * are effectively allowed on this index until all recovery phases are done
 * <p>
 * Phase1 examines the segment files on the target node and copies over the
 * segments that are missing. Only segments that have the same size and
 * checksum can be reused
 */
public void phase1(final SnapshotIndexCommit snapshot, final Translog.View translogView) {
    cancellableThreads.checkForCancel();
    // Total size of segment files that are recovered
    long totalSize = 0;
    // Total size of segment files that were able to be re-used
    long existingTotalSize = 0;
    final Store store = shard.store();
    store.incRef();
    try {
        StopWatch stopWatch = new StopWatch().start();
        final Store.MetadataSnapshot recoverySourceMetadata;
        // Snapshot the source store metadata up front; a corrupt source index is
        // fatal for this shard's engine, so fail it immediately and rethrow.
        try {
            recoverySourceMetadata = store.getMetadata(snapshot);
        } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
            shard.engine().failEngine("recovery", ex);
            throw ex;
        }
        // Sanity check: every file referenced by the commit point must be present
        // in the metadata snapshot, otherwise the snapshot and index disagree.
        for (String name : snapshot.getFiles()) {
            final StoreFileMetaData md = recoverySourceMetadata.get(name);
            if (md == null) {
                logger.info("Snapshot differs from actual index for file: {} meta: {}", name,
                        recoverySourceMetadata.asMap());
                throw new CorruptIndexException(
                        "Snapshot differs from actual index - maybe index was removed metadata has "
                                + recoverySourceMetadata.asMap().size() + " files",
                        name);
            }
        }
        // Generate a "diff" of all the identical, different, and missing
        // segment files on the target node, using the existing files on
        // the source node
        String recoverySourceSyncId = recoverySourceMetadata.getSyncId();
        String recoveryTargetSyncId = request.metadataSnapshot().getSyncId();
        final boolean recoverWithSyncId = recoverySourceSyncId != null
                && recoverySourceSyncId.equals(recoveryTargetSyncId);
        if (recoverWithSyncId) {
            // Matching sync ids should imply identical content; a doc-count mismatch
            // means the sync id cannot be trusted, so abort rather than skip the copy.
            final long numDocsTarget = request.metadataSnapshot().getNumDocs();
            final long numDocsSource = recoverySourceMetadata.getNumDocs();
            if (numDocsTarget != numDocsSource) {
                throw new IllegalStateException("try to recover " + request.shardId()
                        + " from primary shard with sync id but number of docs differ: " + numDocsTarget + " ("
                        + request.sourceNode().getName() + ", primary) vs " + numDocsSource + "("
                        + request.targetNode().getName() + ")");
            }
            // we shortcut recovery here because we have nothing to copy. but we must still start the engine on the target.
            // so we don't return here
            logger.trace(
                    "[{}][{}] skipping [phase1] to {} - identical sync id [{}] found on both source and target",
                    indexName, shardId, request.targetNode(), recoverySourceSyncId);
        } else {
            final Store.RecoveryDiff diff = recoverySourceMetadata.recoveryDiff(request.metadataSnapshot());
            // Files identical on both sides are reused and only reported back.
            for (StoreFileMetaData md : diff.identical) {
                response.phase1ExistingFileNames.add(md.name());
                response.phase1ExistingFileSizes.add(md.length());
                existingTotalSize += md.length();
                if (logger.isTraceEnabled()) {
                    logger.trace(
                            "[{}][{}] recovery [phase1] to {}: not recovering [{}], exists in local store and has checksum [{}], size [{}]",
                            indexName, shardId, request.targetNode(), md.name(), md.checksum(), md.length());
                }
                totalSize += md.length();
            }
            // Files that differ or are missing on the target must be transferred.
            for (StoreFileMetaData md : Iterables.concat(diff.different, diff.missing)) {
                if (request.metadataSnapshot().asMap().containsKey(md.name())) {
                    logger.trace(
                            "[{}][{}] recovery [phase1] to {}: recovering [{}], exists in local store, but is different: remote [{}], local [{}]",
                            indexName, shardId, request.targetNode(), md.name(),
                            request.metadataSnapshot().asMap().get(md.name()), md);
                } else {
                    logger.trace("[{}][{}] recovery [phase1] to {}: recovering [{}], does not exists in remote",
                            indexName, shardId, request.targetNode(), md.name());
                }
                response.phase1FileNames.add(md.name());
                response.phase1FileSizes.add(md.length());
                totalSize += md.length();
            }

            response.phase1TotalSize = totalSize;
            response.phase1ExistingTotalSize = existingTotalSize;

            logger.trace(
                    "[{}][{}] recovery [phase1] to {}: recovering_files [{}] with total_size [{}], reusing_files [{}] with total_size [{}]",
                    indexName, shardId, request.targetNode(), response.phase1FileNames.size(),
                    new ByteSizeValue(totalSize), response.phase1ExistingFileNames.size(),
                    new ByteSizeValue(existingTotalSize));
            // Announce the file lists to the target before any file bytes are sent.
            cancellableThreads.execute(new Interruptable() {
                @Override
                public void run() throws InterruptedException {
                    RecoveryFilesInfoRequest recoveryInfoFilesRequest = new RecoveryFilesInfoRequest(
                            request.recoveryId(), request.shardId(), response.phase1FileNames,
                            response.phase1FileSizes, response.phase1ExistingFileNames,
                            response.phase1ExistingFileSizes, translogView.totalOperations());
                    transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.FILES_INFO,
                            recoveryInfoFilesRequest,
                            TransportRequestOptions.builder()
                                    .withTimeout(recoverySettings.internalActionTimeout()).build(),
                            EmptyTransportResponseHandler.INSTANCE_SAME).txGet();
                }
            });

            // This latch will be used to wait until all files have been transferred to the target node
            final CountDownLatch latch = new CountDownLatch(response.phase1FileNames.size());
            final CopyOnWriteArrayList<Throwable> exceptions = new CopyOnWriteArrayList<>();
            // First corruption detected on the primary wins; later ones are suppressed onto it.
            final AtomicReference<Throwable> corruptedEngine = new AtomicReference<>();
            // Index into response.phase1FileSizes for the file currently being scheduled.
            int fileIndex = 0;
            ThreadPoolExecutor pool;

            // How many bytes we've copied since we last called RateLimiter.pause
            final AtomicLong bytesSinceLastPause = new AtomicLong();

            for (final String name : response.phase1FileNames) {
                long fileSize = response.phase1FileSizes.get(fileIndex);

                // Files are split into two categories, files that are "small"
                // (under 5mb) and other files. Small files are transferred
                // using a separate thread pool dedicated to small files.
                //
                // The idea behind this is that while we are transferring an
                // older, large index, a user may create a new index, but that
                // index will not be able to recover until the large index
                // finishes, by using two different thread pools we can allow
                // tiny files (like segments for a brand new index) to be
                // recovered while ongoing large segment recoveries are
                // happening. It also allows these pools to be configured
                // separately.
                if (fileSize > RecoverySettings.SMALL_FILE_CUTOFF_BYTES) {
                    pool = recoverySettings.concurrentStreamPool();
                } else {
                    pool = recoverySettings.concurrentSmallFileStreamPool();
                }

                // Each file is streamed chunk-by-chunk on its own pool task; the latch
                // is decremented in onAfter() on success, failure, or rejection alike.
                pool.execute(new AbstractRunnable() {
                    @Override
                    public void onFailure(Throwable t) {
                        // we either got rejected or the store can't be incremented / we are canceled
                        logger.debug("Failed to transfer file [" + name + "] on recovery");
                    }

                    @Override
                    public void onAfter() {
                        // Signify this file has completed by decrementing the latch
                        latch.countDown();
                    }

                    @Override
                    protected void doRun() {
                        cancellableThreads.checkForCancel();
                        store.incRef();
                        final StoreFileMetaData md = recoverySourceMetadata.get(name);
                        try (final IndexInput indexInput = store.directory().openInput(name,
                                IOContext.READONCE)) {
                            final int BUFFER_SIZE = (int) Math.max(1, recoverySettings.fileChunkSize().bytes()); // at least one!
                            final byte[] buf = new byte[BUFFER_SIZE];
                            boolean shouldCompressRequest = recoverySettings.compress();
                            // Already-compressed files gain nothing from transport compression.
                            if (CompressorFactory.isCompressed(indexInput)) {
                                shouldCompressRequest = false;
                            }

                            final long len = indexInput.length();
                            long readCount = 0;
                            final TransportRequestOptions requestOptions = TransportRequestOptions.builder()
                                    .withCompress(shouldCompressRequest)
                                    .withType(TransportRequestOptions.Type.RECOVERY)
                                    .withTimeout(recoverySettings.internalActionTimeout()).build();

                            while (readCount < len) {
                                if (shard.state() == IndexShardState.CLOSED) { // check if the shard got closed on us
                                    throw new IndexShardClosedException(shard.shardId());
                                }
                                int toRead = readCount + BUFFER_SIZE > len ? (int) (len - readCount)
                                        : BUFFER_SIZE;
                                final long position = indexInput.getFilePointer();

                                // Pause using the rate limiter, if desired, to throttle the recovery
                                RateLimiter rl = recoverySettings.rateLimiter();
                                long throttleTimeInNanos = 0;
                                if (rl != null) {
                                    long bytes = bytesSinceLastPause.addAndGet(toRead);
                                    if (bytes > rl.getMinPauseCheckBytes()) {
                                        // Time to pause
                                        bytesSinceLastPause.addAndGet(-bytes);
                                        throttleTimeInNanos = rl.pause(bytes);
                                        shard.recoveryStats().addThrottleTime(throttleTimeInNanos);
                                    }
                                }
                                indexInput.readBytes(buf, 0, toRead, false);
                                final BytesArray content = new BytesArray(buf, 0, toRead);
                                readCount += toRead;
                                final boolean lastChunk = readCount == len;
                                final RecoveryFileChunkRequest fileChunkRequest = new RecoveryFileChunkRequest(
                                        request.recoveryId(), request.shardId(), md, position, content,
                                        lastChunk, translogView.totalOperations(), throttleTimeInNanos);
                                cancellableThreads.execute(new Interruptable() {
                                    @Override
                                    public void run() throws InterruptedException {
                                        // Actually send the file chunk to the target node, waiting for it to complete
                                        transportService.submitRequest(request.targetNode(),
                                                RecoveryTarget.Actions.FILE_CHUNK, fileChunkRequest,
                                                requestOptions, EmptyTransportResponseHandler.INSTANCE_SAME)
                                                .txGet();
                                    }
                                });

                            }
                        } catch (Throwable e) {
                            final Throwable corruptIndexException;
                            // Distinguish corruption on the primary (fatal) from corruption
                            // introduced in transit to the replica (retryable).
                            if ((corruptIndexException = ExceptionsHelper.unwrapCorruption(e)) != null) {
                                if (store.checkIntegrityNoException(md) == false) { // we are corrupted on the primary -- fail!
                                    logger.warn("{} Corrupted file detected {} checksum mismatch",
                                            shard.shardId(), md);
                                    if (corruptedEngine.compareAndSet(null, corruptIndexException) == false) {
                                        // if we are not the first exception, add ourselves as suppressed to the main one:
                                        corruptedEngine.get().addSuppressed(e);
                                    }
                                } else { // corruption has happened on the way to replica
                                    RemoteTransportException exception = new RemoteTransportException(
                                            "File corruption occurred on recovery but checksums are ok", null);
                                    exception.addSuppressed(e);
                                    exceptions.add(0, exception); // last exception first
                                    logger.warn(
                                            "{} Remote file corruption on node {}, recovering {}. local checksum OK",
                                            corruptIndexException, shard.shardId(), request.targetNode(), md);

                                }
                            } else {
                                exceptions.add(0, e); // last exceptions first
                            }
                        } finally {
                            store.decRef();

                        }
                    }
                });
                fileIndex++;
            }

            cancellableThreads.execute(new Interruptable() {
                @Override
                public void run() throws InterruptedException {
                    // Wait for all files that need to be transferred to finish transferring
                    latch.await();
                }
            });

            // A corrupted primary is fatal for this shard: fail the engine and abort.
            // Otherwise rethrow any transfer exceptions collected by the worker tasks.
            if (corruptedEngine.get() != null) {
                shard.engine().failEngine("recovery", corruptedEngine.get());
                throw corruptedEngine.get();
            } else {
                ExceptionsHelper.rethrowAndSuppress(exceptions);
            }

            cancellableThreads.execute(new Interruptable() {
                @Override
                public void run() throws InterruptedException {
                    // Send the CLEAN_FILES request, which takes all of the files that
                    // were transferred and renames them from their temporary file
                    // names to the actual file names. It also writes checksums for
                    // the files after they have been renamed.
                    //
                    // Once the files have been renamed, any other files that are not
                    // related to this recovery (out of date segments, for example)
                    // are deleted
                    try {
                        transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.CLEAN_FILES,
                                new RecoveryCleanFilesRequest(request.recoveryId(), shard.shardId(),
                                        recoverySourceMetadata, translogView.totalOperations()),
                                TransportRequestOptions.builder()
                                        .withTimeout(recoverySettings.internalActionTimeout()).build(),
                                EmptyTransportResponseHandler.INSTANCE_SAME).txGet();
                    } catch (RemoteTransportException remoteException) {
                        final IOException corruptIndexException;
                        // we realized that after the index was copied and we wanted to finalize the recovery
                        // the index was corrupted:
                        //   - maybe due to a broken segments file on an empty index (transferred with no checksum)
                        //   - maybe due to old segments without checksums or length only checks
                        if ((corruptIndexException = ExceptionsHelper
                                .unwrapCorruption(remoteException)) != null) {
                            try {
                                // Re-verify every local file, smallest first, to decide whether
                                // the primary itself is corrupt or only the transfer was.
                                final Store.MetadataSnapshot recoverySourceMetadata = store
                                        .getMetadata(snapshot);
                                StoreFileMetaData[] metadata = Iterables.toArray(recoverySourceMetadata,
                                        StoreFileMetaData.class);
                                ArrayUtil.timSort(metadata, new Comparator<StoreFileMetaData>() {
                                    @Override
                                    public int compare(StoreFileMetaData o1, StoreFileMetaData o2) {
                                        return Long.compare(o1.length(), o2.length()); // check small files first
                                    }
                                });
                                for (StoreFileMetaData md : metadata) {
                                    logger.debug(
                                            "{} checking integrity for file {} after remove corruption exception",
                                            shard.shardId(), md);
                                    if (store.checkIntegrityNoException(md) == false) { // we are corrupted on the primary -- fail!
                                        shard.engine().failEngine("recovery", corruptIndexException);
                                        logger.warn("{} Corrupted file detected {} checksum mismatch",
                                                shard.shardId(), md);
                                        throw corruptIndexException;
                                    }
                                }
                            } catch (IOException ex) {
                                remoteException.addSuppressed(ex);
                                throw remoteException;
                            }
                            // corruption has happened on the way to replica
                            RemoteTransportException exception = new RemoteTransportException(
                                    "File corruption occurred on recovery but checksums are ok", null);
                            exception.addSuppressed(remoteException);
                            logger.warn(
                                    "{} Remote file corruption during finalization on node {}, recovering {}. local checksum OK",
                                    corruptIndexException, shard.shardId(), request.targetNode());
                            throw exception;
                        } else {
                            throw remoteException;
                        }
                    }
                }
            });
        }

        prepareTargetForTranslog(translogView);

        logger.trace("[{}][{}] recovery [phase1] to {}: took [{}]", indexName, shardId, request.targetNode(),
                stopWatch.totalTime());
        response.phase1Time = stopWatch.totalTime().millis();
    } catch (Throwable e) {
        throw new RecoverFilesRecoveryException(request.shardId(), response.phase1FileNames.size(),
                new ByteSizeValue(totalSize), e);
    } finally {
        store.decRef();
    }
}

From source file:org.elasticsearch.messy.tests.RandomScoreFunctionTests.java

License:Apache License

/**
 * Repeated searches with the same random-function seed and the same routing
 * preference must produce identical hits (same scores, same ids) even after
 * documents have been re-indexed into different segments.
 */
public void testConsistentHitsWithSameSeed() throws Exception {
    createIndex("test");
    ensureGreen(); // make sure we are done otherwise preference could change?
    int docCount = randomIntBetween(100, 200);
    for (int docId = 0; docId < docCount; docId++) {
        index("test", "type", "" + docId, jsonBuilder().startObject().endObject());
    }
    flush();
    refresh();
    int outerIters = scaledRandomIntBetween(10, 20);
    for (int outer = 0; outer < outerIters; outer++) {
        final int seed = randomInt();
        // Pick a routing preference; it must not start with '_' since that prefix is
        // reserved for known preference types (e.g. _shards, _primary).
        String preference = randomRealisticUnicodeOfLengthBetween(1, 10); // at least one char!!
        while (preference.startsWith("_")) {
            preference = randomRealisticUnicodeOfLengthBetween(1, 10);
        }
        int innerIters = scaledRandomIntBetween(2, 5);
        SearchHit[] hits = null;
        for (int inner = 0; inner < innerIters; inner++) {
            SearchResponse searchResponse = client().prepareSearch().setSize(docCount) // get all docs otherwise we are prone to tie-breaking
                    .setPreference(preference)
                    .setQuery(functionScoreQuery(matchAllQuery(), randomFunction(seed))).execute().actionGet();
            assertThat("Failures " + Arrays.toString(searchResponse.getShardFailures()),
                    searchResponse.getShardFailures().length, CoreMatchers.equalTo(0));
            final SearchHit[] currentHits = searchResponse.getHits().getHits();
            final int hitCount = currentHits.length;
            ArrayUtil.timSort(currentHits, new Comparator<SearchHit>() {
                @Override
                public int compare(SearchHit left, SearchHit right) {
                    // Scores may tie; break ties on the document id so the order is
                    // deterministic instead of depending on collection order.
                    int byScore = Float.compare(left.getScore(), right.getScore());
                    if (byScore != 0) {
                        return byScore;
                    }
                    return left.getId().compareTo(right.getId());
                }
            });
            if (inner == 0) {
                assertThat(hits, nullValue());
                hits = currentHits;
            } else {
                assertThat(hits.length, equalTo(searchResponse.getHits().getHits().length));
                for (int j = 0; j < hitCount; j++) {
                    assertThat("" + j, currentHits[j].score(), equalTo(hits[j].score()));
                    assertThat("" + j, currentHits[j].id(), equalTo(hits[j].id()));
                }
            }

            // Re-index a random subset of documents so they end up in different segments.
            for (int remaining = randomIntBetween(20, 50); remaining > 0; remaining--) {
                int doc = randomInt(docCount - 1);// watch out this is inclusive the max values!
                index("test", "type", "" + doc, jsonBuilder().startObject().endObject());
            }
            flush();
            refresh();
        }
    }
}

From source file:org.elasticsearch.mlt.MoreLikeThisActionTests.java

License:Apache License

@Test
public void testCompareMoreLikeThisDSLWithAPI() throws Exception {
    // Verifies that the MoreLikeThis query DSL and the MoreLikeThis API return
    // the same documents with the same scores for randomly chosen seed docs.
    logger.info("Creating index test");
    assertAcked(prepareCreate("test").addMapping("type1",
            jsonBuilder().startObject().startObject("type1").startObject("properties").startObject("text")
                    .field("type", "string").endObject().endObject().endObject().endObject()));

    logger.info("Running Cluster Health");
    assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));

    logger.info("Indexing...");
    String[] texts = new String[] { "Apache Lucene", "free and open source", "information retrieval",
            "software library", "programmed in Java", "Doug Cutting", "Apache Software Foundation",
            "Apache Software License", "Lucene programming languages",
            "Delphi, Perl, C#, C++, Python, Ruby, and PHP" };
    // Size the builder list from the data instead of a magic constant.
    List<IndexRequestBuilder> builders = new ArrayList<>(texts.length);
    for (int i = 0; i < texts.length; i++) {
        builders.add(
                client().prepareIndex("test", "type1").setSource("text", texts[i]).setId(String.valueOf(i)));
    }
    indexRandom(true, false, builders);

    int iters = between(10, 20);
    for (int j = 0; j < iters; j++) {
        logger.info("Running MoreLikeThis DSL with IDs");
        String id = String.valueOf(getRandom().nextInt(texts.length));
        Client client = client();
        MoreLikeThisQueryBuilder queryBuilder = QueryBuilders.moreLikeThisQuery("text").ids(id).minTermFreq(1)
                .minDocFreq(1).minimumShouldMatch("0%");
        SearchResponse mltResponseDSL = client.prepareSearch().setSearchType(SearchType.QUERY_THEN_FETCH)
                .setTypes("type1").setQuery(queryBuilder).setSize(texts.length).execute().actionGet();
        assertSearchResponse(mltResponseDSL);

        logger.info("Running MoreLikeThis API");
        MoreLikeThisRequest mltRequest = moreLikeThisRequest("test").type("type1").searchSize(texts.length)
                .id(id).minTermFreq(1).minDocFreq(1).minimumShouldMatch("0%");
        SearchResponse mltResponseAPI = client.moreLikeThis(mltRequest).actionGet();
        assertSearchResponse(mltResponseAPI);

        logger.info("Ensure the documents and scores returned are the same.");
        SearchHit[] hitsDSL = mltResponseDSL.getHits().getHits();
        SearchHit[] hitsAPI = mltResponseAPI.getHits().getHits();

        // We have to re-sort since the results might come from different shards
        // and the docIDs used for tie-breaking might not be the same on each shard.
        // Order: ascending score, with document id as a deterministic tie-breaker.
        Comparator<SearchHit> cmp = new Comparator<SearchHit>() {
            @Override
            public int compare(SearchHit o1, SearchHit o2) {
                // Compute the score comparison once rather than twice per call.
                int byScore = Float.compare(o1.getScore(), o2.getScore());
                return byScore == 0 ? o1.getId().compareTo(o2.getId()) : byScore;
            }
        };
        ArrayUtil.timSort(hitsDSL, cmp);
        ArrayUtil.timSort(hitsAPI, cmp);
        assertThat("Not the same number of results.", hitsAPI.length, equalTo(hitsDSL.length));
        for (int i = 0; i < hitsDSL.length; i++) {
            assertThat("Expected id: " + hitsDSL[i].getId() + " at position " + i + " but wasn't.",
                    hitsAPI[i].getId(), equalTo(hitsDSL[i].getId()));
            assertThat("Expected score: " + hitsDSL[i].getScore() + " at position " + i + " but wasn't.",
                    hitsAPI[i].getScore(), equalTo(hitsDSL[i].getScore()));
        }
    }
}

From source file:org.elasticsearch.search.functionscore.RandomScoreFunctionIT.java

License:Apache License

public void testConsistentHitsWithSameSeed() throws Exception {
    // Verifies that random_score with a fixed seed and fixed shard preference
    // returns the same hits (ids and scores) across repeated searches, even
    // after re-indexing some documents into different segments.
    createIndex("test");
    ensureGreen(); // make sure we are done otherwise preference could change?
    int docCount = randomIntBetween(100, 200);
    for (int i = 0; i < docCount; i++) {
        index("test", "type", "" + i, jsonBuilder().startObject().endObject());
    }
    flush();
    refresh();
    int outerIters = scaledRandomIntBetween(10, 20);
    for (int o = 0; o < outerIters; o++) {
        final int seed = randomInt();
        String preference = randomRealisticUnicodeOfLengthBetween(1, 10); // at least one char!!
        // randomPreference should not start with '_' (reserved for known preference types (e.g. _shards, _primary)
        while (preference.startsWith("_")) {
            preference = randomRealisticUnicodeOfLengthBetween(1, 10);
        }
        int innerIters = scaledRandomIntBetween(2, 5);
        SearchHit[] hits = null;
        for (int i = 0; i < innerIters; i++) {
            SearchResponse searchResponse = client().prepareSearch().setSize(docCount) // get all docs otherwise we are prone to tie-breaking
                    .setPreference(preference)
                    .setQuery(functionScoreQuery(matchAllQuery(), randomFunction(seed))).execute().actionGet();
            assertThat("Failures " + Arrays.toString(searchResponse.getShardFailures()),
                    searchResponse.getShardFailures().length, CoreMatchers.equalTo(0));
            final int hitCount = searchResponse.getHits().getHits().length;
            final SearchHit[] currentHits = searchResponse.getHits().getHits();
            ArrayUtil.timSort(currentHits, (o1, o2) -> {
                // for tie-breaking we have to resort here since if the score is
                // identical we rely on collection order which might change.
                int cmp = Float.compare(o1.getScore(), o2.getScore());
                return cmp == 0 ? o1.getId().compareTo(o2.getId()) : cmp;
            });
            if (i == 0) {
                assertThat(hits, nullValue());
                hits = currentHits;
            } else {
                assertThat(hits.length, equalTo(searchResponse.getHits().getHits().length));
                // Use the non-deprecated getScore()/getId() accessors, consistent
                // with the comparator above.
                for (int j = 0; j < hitCount; j++) {
                    assertThat("" + j, currentHits[j].getScore(), equalTo(hits[j].getScore()));
                    assertThat("" + j, currentHits[j].getId(), equalTo(hits[j].getId()));
                }
            }

            // randomly change some docs to get them in different segments
            int numDocsToChange = randomIntBetween(20, 50);
            while (numDocsToChange > 0) {
                int doc = randomInt(docCount - 1);// watch out this is inclusive the max values!
                index("test", "type", "" + doc, jsonBuilder().startObject().endObject());
                --numDocsToChange;
            }
            flush();
            refresh();
        }
    }
}