org.apache.blur.manager.writer.SharedMergeScheduler.java Source code

Introduction

Here is the source code for org.apache.blur.manager.writer.SharedMergeScheduler.java, part of Apache Blur. The class provides a single merge scheduler shared by all of the Lucene index writers in a process: each pending merge is routed by its total input size into either a small-merge or a large-merge priority queue, each queue is drained by its own pool of worker threads, and queue depths and merge throughput are published as Yammer metrics. A hedged usage sketch follows the listing.

Source

package org.apache.blur.manager.writer;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import static org.apache.blur.metrics.MetricsConstants.LUCENE;
import static org.apache.blur.metrics.MetricsConstants.MERGE_THROUGHPUT_BYTES;
import static org.apache.blur.metrics.MetricsConstants.ORG_APACHE_BLUR;
import static org.apache.blur.utils.BlurConstants.SHARED_MERGE_SCHEDULER_PREFIX;

import java.io.Closeable;
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.Iterator;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.PriorityBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.blur.concurrent.Executors;
import org.apache.blur.log.Log;
import org.apache.blur.log.LogFactory;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MergePolicy.OneMerge;
import org.apache.lucene.index.MergeScheduler;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentInfoPerCommit;
import org.apache.lucene.store.Directory;

import com.yammer.metrics.Metrics;
import com.yammer.metrics.core.Gauge;
import com.yammer.metrics.core.Meter;
import com.yammer.metrics.core.MetricName;

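/**
 * A Lucene {@link MergeScheduler} factory shared by all of the index writers
 * in the process. Pending merges are routed by total input size into a small
 * or a large {@link PriorityBlockingQueue}, each serviced by its own fixed
 * pool of worker threads, so a few huge merges cannot starve the many small
 * ones. Queue depths and merge throughput are published as Yammer metrics.
 */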
public class SharedMergeScheduler implements Closeable {

    private static final String LARGE_QUEUE_DEPTH_IN_BYTES = "Large Queue Depth In Bytes";
    private static final String LARGE_QUEUE_DEPTH = "Large Queue Depth";
    private static final String SMALL_QUEUE_DEPTH_IN_BYTES = "Small Queue Depth In Bytes";
    private static final String SMALL_QUEUE_DEPTH = "Small Queue Depth";
    private static final Log LOG = LogFactory.getLog(SharedMergeScheduler.class);
    private static final Meter _throughputBytes;

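    // Meter shared by all merges in the JVM; marked with the input byte count
    // of each merge as it completes.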
    static {
        MetricName mergeThroughputBytes = new MetricName(ORG_APACHE_BLUR, LUCENE, MERGE_THROUGHPUT_BYTES);
        _throughputBytes = Metrics.newMeter(mergeThroughputBytes, MERGE_THROUGHPUT_BYTES, TimeUnit.SECONDS);
    }

    private final AtomicBoolean _running = new AtomicBoolean(true);

    private final ExecutorService _smallMergeService;
    private final ExecutorService _largeMergeService;
    private final PriorityBlockingQueue<MergeWork> _smallMergeQueue = new PriorityBlockingQueue<MergeWork>();
    private final PriorityBlockingQueue<MergeWork> _largeMergeQueue = new PriorityBlockingQueue<MergeWork>();
    private final long _smallMergeThreshold;

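    /**
     * A single pending merge for a specific writer. Natural ordering is by
     * total input size, so the priority queues release the smallest pending
     * merge to a worker first.
     */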
    static class MergeWork implements Comparable<MergeWork> {

        private final String _id;
        private final MergePolicy.OneMerge _merge;
        private final IndexWriter _writer;
        private final long _size;

        public MergeWork(String id, OneMerge merge, IndexWriter writer) throws IOException {
            _id = id;
            _merge = merge;
            _writer = writer;
            _size = merge.totalBytesSize();
        }

        @Override
        public int compareTo(MergeWork o) {
            return Long.compare(_size, o._size);
        }

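        // Registers this merge in the watch map, runs it on the calling worker
        // thread, and on normal completion marks the input bytes on the shared
        // throughput meter.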
        public void merge() throws IOException {
            MergeStatus mergeStatus = new MergeStatus(_merge, _writer.getDirectory(), _size, _merge.segments);
            // Trace.setupTrace(BlurConstants.SHARED_MERGE_SCHEDULER_PREFIX + "-" +
            // System.nanoTime());
            startWatching(mergeStatus);
            try {
                _writer.merge(_merge);
            } finally {
                stopWatching(mergeStatus);
                // Trace.tearDownTrace();
            }
            _throughputBytes.mark(_size);
        }

        public String getId() {
            return _id;
        }

        public long getSize() {
            return _size;
        }
    }

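    /**
     * Bookkeeping for a merge that is currently running: start time, input
     * size, and the segments involved. Used by report() and finalReport() to
     * log elapsed time and MB/s.
     */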
    static class MergeStatus {

        final String _id;
        final Directory _directory;
        final long _start;
        final long _size;
        final OneMerge _oneMerge;
        final List<SegmentInfoPerCommit> _segments;

        MergeStatus(OneMerge oneMerge, Directory directory, long size, List<SegmentInfoPerCommit> segments) {
            _id = UUID.randomUUID().toString();
            _directory = directory;
            _start = System.nanoTime();
            _size = size;
            _oneMerge = oneMerge;
            _segments = segments;
        }

        void finalReport() throws IOException {
            long e = System.nanoTime();
            double time = (e - _start) / 1000000000.0;
            double rate = (_size / 1000.0 / 1000.0) / time;
            SegmentInfo segmentInfo = getSegmentInfo(_oneMerge);
            long segmentSize = getSegmentSize(segmentInfo, _directory);
            LOG.info(
                    "Merge took [{0} s] to complete at rate of [{1} MB/s], input bytes [{2}], output bytes [{3}], segments merged {4}",
                    time, rate, _size, segmentSize, _segments);
        }

        void report() throws IOException {
            long e = System.nanoTime();
            double time = (e - _start) / 1000000000.0;
            SegmentInfo segmentInfo = getSegmentInfo(_oneMerge);
            long segmentSize = getSegmentSize(segmentInfo, _directory);
            double rate = (segmentSize / 1000.0 / 1000.0) / time;
            LOG.info(
                    "Merge running for [{0} s] at rate of [{1} MB/s], input bytes [{2}], output bytes [{3}], segments being merged {4}",
                    time, rate, _size, segmentSize, _segments);
        }

    }

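    // By default, merges with at most 128 MB (decimal) of input bytes are
    // treated as small.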
    public SharedMergeScheduler(int threads) {
        this(threads, 128 * 1000 * 1000);
    }

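    // Merges currently executing, keyed by MergeStatus id.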
    private static final ConcurrentMap<String, MergeStatus> _mergeStatusMap = new ConcurrentHashMap<String, MergeStatus>();

    protected static void stopWatching(MergeStatus mergeStatus) throws IOException {
        MergeStatus status = _mergeStatusMap.remove(mergeStatus._id);
        status.finalReport();
    }

    protected static void startWatching(MergeStatus mergeStatus) {
        _mergeStatusMap.put(mergeStatus._id, mergeStatus);
    }

    public SharedMergeScheduler(int threads, long smallMergeThreshold) {
        MetricName mergeSmallQueueDepth = new MetricName(ORG_APACHE_BLUR, LUCENE, SMALL_QUEUE_DEPTH);
        MetricName mergeSmallQueueDepthInBytes = new MetricName(ORG_APACHE_BLUR, LUCENE,
                SMALL_QUEUE_DEPTH_IN_BYTES);
        MetricName mergeLargeQueueDepth = new MetricName(ORG_APACHE_BLUR, LUCENE, LARGE_QUEUE_DEPTH);
        MetricName mergeLargeQueueDepthInBytes = new MetricName(ORG_APACHE_BLUR, LUCENE,
                LARGE_QUEUE_DEPTH_IN_BYTES);

        _smallMergeThreshold = smallMergeThreshold;
        _smallMergeService = Executors.newThreadPool(SHARED_MERGE_SCHEDULER_PREFIX + "-small", threads, false);
        _largeMergeService = Executors.newThreadPool(SHARED_MERGE_SCHEDULER_PREFIX + "-large", threads, false);
        for (int i = 0; i < threads; i++) {
            _smallMergeService.submit(getMergerRunnable(_smallMergeQueue));
            _largeMergeService.submit(getMergerRunnable(_largeMergeQueue));
        }

        Metrics.newGauge(mergeSmallQueueDepth, new Gauge<Long>() {
            @Override
            public Long value() {
                return (long) _smallMergeQueue.size();
            }
        });
        Metrics.newGauge(mergeSmallQueueDepthInBytes, new Gauge<Long>() {
            @Override
            public Long value() {
                return getSizeInBytes(_smallMergeQueue);
            }
        });
        Metrics.newGauge(mergeLargeQueueDepth, new Gauge<Long>() {
            @Override
            public Long value() {
                return (long) _largeMergeQueue.size();
            }
        });
        Metrics.newGauge(mergeLargeQueueDepthInBytes, new Gauge<Long>() {
            @Override
            public Long value() {
                return getSizeInBytes(_largeMergeQueue);
            }
        });
    }

    protected long getSizeInBytes(PriorityBlockingQueue<MergeWork> queue) {
        long total = 0;
        for (MergeWork mergeWork : queue) {
            total += mergeWork.getSize();
        }
        return total;
    }

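    // Worker loop: block on the given queue, run one merge at a time, and log
    // (but survive) merge failures. Interruption during shutdown ends the loop.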
    private Runnable getMergerRunnable(final PriorityBlockingQueue<MergeWork> queue) {
        return new Runnable() {
            @Override
            public void run() {
                while (_running.get()) {
                    try {
                        MergeWork mergeWork = queue.take();
                        try {
                            mergeWork.merge();
                        } catch (Throwable t) {
                            LOG.error("Unknown error while trying to perform merge on [{0}]", t, mergeWork);
                        }
                    } catch (InterruptedException e) {
                        if (_running.get()) {
                            LOG.error("Unknown error", e);
                        }
                        return;
                    }
                }
            }
        };
    }

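    // Builds the per-writer adapter handed to IndexWriterConfig. Each adapter
    // gets a unique id so that, when it is closed, its merges still waiting in
    // the shared queues can be withdrawn.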
    public MergeScheduler getMergeScheduler() {
        return new MergeScheduler() {

            private final String _id = UUID.randomUUID().toString();

            @Override
            public void merge(IndexWriter writer) throws IOException {
                addMerges(_id, writer);
            }

            @Override
            public void close() throws IOException {
                remove(_id);
            }

            @Override
            public MergeScheduler clone() {
                return getMergeScheduler();
            }
        };
    }

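    // Drains every merge the writer currently has pending into the shared queues.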
    protected void addMerges(String id, IndexWriter writer) throws IOException {
        OneMerge merge;
        while ((merge = writer.getNextMerge()) != null) {
            addMerge(id, writer, merge);
        }
    }

    private void addMerge(String id, IndexWriter writer, OneMerge merge) throws IOException {
        MergeWork mergeWork = new MergeWork(id, merge, writer);
        if (isLargeMerge(merge)) {
            _largeMergeQueue.add(mergeWork);
        } else {
            _smallMergeQueue.add(mergeWork);
        }
    }

    private boolean isLargeMerge(OneMerge merge) throws IOException {
        return merge.totalBytesSize() > _smallMergeThreshold;
    }

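    // Withdraws queued (not yet running) merges that belong to a closed adapter.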
    protected void remove(String id) {
        remove(_smallMergeQueue, id);
        remove(_largeMergeQueue, id);
    }

    private void remove(PriorityBlockingQueue<MergeWork> queue, String id) {
        Iterator<MergeWork> iterator = queue.iterator();
        while (iterator.hasNext()) {
            MergeWork mergeWork = iterator.next();
            if (id.equals(mergeWork.getId())) {
                iterator.remove();
            }
        }
    }

    @Override
    public void close() throws IOException {
        _running.set(false);
        _smallMergeService.shutdownNow();
        _largeMergeService.shutdownNow();
    }

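    // Sums the on-disk length of every file sharing the new segment's name
    // prefix; returns -1 when the target segment is unknown.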
    protected static long getSegmentSize(SegmentInfo newSegmentInfo, Directory directory) throws IOException {
        if (newSegmentInfo == null) {
            return -1L;
        }
        String prefix = newSegmentInfo.name;
        long total = 0;
        for (String name : directory.listAll()) {
            if (name.startsWith(prefix)) {
                total += directory.fileLength(name);
            }
        }
        return total;
    }

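    // The merged segment's SegmentInfo is not directly accessible from this
    // package, so the nested "info" fields are read via reflection; any
    // reflection failure simply disables output-size reporting.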
    protected static SegmentInfo getSegmentInfo(OneMerge oneMerge) {
        Object segmentInfoPerCommit = getFieldObject(oneMerge, "info");
        if (segmentInfoPerCommit == null) {
            return null;
        }
        return (SegmentInfo) getFieldObject(segmentInfoPerCommit, "info");
    }

    protected static Object getFieldObject(Object o, String fieldName) {
        try {
            Field field = o.getClass().getDeclaredField(fieldName);
            field.setAccessible(true);
            return field.get(o);
        } catch (NoSuchFieldException | SecurityException | IllegalArgumentException | IllegalAccessException e) {
            return null;
        }
    }
}
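
Example Usage

The following is a minimal sketch, not taken from the Blur sources, of how this scheduler could be wired into a Lucene IndexWriter. It assumes a Lucene 4.3-era classpath (the SegmentInfoPerCommit API used above) plus the Blur jars; the RAMDirectory, analyzer, and thread count are illustrative choices, not requirements.

import org.apache.blur.manager.writer.SharedMergeScheduler;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class SharedMergeSchedulerUsage {

    public static void main(String[] args) throws Exception {
        // One shared scheduler for the whole process: 3 small-merge and 3
        // large-merge worker threads, with the default 128 MB size cutoff.
        SharedMergeScheduler sharedMergeScheduler = new SharedMergeScheduler(3);
        try {
            RAMDirectory directory = new RAMDirectory();
            IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43,
                    new StandardAnalyzer(Version.LUCENE_43));
            // Each writer gets its own adapter; all adapters feed the same queues.
            conf.setMergeScheduler(sharedMergeScheduler.getMergeScheduler());
            IndexWriter writer = new IndexWriter(directory, conf);

            Document document = new Document();
            document.add(new TextField("body", "hello merge scheduler", Store.YES));
            writer.addDocument(document);

            // Closing the writer also closes its adapter, which withdraws any
            // of its merges still waiting in the shared queues.
            writer.close();
        } finally {
            // Stops the shared worker threads.
            sharedMergeScheduler.close();
        }
    }
}

Because getMergeScheduler() returns a fresh adapter with its own id each time, many writers can safely share one SharedMergeScheduler; closing one writer only removes that writer's queued merges.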