gobblin.data.management.copy.ConcurrentBoundedWorkUnitList.java Source code

Introduction

Here is the source code for gobblin.data.management.copy.ConcurrentBoundedWorkUnitList.java, a bounded container of WorkUnits that evicts lower-priority file sets when higher-priority ones need room.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.data.management.copy;

import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Sets;

import gobblin.data.management.partition.FileSet;
import gobblin.source.workunit.WorkUnit;

import lombok.Builder;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;

/**
 * A {@link WorkUnit} container that is bounded, supports concurrent all-or-nothing addAll, and supports priority of
 * file sets, i.e. attempting to add a file set with higher priority will automatically evict
 * lower-priority {@link gobblin.data.management.partition.FileSet}s if necessary.
 *
 * <p>
 *   File sets in {@link CopySource} are handled as {@link gobblin.data.management.partition.FileSet}, so this class uses a {@link gobblin.data.management.partition.FileSet} comparator
 *   for priority. If fileSetA < fileSetB, then fileSetA has higher priority than fileSetB
 *   (similar to {@link java.util.PriorityQueue}).
 * </p>
 */
@Slf4j
class ConcurrentBoundedWorkUnitList {

    private final TreeMap<FileSet<CopyEntity>, List<WorkUnit>> workUnitsMap;
    @Getter
    private final Comparator<FileSet<CopyEntity>> comparator;
    private final int maxSize;
    private final int strictMaxSize;
    private int currentSize;
    /** Set to true the first time a file set is rejected (i.e. doesn't fit in the container) */
    private boolean rejectedFileSet;

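    /**
     * Wraps the user-provided comparator, breaking ties by dataset URN and then by file set name so that
     * distinct file sets with equal user priority still map to distinct keys in the backing {@link TreeMap}.
     */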
    private static class AugmentedComparator implements Comparator<FileSet<CopyEntity>> {
        private final Comparator<FileSet<CopyEntity>> userProvidedComparator;

        public AugmentedComparator(Comparator<FileSet<CopyEntity>> userProvidedComparator) {
            this.userProvidedComparator = userProvidedComparator;
        }

        @Override
        public int compare(FileSet<CopyEntity> p1, FileSet<CopyEntity> p2) {
            int userProvidedCompare = this.userProvidedComparator.compare(p1, p2);
            if (userProvidedCompare == 0) {
                int datasetCompare = p1.getDataset().datasetURN().compareTo(p2.getDataset().datasetURN());
                if (datasetCompare == 0) {
                    return p1.getName().compareTo(p2.getName());
                }
                return datasetCompare;
            }
            return userProvidedCompare;
        }
    }

    /**
     * Creates a new {@link ConcurrentBoundedWorkUnitList}.
     * @param maxSize Maximum number of {@link WorkUnit}s to contain.
     * @param comparator {@link Comparator} for {@link gobblin.data.management.partition.FileSet}s to use for {@link gobblin.data.management.partition.FileSet} priority.
     * @param strictLimitMultiplier the list only starts rejecting {@link WorkUnit}s once its size would exceed
     *                              maxSize * strictLimitMultiplier. If this parameter is < 1, it is auto-set to 1.
     */
    @Builder
    public ConcurrentBoundedWorkUnitList(int maxSize, final Comparator<FileSet<CopyEntity>> comparator,
            double strictLimitMultiplier) {
        this.currentSize = 0;
        this.maxSize = maxSize;
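        // Clamp the multiplier to [1, Integer.MAX_VALUE / maxSize] so that the hard limit is at least
        // maxSize and the cast to int below cannot overflow.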
        double actualStrictLimitMultiplier = Math.min((Integer.MAX_VALUE / (double) this.maxSize),
                Math.max(1.0, strictLimitMultiplier));
        this.strictMaxSize = (int) (this.maxSize * actualStrictLimitMultiplier);
        this.comparator = comparator == null ? new AllEqualComparator<FileSet<CopyEntity>>() : comparator;
        this.workUnitsMap = new TreeMap<>(new AugmentedComparator(this.comparator));
        this.rejectedFileSet = false;
    }

    /**
     * Add a file set to the container.
     * @param fileSet File set, expressed as a {@link gobblin.data.management.partition.FileSet} of {@link CopyEntity}s.
     * @param workUnits List of {@link WorkUnit}s corresponding to this file set.
     * @return true if the file set was added to the container, false otherwise (i.e. it did not fit within the
     * strict size limit and could not evict enough lower-priority file sets).
     */
    public boolean addFileSet(FileSet<CopyEntity> fileSet, List<WorkUnit> workUnits) {
        boolean addedWorkunits = addFileSetImpl(fileSet, workUnits);
        if (!addedWorkunits) {
            this.rejectedFileSet = true;
        }
        return addedWorkunits;
    }

    private synchronized boolean addFileSetImpl(FileSet<CopyEntity> fileSet, List<WorkUnit> workUnits) {
        if (this.currentSize + workUnits.size() > this.strictMaxSize) {
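            // The incoming file set does not fit under the hard limit. If even the lowest-priority resident
            // file set has priority at least as high as the incoming one, nothing can be evicted; reject.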
            if (this.comparator.compare(this.workUnitsMap.lastKey(), fileSet) <= 0) {
                return false;
            }
            int tmpSize = this.currentSize;
            Set<FileSet<CopyEntity>> partitionsToDelete = Sets.newHashSet();

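            // Walk resident file sets from lowest to highest priority, marking them for eviction until enough
            // room is freed. Stop and reject if a file set that outranks or ties the incoming one is reached.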
            for (FileSet<CopyEntity> existingFileSet : this.workUnitsMap.descendingKeySet()) {
                if (this.comparator.compare(existingFileSet, fileSet) <= 0) {
                    return false;
                }
                tmpSize -= this.workUnitsMap.get(existingFileSet).size();
                partitionsToDelete.add(existingFileSet);
                if (tmpSize + workUnits.size() <= this.strictMaxSize) {
                    break;
                }
            }

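            // Evict the marked lower-priority file sets and reclaim their space.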
            for (FileSet<CopyEntity> fileSetToRemove : partitionsToDelete) {
                List<WorkUnit> workUnitsRemoved = this.workUnitsMap.remove(fileSetToRemove);
                this.currentSize -= workUnitsRemoved.size();
            }
        }

        // TreeMap determines key equality using the provided comparator. If multiple file sets compare as equal,
        // we need to concatenate their work units; otherwise only the last one would survive. The comparator must
        // be transitive, but it need not be consistent with equals.
        if (!this.workUnitsMap.containsKey(fileSet)) {
            this.workUnitsMap.put(fileSet, workUnits);
        } else {
            this.workUnitsMap.get(fileSet).addAll(workUnits);
        }

        this.currentSize += workUnits.size();
        log.info(String.format(
                "Added %d work units to bounded list. Total size: %d, soft limit: %d, hard limit: %d.",
                workUnits.size(), this.currentSize, this.maxSize, this.strictMaxSize));
        return true;
    }

    /**
     * @return Whether any call to {@link #addFileSet} has returned false, i.e. some file set has been rejected
     * because it did not fit within the strict capacity limit.
     */
    public boolean hasRejectedFileSet() {
        return this.rejectedFileSet;
    }

    /**
     * @return Whether the list has reached its soft max size.
     */
    public synchronized boolean isFull() {
        return this.currentSize >= this.maxSize;
    }

    /**
     * Get the {@link List} of {@link WorkUnit}s in this container.
     */
    public List<WorkUnit> getWorkUnits() {
        ImmutableList.Builder<WorkUnit> allWorkUnits = ImmutableList.builder();
        for (List<WorkUnit> workUnits : this.workUnitsMap.values()) {
            allWorkUnits.addAll(workUnits);
        }
        return allWorkUnits.build();
    }

    /**
     * Get the raw map backing this object.
     */
    public Map<FileSet<CopyEntity>, List<WorkUnit>> getRawWorkUnitMap() {
        return this.workUnitsMap;
    }
}
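
Usage

The class is package-private and its constructor carries Lombok's @Builder annotation, so callers in the same package obtain instances through the generated builder. The sketch below is illustrative only: the file set, its work units, and the priority comparator are hypothetical placeholders standing in for whatever CopySource produces, and the size settings are arbitrary example values.

package gobblin.data.management.copy;

import java.util.Comparator;
import java.util.List;

import gobblin.data.management.partition.FileSet;
import gobblin.source.workunit.WorkUnit;

/** Illustrative sketch only; not part of the original source. */
public class ConcurrentBoundedWorkUnitListUsageSketch {

    // A "smaller" file set under the comparator has higher priority, as in java.util.PriorityQueue.
    static boolean tryAdd(FileSet<CopyEntity> fileSet, List<WorkUnit> workUnits,
            Comparator<FileSet<CopyEntity>> priority) {

        ConcurrentBoundedWorkUnitList list = ConcurrentBoundedWorkUnitList.builder()
                .maxSize(1000)                // soft limit on the total number of work units
                .strictLimitMultiplier(1.2)   // hard limit becomes 1200 work units
                .comparator(priority)         // null falls back to AllEqualComparator
                .build();

        // A false return means the container is at its hard limit and every resident file set
        // has priority at least as high as the incoming one; list.hasRejectedFileSet() will then
        // also report true for the rest of the container's lifetime.
        return list.addFileSet(fileSet, workUnits);
    }
}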