com.asakusafw.runtime.util.cache.HadoopFileCacheRepository.java Source code

Java tutorial

Introduction

Here is the source code for com.asakusafw.runtime.util.cache.HadoopFileCacheRepository.java

Source

/**
 * Copyright 2011-2016 Asakusa Framework Team.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.asakusafw.runtime.util.cache;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.text.MessageFormat;
import java.util.zip.CRC32;
import java.util.zip.Checksum;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import com.asakusafw.runtime.util.lock.LockObject;
import com.asakusafw.runtime.util.lock.LockProvider;
import com.asakusafw.runtime.util.lock.RetryObject;
import com.asakusafw.runtime.util.lock.RetryStrategy;

/**
 * Manages cache files on Hadoop file system.
 * @since 0.7.0
 */
public class HadoopFileCacheRepository implements FileCacheRepository {

    static final Log LOG = LogFactory.getLog(HadoopFileCacheRepository.class);

    static final String KEY_CHECK_BEFORE_DELETE = "com.asakusafw.cache.hadoop.deleteOnlyIfExists"; //$NON-NLS-1$

    static final boolean DEFAULT_CHECK_BEFORE_DELETE = true;

    private final Configuration configuration;

    private final Path repository;

    private final LockProvider<? super Path> lockProvider;

    private final RetryStrategy retryStrategy;

    private final boolean checkBeforeDelete;

    private final ThreadLocal<byte[]> byteBuffers = new ThreadLocal<byte[]>() {
        @Override
        protected byte[] initialValue() {
            return new byte[1024];
        }
    };

    /**
     * Creates a new instance.
     * @param configuration the current configuration
     * @param repository the cache root path (must be absolute)
     * @param lockProvider the cache lock provider
     * @param retryStrategy the retry strategy
     */
    public HadoopFileCacheRepository(Configuration configuration, Path repository,
            LockProvider<? super Path> lockProvider, RetryStrategy retryStrategy) {
        if (repository.toUri().getScheme() == null) {
            throw new IllegalArgumentException(
                    MessageFormat.format("Cache repository location must contan the scheme: {0}", repository));
        }
        this.configuration = configuration;
        this.repository = repository;
        this.lockProvider = lockProvider;
        this.retryStrategy = retryStrategy;
        this.checkBeforeDelete = configuration.getBoolean(KEY_CHECK_BEFORE_DELETE, DEFAULT_CHECK_BEFORE_DELETE);
    }

    @Override
    public Path resolve(Path file) throws IOException, InterruptedException {
        FileSystem fs = file.getFileSystem(configuration);
        Path qualified = fs.makeQualified(file);
        return doResolve(qualified);
    }

    private Path doResolve(Path sourcePath) throws IOException, InterruptedException {
        assert sourcePath.isAbsolute();
        FileSystem fs = sourcePath.getFileSystem(configuration);
        if (fs.exists(sourcePath) == false) {
            throw new FileNotFoundException(sourcePath.toString());
        }
        long sourceChecksum = computeChecksum(fs, sourcePath);
        Path cachePath = computeCachePath(sourcePath);
        Path cacheChecksumPath = computeCacheChecksumPath(cachePath);

        IOException firstException = null;
        RetryObject retry = retryStrategy
                .newInstance(MessageFormat.format("preparing cache ({0} -> {1})", sourcePath, cachePath));
        do {
            try (LockObject<? super Path> lock = lockProvider.tryLock(cachePath)) {
                // TODO reduce lock scope?
                if (lock == null) {
                    continue;
                }
                if (isCached(cachePath, cacheChecksumPath, sourceChecksum)) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug(MessageFormat.format("cache hit: {0} -> {1}", //$NON-NLS-1$
                                sourcePath, cachePath));
                    }
                    // just returns cached file
                } else {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug(MessageFormat.format("cache miss: {0} -> {1}", //$NON-NLS-1$
                                sourcePath, cachePath));
                    }
                    updateCache(sourcePath, sourceChecksum, cachePath, cacheChecksumPath);
                }
                return cachePath;
            } catch (IOException e) {
                LOG.warn(MessageFormat.format("Failed to prepare cache: {0} -> {1}", sourcePath, cachePath), e);
                if (firstException == null) {
                    firstException = e;
                }
            }
        } while (retry.waitForNextAttempt());
        if (firstException == null) {
            throw new IOException(MessageFormat.format("Failed to acquire a lock for remote cache file: {0} ({1})",
                    sourcePath, cachePath));
        }
        throw firstException;
    }

    private long computeChecksum(FileSystem fs, Path file) throws IOException {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("Computing checksum: {0}", //$NON-NLS-1$
                    file));
        }
        Checksum checksum = new CRC32();
        byte[] buf = byteBuffers.get();
        try (FSDataInputStream input = fs.open(file)) {
            while (true) {
                int read = input.read(buf);
                if (read < 0) {
                    break;
                }
                checksum.update(buf, 0, read);
            }
        }
        return checksum.getValue();
    }

    private Path computeCachePath(Path file) {
        assert repository != null;
        String directoryName;
        Path parent = file.getParent();
        if (parent == null) {
            directoryName = String.format("%08x", 0); //$NON-NLS-1$
        } else {
            directoryName = String.format("%08x", parent.toString().hashCode()); //$NON-NLS-1$
        }
        Path directory = new Path(repository, directoryName);
        Path target = new Path(directory, file.getName());
        return target;
    }

    private Path computeCacheChecksumPath(Path cachePath) {
        Path parent = cachePath.getParent();
        String name = String.format("%s.acrc", cachePath.getName()); //$NON-NLS-1$
        return new Path(parent, name);
    }

    private boolean isCached(Path cacheFilePath, Path cacheChecksumPath, long checksum) throws IOException {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("checking remote cache: {0}", //$NON-NLS-1$
                    cacheFilePath));
        }
        FileSystem fs = cacheChecksumPath.getFileSystem(configuration);
        if (fs.exists(cacheChecksumPath) == false || fs.exists(cacheFilePath) == false) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(MessageFormat.format("remote cache is not found: {0}", //$NON-NLS-1$
                        cacheFilePath));
            }
            return false;
        } else {
            if (LOG.isDebugEnabled()) {
                LOG.debug(MessageFormat.format("reading remote cache checksum: {0}", //$NON-NLS-1$
                        cacheFilePath));
            }
            long other;
            try (FSDataInputStream input = fs.open(cacheChecksumPath)) {
                other = input.readLong();
            }
            return checksum == other;
        }
    }

    private void updateCache(Path file, long checksum, Path cachePath, Path cacheChecksumPath) throws IOException {
        if (LOG.isInfoEnabled()) {
            LOG.info(MessageFormat.format("updating library cache: {0} -> {1}", file, cachePath));
        }

        FileSystem sourceFs = file.getFileSystem(configuration);
        FileSystem cacheFs = cachePath.getFileSystem(configuration);

        // remove checksum file -> cachePath
        delete(cacheFs, cacheChecksumPath);
        delete(cacheFs, cachePath);

        // sync source file to cache file
        try (FSDataOutputStream checksumOutput = cacheFs.create(cacheChecksumPath, false)) {
            checksumOutput.writeLong(checksum);
            syncFile(sourceFs, file, cacheFs, cachePath);
        }
    }

    private void delete(FileSystem fs, Path path) throws IOException {
        if (checkBeforeDelete && fs.exists(path) == false) {
            return;
        }
        fs.delete(path, false);
    }

    private void syncFile(FileSystem sourceFs, Path sourceFile, FileSystem targetFs, Path targetFile)
            throws IOException {
        byte[] buf = byteBuffers.get();
        try (FSDataOutputStream output = targetFs.create(targetFile, false);
                FSDataInputStream input = sourceFs.open(sourceFile)) {
            while (true) {
                int read = input.read(buf);
                if (read < 0) {
                    break;
                }
                output.write(buf, 0, read);
            }
        }
    }
}