org.n52.youngs.control.impl.SingleThreadBulkRunner.java Source code

Java tutorial

Introduction

Here is the source code for org.n52.youngs.control.impl.SingleThreadBulkRunner.java

Source

/*
 * Copyright 2015-2016 52North Initiative for Geospatial Open Source
 * Software GmbH
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.n52.youngs.control.impl;

import com.google.common.base.MoreObjects;
import com.google.common.base.Stopwatch;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import org.n52.youngs.api.Report;
import org.n52.youngs.control.Runner;
import org.n52.youngs.exception.MappingError;
import org.n52.youngs.exception.SinkError;
import org.n52.youngs.harvest.Source;
import org.n52.youngs.harvest.SourceRecord;
import org.n52.youngs.impl.ReportImpl;
import org.n52.youngs.load.Sink;
import org.n52.youngs.load.SinkRecord;
import org.n52.youngs.transform.Mapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A harvesting runner that downloads a fixed number of records at a time from
 * the source, transforms these, and loads them into the sink.
 *
 * Everything happens within one thread and consecutively when the method
 * {@link #load(Sink)} is called.
 *
 * @author <a href="mailto:d.nuest@52north.org">Daniel N&uuml;st</a>
 */
public class SingleThreadBulkRunner implements Runner {

    private static final Logger log = LoggerFactory.getLogger(SingleThreadBulkRunner.class);

    private static final long DEFAULT_BULK_SIZE = 10;

    /** Maximum number of records requested from the source per bulk. */
    private long bulkSize = DEFAULT_BULK_SIZE;

    private Source source;

    private Mapper mapper;

    /** Maximum number of records to harvest in one call of {@link #load(Sink)}. */
    private long recordsLimit = Long.MAX_VALUE;

    /** Empty until the first bulk of a load has been completed. */
    private Optional<Double> completedPercentage = Optional.empty();

    private Sink sink;

    // Always false in this class, i.e. the "TESTRUN" branch in load() is currently never taken.
    private final boolean testRun = false;

    /** Position of the first record to request from the source. */
    private long startPosition = 1;

    public SingleThreadBulkRunner() {
        //
    }

    /**
     * Sets the number of records requested from the source per bulk.
     *
     * @param bulkSize the bulk size; a value of zero or less makes {@link #load(Sink)} stop before the first request
     * @return this runner, for call chaining
     */
    public SingleThreadBulkRunner setBulkSize(long bulkSize) {
        this.bulkSize = bulkSize;
        return this;
    }

    /**
     * Sets the position of the first record that is requested from the source.
     *
     * @param startPosition the position of the first record (the default is 1)
     * @return this runner, for call chaining
     */
    public SingleThreadBulkRunner setStartPosition(long startPosition) {
        this.startPosition = startPosition;
        return this;
    }

    /**
     * Sets the maximum number of records that are harvested by {@link #load(Sink)}.
     *
     * @param recordsLimit the maximum number of records (the default is {@link Long#MAX_VALUE})
     * @return this runner, for call chaining
     */
    public SingleThreadBulkRunner setRecordsLimit(long recordsLimit) {
        this.recordsLimit = recordsLimit;
        return this;
    }

    /**
     * Stores the source; no records are requested before {@link #load(Sink)} is called.
     *
     * @param source the source to harvest from
     * @return this runner, for call chaining
     */
    @Override
    public SingleThreadBulkRunner harvest(final Source source) {
        this.source = source;
        log.debug("Saved source {}, waiting for load() to be called...", source);
        return this;
    }

    /**
     * Stores the mapper; no records are mapped before {@link #load(Sink)} is called.
     *
     * @param mapper the mapper used to transform source records into sink records
     * @return this runner, for call chaining
     */
    @Override
    public SingleThreadBulkRunner transform(final Mapper mapper) {
        this.mapper = mapper;
        log.debug("Saved mapper {}, waiting for load() to be called...", mapper);
        return this;
    }

    /**
     * Prepares the sink and then requests, maps and stores the records bulk by bulk.
     *
     * Problems with single records are added to the report as failed records, a runtime
     * exception while processing a bulk is added to the report as a message and the next
     * bulk is processed.
     *
     * @param sink the sink to store the mapped records in
     * @return the report with successful and failed records and messages
     * @throws NullPointerException if the source, the mapper, or the sink is {@code null}
     */
    @Override
    public Report load(final Sink sink) {
        Objects.requireNonNull(source, "source must be set via harvest() before calling load()");
        Objects.requireNonNull(mapper, "mapper must be set via transform() before calling load()");
        this.sink = Objects.requireNonNull(sink, "sink must not be null");

        log.info("Starting harvest from {} to {} with {}", source, this.sink, mapper);
        final Report report = new ReportImpl();

        if (!prepareSink(report)) {
            return report;
        }

        final Stopwatch timer = Stopwatch.createStarted();
        final long count = source.getRecordCount();
        final long total = recordsToHarvest(count);
        // position of the last record to request (inclusive); cannot exceed count
        final long limit = startPosition - 1 + total;

        final Stopwatch sourceTimer = Stopwatch.createUnstarted();
        final Stopwatch mappingTimer = Stopwatch.createUnstarted();
        final Stopwatch sinkTimer = Stopwatch.createUnstarted();
        final Stopwatch currentBulkTimer = Stopwatch.createUnstarted();
        double bulkTimeAvg = 0d;
        long runNumber = 0;
        long pageStart = startPosition;

        while (pageStart <= limit) {
            final long size = Math.min(limit - pageStart + 1, bulkSize);
            if (size <= 0) {
                // only possible with a non-positive bulk size
                break;
            }
            currentBulkTimer.start();
            log.info("### [{}] Requesting {} records from {} starting at {}, last requested record will be {} ###",
                    runNumber, size, source.getEndpoint(), pageStart, limit);

            try {
                sourceTimer.start();
                Collection<SourceRecord> records = source.getRecords(pageStart, size, report);
                sourceTimer.stop();

                log.debug("Mapping {} retrieved records.", records.size());
                mappingTimer.start();
                List<SinkRecord> mappedRecords = mapRecords(records, report);
                mappingTimer.stop();

                log.debug("Storing {} mapped records.", mappedRecords.size());
                if (!testRun) {
                    sinkTimer.start();
                    storeRecords(mappedRecords, report);
                    sinkTimer.stop();
                } else {
                    log.info("TESTRUN, created documents are:\n{}", Arrays.toString(mappedRecords.toArray()));
                }
            } catch (RuntimeException e) {
                // a timer that is still running must be stopped, otherwise start() fails in the next bulk
                if (sourceTimer.isRunning()) {
                    sourceTimer.stop();
                }
                if (mappingTimer.isRunning()) {
                    mappingTimer.stop();
                }
                if (sinkTimer.isRunning()) {
                    sinkTimer.stop();
                }

                String msg = String.format("Problem processing records %s to %s: %s", pageStart,
                        pageStart + size - 1, e.getMessage());
                log.error(msg, e);
                report.addMessage(msg);
            }

            // advance by the number of requested records; size only differs from bulkSize for the last bulk
            pageStart += size;

            currentBulkTimer.stop();
            final long bulkSeconds = currentBulkTimer.elapsed(TimeUnit.SECONDS);
            bulkTimeAvg = ((bulkTimeAvg * runNumber) + bulkSeconds) / (runNumber + 1);
            updateAndLog(runNumber, pageStart - startPosition, total, bulkSeconds, bulkTimeAvg);
            currentBulkTimer.reset();

            runNumber++;
        }

        timer.stop();
        // reuses the record count determined at the start instead of asking the source again
        log.info("Completed harvesting for {} ({} failed) of {} records in {} minutes",
                report.getNumberOfRecordsAdded(), report.getNumberOfRecordsFailed(), count,
                timer.elapsed(TimeUnit.MINUTES));
        log.info("Time spent (minutes): source={}, mapping={}, sink={}", sourceTimer.elapsed(TimeUnit.MINUTES),
                mappingTimer.elapsed(TimeUnit.MINUTES), sinkTimer.elapsed(TimeUnit.MINUTES));

        return report;
    }

    /**
     * @return the percentage of records processed by the current or last load, or
     *         {@link Double#NEGATIVE_INFINITY} as long as no bulk has been completed
     */
    @Override
    public double getCompletedPercentage() {
        return this.completedPercentage.orElse(Double.NEGATIVE_INFINITY);
    }

    @Override
    public String toString() {
        return MoreObjects.toStringHelper(this).add("source", source).add("mapper", mapper).add("sink", sink)
                .toString();
    }

    /**
     * Prepares the sink with the mapper's configuration; a failure is logged and added to the report.
     *
     * @return true if the sink reported a successful preparation
     */
    private boolean prepareSink(final Report report) {
        try {
            if (sink.prepare(mapper.getMapper())) {
                return true;
            }
            String msg = "The sink could not be prepared. Stopping load, please check the logs.";
            log.error(msg);
            report.addMessage(msg);
        } catch (SinkError e) {
            log.error("Problem preparing sink", e);
            report.addMessage(String.format("Problem preparing sink: %s", e.getMessage()));
        }
        return false;
    }

    /**
     * Calculates how many records are to be harvested: the records limit, capped by the number of
     * records available from the start position on. Capping before adding the start position
     * avoids the long overflow of "recordsLimit + startPosition" with the default limit.
     *
     * @return the number of records to harvest, never negative
     */
    private long recordsToHarvest(final long recordCount) {
        final long available = recordCount - (startPosition - 1);
        return Math.max(0, Math.min(recordsLimit, available));
    }

    /** Maps the given records; records failing with a mapping error are reported and left out. */
    private List<SinkRecord> mapRecords(final Collection<SourceRecord> records, final Report report) {
        return records.stream().map(record -> {
            try {
                return mapper.map(record);
            } catch (MappingError e) {
                report.addFailedRecord(record.toString(), "Problem during mapping: " + e.getMessage());
                return null;
            }
        }).filter(Objects::nonNull).collect(Collectors.toList());
    }

    /** Stores the given records one by one and adds the outcome for each record to the report. */
    private void storeRecords(final List<SinkRecord> mappedRecords, final Report report) {
        for (SinkRecord record : mappedRecords) {
            try {
                if (sink.store(record)) {
                    report.addSuccessfulRecord(record.getId());
                } else {
                    report.addFailedRecord(record.getId(), "see sink log");
                }
            } catch (SinkError e) {
                report.addFailedRecord(record.getId(), "Problem during storing: " + e.getMessage());
            }
        }
    }

    /** Updates the completed percentage from processed vs. scheduled records and logs the progress. */
    private void updateAndLog(long run, long processed, long total, long bulkSeconds, double bulkAverageSeconds) {
        // total is positive whenever a bulk was processed; the guard only protects against division by zero
        double percentageTask = total > 0 ? (double) processed / total * 100 : 100d;
        this.completedPercentage = Optional.of(percentageTask);
        log.info("### [{}] Completed {}% of task in {} seconds (avg: {} seconds) ###", run,
                String.format("%1$,.2f", getCompletedPercentage()), bulkSeconds,
                String.format("%1$,.2f", bulkAverageSeconds));
    }

}