org.lilyproject.repository.bulk.serial.BulkImportTool.java Source code

Java tutorial

Introduction

Here is the source code for org.lilyproject.repository.bulk.serial.BulkImportTool.java

Source

/*
 * Copyright 2012 NGDATA nv
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.lilyproject.repository.bulk.serial;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.List;

import com.google.common.base.Charsets;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.lilyproject.repository.bulk.AbstractBulkImportCliTool;
import org.lilyproject.repository.bulk.BulkIngester;
import org.lilyproject.repository.bulk.LineMapper;
import org.lilyproject.repository.bulk.LineMappingContext;
import org.lilyproject.repository.bulk.RecordWriter;
import org.lilyproject.repository.bulk.jython.JythonLineMapper;
import org.python.core.PyException;
import org.python.google.common.io.Files;

/**
 * A bulk import tool similar to {@link org.lilyproject.repository.bulk.mapreduce.BulkImportTool} that works without
 * MapReduce.
 *
 * <p>The input file is read line by line in the current JVM. Each line is handed to a {@link JythonLineMapper}
 * built from the configured Python mapper script, together with a {@link LineMappingContext} that wraps the
 * {@link BulkIngester} and the {@link RecordWriter} in use. With the {@code -d}/{@code --dryrun} option a
 * {@link DebugRecordWriter} on {@code System.out} is used instead of a {@link ThreadedRecordWriter}.
 */
public class BulkImportTool extends AbstractBulkImportCliTool {

    private static final Log log = LogFactory.getLog(BulkImportTool.class);

    /** The {@code -d}/{@code --dryrun} command line option; assigned by {@link #getOptions()}. */
    private Option dryRunOption;

    /** Whether the dry-run option was given; assigned by {@link #processOptions(CommandLine)}. */
    private boolean dryRun;

    /**
     * Returns the options of the superclass extended with the dry-run option.
     *
     * @return the option list obtained from {@code super.getOptions()}, with the dry-run option appended
     */
    // "static-access": OptionBuilder is configured through static methods that are chained via instances.
    @SuppressWarnings("static-access")
    @Override
    public List<Option> getOptions() {
        dryRunOption = OptionBuilder
                .withDescription("Only print out the created records without writing them to Lily")
                .withLongOpt("dryrun").create('d');

        List<Option> options = super.getOptions();
        options.add(dryRunOption);
        return options;
    }

    /**
     * Lets the superclass process the command line first and then records whether dry-run was requested.
     *
     * @param cmd the parsed command line
     * @return the non-zero status of the superclass if it reported one, otherwise {@code 0}
     * @throws Exception if the superclass fails to process the options
     */
    @Override
    protected int processOptions(CommandLine cmd) throws Exception {
        int status = super.processOptions(cmd);
        if (status != 0) {
            return status;
        }

        dryRun = cmd.hasOption(dryRunOption.getOpt());
        return 0;
    }

    @Override
    protected String getCmdName() {
        return "lily-bulk-import";
    }

    /**
     * Runs the import: maps every line of the input file through the Jython line mapper.
     *
     * <p>The reader and the record writer are both closed when this method exits, whether normally or through
     * an exception. The reader is closed first; the record writer is closed even if closing the reader fails.
     * Unless running in dry-run mode, a summary line is printed to {@code System.out} after a successful run.
     *
     * @param cmd the parsed command line (not used directly here)
     * @return {@code 0} on success, {@code -1} if the Python mapper code raised a {@link PyException}
     * @throws Exception on any other failure, e.g. when the input file or the mapper script cannot be read
     */
    @Override
    public int run(CommandLine cmd) throws Exception {
        // NOTE(review): 30000 is presumably a ZooKeeper timeout in milliseconds - confirm against BulkIngester.
        // NOTE(review): bulkIngester is never released here; if BulkIngester offers a close method it should
        // probably be called once the import is done - confirm against its API.
        BulkIngester bulkIngester = BulkIngester.newBulkIngester(zkConnectionString, 30000, outputRepository,
                outputTable, bulkMode);

        RecordWriter recordWriter = createRecordWriter();
        long start = System.currentTimeMillis();
        int numLines = 0;

        try {
            // Decode the input explicitly as UTF-8, consistent with how the mapper script is read below,
            // rather than relying on the platform default charset.
            BufferedReader bufferedReader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(inputPath), Charsets.UTF_8));
            try {
                LineMapper lineMapper = new JythonLineMapper(
                        Files.toString(new File(pythonMapperPath), Charsets.UTF_8), pythonSymbol);
                LineMappingContext mappingContext = new LineMappingContext(bulkIngester, recordWriter);
                String line;
                while ((line = bufferedReader.readLine()) != null) {
                    lineMapper.mapLine(line, mappingContext);
                    numLines++;
                }
            } finally {
                bufferedReader.close();
            }
        } catch (PyException pe) {
            pe.printStackTrace(); // Print the Jython-native stack trace
            log.error("Exception encountered in Python code", pe);
            return -1;
        } finally {
            // Runs even when closing the reader failed, so the writer is never left open.
            recordWriter.close();
        }

        float duration = (System.currentTimeMillis() - start) / 1000f;
        if (!dryRun) {
            System.out.printf("Imported %d lines as %d records in %.2f seconds\n", numLines,
                    recordWriter.getNumRecords(), duration);
        }

        return 0;
    }

    /**
     * Creates the record writer matching the current mode.
     *
     * @return a {@link DebugRecordWriter} on {@code System.out} in dry-run mode, otherwise a
     *         {@link ThreadedRecordWriter} for the configured repository and table
     * @throws Exception if the writer cannot be created
     */
    private RecordWriter createRecordWriter() throws Exception {
        if (dryRun) {
            return new DebugRecordWriter(System.out);
        }
        // NOTE(review): 10 is presumably the number of writer threads - confirm against ThreadedRecordWriter.
        return new ThreadedRecordWriter(zkConnectionString, 10, outputRepository, outputTable, bulkMode);
    }

    /**
     * Command line entry point.
     *
     * @param args the command line arguments, passed on to {@code start}
     * @throws IOException declared for callers; thrown if {@code start} reports one
     */
    public static void main(String[] args) throws IOException {
        new BulkImportTool().start(args);
    }

}