Example usage for org.apache.hadoop.conf Configuration getLong

List of usage examples for org.apache.hadoop.conf Configuration getLong

Introduction

On this page you can find example usage of org.apache.hadoop.conf Configuration getLong.

Prototype

public long getLong(String name, long defaultValue) 

Document

Get the value of the name property as a long.
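
A minimal sketch of a typical call. The property name "my.app.timeout.ms" and the default 30000L are placeholders for illustration, not part of the Hadoop API:

import org.apache.hadoop.conf.Configuration;

public class GetLongExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // getLong returns the property value parsed as a long,
        // or the supplied default when the property is unset.
        long timeoutMs = conf.getLong("my.app.timeout.ms", 30000L);
        System.out.println("timeoutMs = " + timeoutMs);
    }
}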

Usage

From source file:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MergeManager.java

License:Apache License

/**
 * Construct the MergeManager. Must call start before it becomes usable.
 */
public MergeManager(Configuration conf, FileSystem localFS, LocalDirAllocator localDirAllocator,
        InputContext inputContext, Combiner combiner, TezCounter spilledRecordsCounter,
        TezCounter reduceCombineInputCounter, TezCounter mergedMapOutputsCounter,
        ExceptionReporter exceptionReporter, long initialMemoryAvailable, CompressionCodec codec,
        boolean ifileReadAheadEnabled, int ifileReadAheadLength) {
    this.inputContext = inputContext;
    this.conf = conf;
    this.localDirAllocator = localDirAllocator;
    this.exceptionReporter = exceptionReporter;
    this.initialMemoryAvailable = initialMemoryAvailable;

    this.combiner = combiner;

    this.reduceCombineInputCounter = reduceCombineInputCounter;
    this.spilledRecordsCounter = spilledRecordsCounter;
    this.mergedMapOutputsCounter = mergedMapOutputsCounter;
    this.mapOutputFile = new TezTaskOutputFiles(conf, inputContext.getUniqueIdentifier());

    this.localFS = localFS;
    this.rfs = ((LocalFileSystem) localFS).getRaw();

    this.numDiskToDiskMerges = inputContext.getCounters().findCounter(TaskCounter.NUM_DISK_TO_DISK_MERGES);
    this.numMemToDiskMerges = inputContext.getCounters().findCounter(TaskCounter.NUM_MEM_TO_DISK_MERGES);
    this.additionalBytesWritten = inputContext.getCounters()
            .findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
    this.additionalBytesRead = inputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);

    this.codec = codec;
    this.ifileReadAhead = ifileReadAheadEnabled;
    if (this.ifileReadAhead) {
        this.ifileReadAheadLength = ifileReadAheadLength;
    } else {
        this.ifileReadAheadLength = 0;
    }
    this.ifileBufferSize = conf.getInt("io.file.buffer.size",
            TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_BUFFER_SIZE_DEFAULT);

    // Figure out initial memory req start
    final float maxInMemCopyUse = conf.getFloat(
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT,
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT_DEFAULT);
    if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) {
        throw new IllegalArgumentException("Invalid value for "
                + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT + ": " + maxInMemCopyUse);
    }

    // Allow unit tests to fix Runtime memory
    long memLimit = conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY,
            (long) (inputContext.getTotalMemoryAvailableToTask() * maxInMemCopyUse));

    float maxRedPer = conf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT,
            TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_BUFFER_PERCENT_DEFAULT);
    if (maxRedPer > 1.0 || maxRedPer < 0.0) {
        throw new TezUncheckedException("Invalid value for "
                + TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT + ": " + maxRedPer);
    }

    long maxRedBuffer = (long) (inputContext.getTotalMemoryAvailableToTask() * maxRedPer);
    // Figure out initial memory req end

    if (this.initialMemoryAvailable < memLimit) {
        this.memoryLimit = this.initialMemoryAvailable;
    } else {
        this.memoryLimit = memLimit;
    }

    if (this.initialMemoryAvailable < maxRedBuffer) {
        this.postMergeMemLimit = this.initialMemoryAvailable;
    } else {
        this.postMergeMemLimit = maxRedBuffer;
    }

    LOG.info("InitialRequest: ShuffleMem=" + memLimit + ", postMergeMem=" + maxRedBuffer
            + ", RuntimeTotalAvailable=" + this.initialMemoryAvailable + ". Updated to: ShuffleMem="
            + this.memoryLimit + ", postMergeMem: " + this.postMergeMemLimit);

    this.ioSortFactor = conf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR,
            TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR_DEFAULT);

    final float singleShuffleMemoryLimitPercent = conf.getFloat(
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT,
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT_DEFAULT);
    if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f) {
        throw new IllegalArgumentException(
                "Invalid value for " + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT + ": "
                        + singleShuffleMemoryLimitPercent);
    }

    //TODO: Cap it to MAX_VALUE until MapOutput starts supporting > 2 GB
    this.maxSingleShuffleLimit = (long) Math.min((memoryLimit * singleShuffleMemoryLimitPercent),
            Integer.MAX_VALUE);
    this.memToMemMergeOutputsThreshold = conf
            .getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, ioSortFactor);
    this.mergeThreshold = (long) (this.memoryLimit
            * conf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT,
                    TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT_DEFAULT));
    LOG.info("MergerManager: memoryLimit=" + memoryLimit + ", " + "maxSingleShuffleLimit="
            + maxSingleShuffleLimit + ", " + "mergeThreshold=" + mergeThreshold + ", " + "ioSortFactor="
            + ioSortFactor + ", " + "memToMemMergeOutputsThreshold=" + memToMemMergeOutputsThreshold);

    if (this.maxSingleShuffleLimit >= this.mergeThreshold) {
        throw new RuntimeException("Invalid configuration: "
                + "maxSingleShuffleLimit should be less than mergeThreshold. " + "maxSingleShuffleLimit: "
                + this.maxSingleShuffleLimit + ", mergeThreshold: " + this.mergeThreshold);
    }

    boolean allowMemToMemMerge = conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM,
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM_DEFAULT);
    if (allowMemToMemMerge) {
        this.memToMemMerger = new IntermediateMemoryToMemoryMerger(this, memToMemMergeOutputsThreshold);
    } else {
        this.memToMemMerger = null;
    }

    this.inMemoryMerger = new InMemoryMerger(this);

    this.onDiskMerger = new OnDiskMerger(this);
}
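
Note the pattern around Constants.TEZ_RUNTIME_TASK_MEMORY above: a value computed at runtime is passed as the getLong default, so the derived figure is used unless the property is set explicitly (which is how unit tests pin the memory). A minimal sketch of the same pattern, with a hypothetical property key and sizing logic:

import org.apache.hadoop.conf.Configuration;

public class ComputedDefaultExample {
    // "my.task.memory.bytes" is a placeholder key, not a real Tez/Hadoop property.
    static long resolveMemoryLimit(Configuration conf, long totalAvailableBytes, float bufferPercent) {
        // The derived value is only the fallback; an explicit setting wins.
        long derived = (long) (totalAvailableBytes * bufferPercent);
        return conf.getLong("my.task.memory.bytes", derived);
    }
}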

From source file:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MergeManager.java

License:Apache License

/**
 * Exposing this to get an initial memory ask without instantiating the object.
 */
@Private
static long getInitialMemoryRequirement(Configuration conf, long maxAvailableTaskMemory) {
    final float maxInMemCopyUse = conf.getFloat(
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT,
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT_DEFAULT);
    if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) {
        throw new IllegalArgumentException("Invalid value for "
                + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT + ": " + maxInMemCopyUse);
    }

    // Allow unit tests to fix Runtime memory
    long memLimit = conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY,
            (long) (maxAvailableTaskMemory * maxInMemCopyUse));

    LOG.info("Initial Shuffle Memory Required: " + memLimit + ", based on INPUT_BUFFER_factor: "
            + maxInMemCopyUse);

    float maxRedPer = conf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT,
            TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_BUFFER_PERCENT_DEFAULT);
    if (maxRedPer > 1.0 || maxRedPer < 0.0) {
        throw new TezUncheckedException("Invalid value for "
                + TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT + ": " + maxRedPer);
    }
    long maxRedBuffer = (long) (maxAvailableTaskMemory * maxRedPer);

    LOG.info("Initial Memory required for final merged output: " + maxRedBuffer + ", using factor: "
            + maxRedPer);

    long reqMem = Math.max(maxRedBuffer, memLimit);
    return reqMem;
}

From source file:org.apache.tez.runtime.library.shuffle.common.impl.SimpleFetchedInputAllocator.java

License:Apache License

public SimpleFetchedInputAllocator(String uniqueIdentifier, Configuration conf, long maxTaskAvailableMemory,
        long memoryAvailable) {
    this.conf = conf;
    this.maxAvailableTaskMemory = maxTaskAvailableMemory;
    this.initialMemoryAvailable = memoryAvailable;

    this.fileNameAllocator = new TezTaskOutputFiles(conf, uniqueIdentifier);
    this.localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);

    // Setup configuration
    final float maxInMemCopyUse = conf.getFloat(
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT,
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT_DEFAULT);
    if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) {
        throw new IllegalArgumentException("Invalid value for "
                + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT + ": " + maxInMemCopyUse);
    }

    long memReq = (long) (conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY,
            Math.min(maxAvailableTaskMemory, Integer.MAX_VALUE)) * maxInMemCopyUse);

    if (memReq <= this.initialMemoryAvailable) {
        this.memoryLimit = memReq;
    } else {
        this.memoryLimit = initialMemoryAvailable;
    }

    LOG.info("RequestedMem=" + memReq + ", Allocated: " + this.memoryLimit);

    final float singleShuffleMemoryLimitPercent = conf.getFloat(
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT,
            TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT_DEFAULT);
    if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f) {
        throw new IllegalArgumentException(
                "Invalid value for " + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT + ": "
                        + singleShuffleMemoryLimitPercent);
    }

    this.maxSingleShuffleLimit = (long) (memoryLimit * singleShuffleMemoryLimitPercent);

    LOG.info("SimpleInputManager -> " + "MemoryLimit: " + this.memoryLimit + ", maxSingleMemLimit: "
            + this.maxSingleShuffleLimit);
}

From source file:org.apache.twill.yarn.LocationCacheCleaner.java

License:Apache License

LocationCacheCleaner(Configuration config, Location cacheBaseLocation, String sessionId,
        Predicate<Location> cleanupPredicate) {
    this.cacheBaseLocation = cacheBaseLocation;
    this.sessionId = sessionId;
    this.expiry = config.getLong(Configs.Keys.LOCATION_CACHE_EXPIRY_MS,
            Configs.Defaults.LOCATION_CACHE_EXPIRY_MS);
    this.antiqueExpiry = config.getLong(Configs.Keys.LOCATION_CACHE_ANTIQUE_EXPIRY_MS,
            Configs.Defaults.LOCATION_CACHE_ANTIQUE_EXPIRY_MS);
    this.cleanupPredicate = cleanupPredicate;
    this.pendingCleanups = new HashSet<>();
}

From source file:org.dennisit.graph.RandomWalkWorkerContext.java

License:Apache License

/**
 * Initialize sources for Random Walk with Restart. First option
 * (preferential) is single source given from the command line as a parameter.
 * Second option is a file with a list of vertex IDs, one per line. In this
 * second case the preference vector is a uniform distribution over these
 * vertices.
 * @param configuration The configuration.
 * @return a (possibly empty) set of source vertices
 */
private ImmutableSet<Long> initializeSources(Configuration configuration) {
    ImmutableSet.Builder<Long> builder = ImmutableSet.builder();
    long sourceVertex = configuration.getLong(SOURCE_VERTEX, Long.MIN_VALUE);
    if (sourceVertex != Long.MIN_VALUE) {
        return ImmutableSet.of(sourceVertex);
    } else {
        Path sourceFile = null;
        try {

            Path[] cacheFiles = DistributedCache.getLocalCacheFiles(configuration);
            if (cacheFiles == null || cacheFiles.length == 0) {
                // empty set if no source vertices configured
                return ImmutableSet.of();
            }

            sourceFile = cacheFiles[0];
            FileSystem fs = FileSystem.getLocal(configuration);
            BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(sourceFile)));
            String line;
            while ((line = in.readLine()) != null) {
                builder.add(Long.parseLong(line));
            }
            in.close();
        } catch (IOException e) {
            getContext().setStatus("Could not load local cache files: " + sourceFile);
            LOG.error("Could not load local cache files: " + sourceFile, e);
        }
    }
    return builder.build();
}

From source file:org.elasticsearch.hadoop.yarn.util.YarnUtils.java

License:Apache License

public static long getAmHeartBeatRate(Configuration cfg) {
    return cfg.getLong(RM_AM_EXPIRY_INTERVAL_MS, DEFAULT_RM_AM_EXPIRY_INTERVAL_MS);
}

From source file:org.godhuli.rhipe.RHMRHelper.java

License:Apache License

void setup(Configuration cfg, String argv, boolean doPipe) {
    try {
        //        InetAddress addr = InetAddress.getLocalHost();
        //        hostname = addr.getHostName();
        doPartitionRelatedSetup(cfg);
        String squote = cfg.get("rhipe_string_quote");
        if (squote == null)
            squote = "";

        REXPHelper.setFieldSep(cfg.get("mapred.field.separator", " "));
        REXPHelper.setStringQuote(squote);

        writeErr = "TRUE".equals(cfg.get("rhipe_test_output"));

        BUFFER_SIZE = cfg.getInt("rhipe_stream_buffer", 10 * 1024);
        joinDelay_ = cfg.getLong("rhipe_joindelay_milli", 0);
        nonZeroExitIsFailure_ = cfg.getBoolean("rhipe_non_zero_exit_is_failure", true);
        doPipe_ = doPipe;
        thisfs = FileSystem.get(cfg);

        Class<?> _kc = null;

        if (callID.equals("Mapper")) {
            if (cfg.getInt("mapred.reduce.tasks", 0) == 0)
                _kc = Class.forName(cfg.get("rhipe_outputformat_keyclass"));
            else
                _kc = Class.forName(cfg.get("rhipe_map_output_keyclass"));
        } else {
            _kc = Class.forName(cfg.get("rhipe_outputformat_keyclass"));
        }
        keyclass = _kc.asSubclass(RHBytesWritable.class);

        if (cfg.get("rhipe_output_folder") != null)
            outputFolder = new Path(cfg.get("rhipe_output_folder"));
        if (!doPipe_)
            return;
        // "TRUE".equals(...) avoids an NPE when rhipe_copy_file is unset
        copyFile = "TRUE".equals(cfg.get("rhipe_copy_file"));
        String[] argvSplit = argv.split(" ");
        String prog = argvSplit[0];
        Environment childEnv = (Environment) env().clone();
        cfg.set("io_sort_mb", cfg.get("io.sort.mb"));
        addJobConfToEnvironment(cfg, childEnv);
        childEnv.put("TMPDIR", System.getProperty("java.io.tmpdir"));
        // Start the process
        ProcessBuilder builder = new ProcessBuilder(argvSplit);
        builder.environment().putAll(childEnv.toMap());
        sim = builder.start();
        clientOut_ = new DataOutputStream(new BufferedOutputStream(sim.getOutputStream(), BUFFER_SIZE));
        clientIn_ = new DataInputStream(new BufferedInputStream(sim.getInputStream(), BUFFER_SIZE));
        clientErr_ = new DataInputStream(new BufferedInputStream(sim.getErrorStream()));
        startTime_ = System.currentTimeMillis();
        LOG.info(callID + ":" + "Started external program:" + argv);
        errThread_ = new MRErrorThread();
        LOG.info(callID + ":" + "Started Error Thread");
        errThread_.start();
    } catch (Exception e) {
        e.printStackTrace();
        throw new RuntimeException("configuration exception", e);
    }
}

From source file:org.jd.copier.mapred.DistCp.java

License:Apache License

/**
 * Initialize DFSCopyFileMapper specific job-configuration.
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 * @return true if it is necessary to launch a job.
 */
private static boolean setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

    //set boolean values
    final boolean update = args.flags.contains(Options.UPDATE);
    final boolean skipCRCCheck = args.flags.contains(Options.SKIPCRC);
    final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
    jobConf.setBoolean(Options.UPDATE.propertyname, update);
    jobConf.setBoolean(Options.SKIPCRC.propertyname, skipCRCCheck);
    jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
    jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
            args.flags.contains(Options.IGNORE_READ_FAILURES));
    jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname, args.flags.contains(Options.PRESERVE_STATUS));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path stagingArea;
    try {
        stagingArea = JobSubmissionFiles.getStagingDir(jClient, conf);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }

    Path jobDirectory = new Path(stagingArea + NAME + "_" + randomId);
    FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
    FileSystem.mkdirs(jClient.getFs(), jobDirectory, mapredSysPerms);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    long maxBytesPerMap = conf.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP);

    FileSystem dstfs = args.dst.getFileSystem(conf);

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), new Path[] { args.dst }, conf);

    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_distcp_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (null == parent) {
                // If dst is '/' on S3, it might not exist yet, but dst.getParent()
                // will return null. In this case, use '/' as its own parent to prevent
                // NPE errors below.
                parent = args.dst;
            }
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class,
            Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory OR we're updating/overwriting
    // the contents of the destination directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<FileStatus>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {
                        //skip file if the src and the dst files are the same.
                        skipfile = update
                                && sameFile(srcfs, child, dstfs, new Path(args.dst, dst), skipCRCCheck);
                        //skip file if it exceed file limit or size limit
                        skipfile |= fileCount == args.filelimit || byteCount + child.getLen() > args.sizelimit;

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            if (LOG.isTraceEnabled()) {
                                LOG.trace("adding file " + child.getPath());
                            }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > maxBytesPerMap) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        LOG.info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create " + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_distcp_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    if (dststatus != null && args.flags.contains(Options.DELETE)) {
        deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
    }

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_distcp_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());

    // Explicitly create the tmpDir to ensure that it can be cleaned
    // up by fullyDelete() later.
    tmpDir.getFileSystem(conf).mkdirs(tmpDir);

    LOG.info("sourcePathsCount=" + srcCount);
    LOG.info("filesToCopyCount=" + fileCount);
    LOG.info("bytesToCopyCount=" + StringUtils.humanReadableInt(byteCount));
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(byteCount, jobConf);
    return fileCount > 0;
}

From source file:org.lab41.hbase.TitanHbaseIdSplitter.java

License:Apache License

public HTableDescriptor createAndSplitTable(String tableName, HBaseAdmin hbaseAdmin,
        Configuration configuration) throws IOException {
    long maxId = configuration.getLong(MAXID_KEY, MAXID_DEFAULT);
    long regionSize = configuration.getLong(REGION_SIZE_KEY, REGION_SIZE_DEFAULT);
    ArrayList<byte[]> arrayList = new ArrayList<byte[]>();
    HTableDescriptor hTableDescriptor = new HTableDescriptor(tableName);

    int numSplits = configuration.getInt(NUM_SPLITS_KEY, NUM_SPLITS_DEFAULT);

    for (long i = 1; i < maxId; i += regionSize) {
        byte[] splitPoint = longToBytes(TitanId.toVertexId(i));
        arrayList.add(splitPoint);
    }

    byte[] midStart = new byte[] { (byte) 0x01, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00,
            (byte) 0x00, (byte) 0x00 };
    byte[] midEnd = new byte[] { (byte) 0x01, (byte) 0x01, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00,
            (byte) 0x00, (byte) 0x00 };
    byte[][] midsplits = Bytes.split(midStart, midEnd, (int) Math.ceil(numSplits * 0.75));
    midsplits = Arrays.copyOfRange(midsplits, 0, midsplits.length - 1);

    for (int i = 0; i < midsplits.length; i++) {
        arrayList.add(midsplits[i]);
    }

    byte[] highStart = new byte[] { 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 };
    byte[] highEnd = new byte[] { (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff,
            (byte) 0xff, (byte) 0xff };
    byte[][] highSplits = Bytes.split(highStart, highEnd, (int) Math.ceil(numSplits * 0.25));
    highSplits = Arrays.copyOfRange(highSplits, 0, highSplits.length - 1);

    for (int i = 0; i < highSplits.length; i++) {
        arrayList.add(highSplits[i]);
    }

    byte[][] splits = new byte[arrayList.size()][8];
    arrayList.toArray(splits);
    //debug loop
    logger.info("Splits : " + splits.length);
    for (int j = 0; j < splits.length; j++) {
        logger.info("createAndSplitTable" + Hex.encodeHexString(splits[j]) + " Bytes.toBytesString : "
                + Bytes.toStringBinary(splits[j]));
    }

    hbaseAdmin.createTable(hTableDescriptor, splits);

    return hTableDescriptor;
}

From source file:org.mrgeo.data.vector.VectorInputFormatContext.java

License:Apache License

public static VectorInputFormatContext load(final Configuration conf) {
    VectorInputFormatContext context = new VectorInputFormatContext();
    context.inputs = new HashSet<String>();
    context.featureCount = conf.getLong(FEATURE_COUNT_KEY, -1L);
    context.minFeaturesPerSplit = conf.getInt(MIN_FEATURES_PER_SPLIT_KEY, -1);
    int inputsCount = conf.getInt(INPUTS_COUNT, 0);
    for (int inputIndex = 0; inputIndex < inputsCount; inputIndex++) {
        String input = conf.get(INPUTS_PREFIX + inputIndex);
        context.inputs.add(input);
    }
    String strProviderProperties = conf.get(PROVIDER_PROPERTY_KEY);
    if (strProviderProperties != null) {
        context.inputProviderProperties = ProviderProperties.fromDelimitedString(strProviderProperties);
    }
    return context;
}