List of usage examples for org.apache.hadoop.util StringUtils humanReadableInt
@Deprecated public static String humanReadableInt(long number)
From source file:cn.ict.magicube.fs.shell.FixedLs.java
License:Apache License
protected String formatSize(long size) { return humanReadable ? StringUtils.humanReadableInt(size) : String.valueOf(size); }
From source file:com.ailk.oci.ocnosql.tools.load.mutiple.MutipleColumnReducer.java
License:Apache License
@Override protected void reduce(ImmutableBytesWritable row, java.lang.Iterable<Put> puts, Reducer<ImmutableBytesWritable, Put, ImmutableBytesWritable, KeyValue>.Context context) throws java.io.IOException, InterruptedException { // although reduce() is called per-row, handle pathological case long threshold = context.getConfiguration().getLong("putsortreducer.row.threshold", 2L * (1 << 30)); // String newRowkey = (String)rowkeyGenerator.generate(new String(row.copyBytes())); Iterator<Put> iter = puts.iterator(); while (iter.hasNext()) { TreeSet<KeyValue> map = new TreeSet<KeyValue>(KeyValue.COMPARATOR); long curSize = 0; // stop at the end or the RAM threshold while (iter.hasNext() && curSize < threshold) { Put p = iter.next();/* w ww .ja v a 2 s. c om*/ for (List<KeyValue> kvs : p.getFamilyMap().values()) { for (KeyValue kv : kvs) { map.add(kv); curSize += kv.getLength(); } } } context.setStatus("Read " + (map.size()) + " entries of " + map.getClass() + "(" + StringUtils.humanReadableInt(curSize) + ")"); int index = 0; for (KeyValue kv : map) { context.write(row, kv); if (index > 0 && index % 100 == 0) context.setStatus("Wrote " + index); } // if we have more entries to process if (iter.hasNext()) { // force flush because we cannot guarantee intra-row sorted order context.write(null, null); } } }
From source file:com.ci.backports.hadoop.hbase.ZPutSortReducer.java
License:Apache License
@Override protected void reduce(ImmutableBytesWritable row, java.lang.Iterable<Put> puts, Reducer<ImmutableBytesWritable, Put, ImmutableBytesWritable, KeyValue>.Context context) throws java.io.IOException, InterruptedException { // although reduce() is called per-row, handle pathological case long threshold = context.getConfiguration().getLong("putsortreducer.row.threshold", 2L * (1 << 30)); Iterator<Put> iter = puts.iterator(); while (iter.hasNext()) { TreeSet<KeyValue> map = new TreeSet<KeyValue>(KeyValue.COMPARATOR); long curSize = 0; // stop at the end or the RAM threshold while (iter.hasNext() && curSize < threshold) { Put p = iter.next();/*from ww w.j a v a 2 s. c om*/ for (List<KeyValue> kvs : p.getFamilyMap().values()) { for (KeyValue kv : kvs) { map.add(kv); curSize += kv.getValueLength(); } } } context.setStatus("Read " + map.size() + " entries of " + map.getClass() + "(" + StringUtils.humanReadableInt(curSize) + ")"); int index = 0; for (KeyValue kv : map) { context.write(row, kv); if (index > 0 && index % 100 == 0) context.setStatus("Wrote " + index); } // if we have more entries to process if (iter.hasNext()) { // force flush because we cannot guarantee intra-row sorted order context.write(null, null); } } }
From source file:com.kadwa.hadoop.DistExec.java
License:Open Source License
/** * Initialize ExecFilesMapper specific job-configuration. * * @param conf : The dfs/mapred configuration. * @param jobConf : The handle to the jobConf object to be initialized. * @param args Arguments/*w ww . j a v a2s . c o m*/ * @return true if it is necessary to launch a job. */ private static boolean setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException { jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString()); jobConf.set(EXEC_CMD_LABEL, args.execCmd); //set boolean values jobConf.setBoolean(Options.REDIRECT_ERROR_TO_OUT.propertyname, args.flags.contains(Options.REDIRECT_ERROR_TO_OUT)); final String randomId = getRandomId(); JobClient jClient = new JobClient(jobConf); Path stagingArea; try { stagingArea = JobSubmissionFiles.getStagingDir(jClient, conf); } catch (InterruptedException e) { throw new IOException(e); } Path jobDirectory = new Path(stagingArea + NAME + "_" + randomId); FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION); FileSystem.mkdirs(FileSystem.get(jobDirectory.toUri(), conf), jobDirectory, mapredSysPerms); jobConf.set(JOB_DIR_LABEL, jobDirectory.toString()); FileSystem dstfs = args.dst.getFileSystem(conf); // get tokens for all the required FileSystems.. TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), new Path[] { args.dst }, conf); boolean dstExists = dstfs.exists(args.dst); boolean dstIsDir = false; if (dstExists) { dstIsDir = dstfs.getFileStatus(args.dst).isDir(); } // default logPath Path logPath = args.log; if (logPath == null) { String filename = "_" + NAME + "_logs_" + randomId; if (!dstExists || !dstIsDir) { Path parent = args.dst.getParent(); if (!dstfs.exists(parent)) { dstfs.mkdirs(parent); } logPath = new Path(parent, filename); } else { logPath = new Path(args.dst, filename); } } FileOutputFormat.setOutputPath(jobConf, logPath); // create src list, dst list FileSystem jobfs = jobDirectory.getFileSystem(jobConf); Path srcfilelist = new Path(jobDirectory, "_" + NAME + "_src_files"); jobConf.set(SRC_LIST_LABEL, srcfilelist.toString()); SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class, FilePair.class, SequenceFile.CompressionType.NONE); Path dstfilelist = new Path(jobDirectory, "_" + NAME + "_dst_files"); SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class, Text.class, SequenceFile.CompressionType.NONE); Path dstdirlist = new Path(jobDirectory, "_" + NAME + "_dst_dirs"); jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString()); SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class, FilePair.class, SequenceFile.CompressionType.NONE); // handle the case where the destination directory doesn't exist // and we've only a single src directory. final boolean special = (args.srcs.size() == 1 && !dstExists); int srcCount = 0, cnsyncf = 0, dirsyn = 0; long fileCount = 0L, byteCount = 0L, cbsyncs = 0L; try { for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) { final Path src = srcItr.next(); FileSystem srcfs = src.getFileSystem(conf); FileStatus srcfilestat = srcfs.getFileStatus(src); Path root = special && srcfilestat.isDir() ? src : src.getParent(); if (srcfilestat.isDir()) { ++srcCount; } Stack<FileStatus> pathstack = new Stack<FileStatus>(); for (pathstack.push(srcfilestat); !pathstack.empty();) { FileStatus cur = pathstack.pop(); FileStatus[] children = srcfs.listStatus(cur.getPath()); for (int i = 0; i < children.length; i++) { boolean skipfile = false; final FileStatus child = children[i]; final String dst = makeRelative(root, child.getPath()); ++srcCount; if (child.isDir()) { pathstack.push(child); } else { if (!skipfile) { ++fileCount; byteCount += child.getLen(); if (LOG.isTraceEnabled()) { LOG.trace("adding file " + child.getPath()); } ++cnsyncf; cbsyncs += child.getLen(); if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) { src_writer.sync(); dst_writer.sync(); cnsyncf = 0; cbsyncs = 0L; } } } if (!skipfile) { src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()), new FilePair(child, dst)); } dst_writer.append(new Text(dst), new Text(child.getPath().toString())); } if (cur.isDir()) { String dst = makeRelative(root, cur.getPath()); dir_writer.append(new Text(dst), new FilePair(cur, dst)); if (++dirsyn > SYNC_FILE_MAX) { dirsyn = 0; dir_writer.sync(); } } } } } finally { checkAndClose(src_writer); checkAndClose(dst_writer); checkAndClose(dir_writer); } FileStatus dststatus = null; try { dststatus = dstfs.getFileStatus(args.dst); } catch (FileNotFoundException fnfe) { LOG.info(args.dst + " does not exist."); } // create dest path dir if copying > 1 file if (dststatus == null) { if (srcCount > 1 && !dstfs.mkdirs(args.dst)) { throw new IOException("Failed to create" + args.dst); } } final Path sorted = new Path(jobDirectory, "_" + NAME + "_sorted"); checkDuplication(jobfs, dstfilelist, sorted, conf); Path tmpDir = new Path( (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst, "_" + NAME + "_tmp_" + randomId); jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString()); LOG.info("sourcePathsCount=" + srcCount); LOG.info("filesToExecCount=" + fileCount); LOG.info("bytesToExecCount=" + StringUtils.humanReadableInt(byteCount)); jobConf.setInt(SRC_COUNT_LABEL, srcCount); jobConf.setLong(TOTAL_SIZE_LABEL, byteCount); setMapCount(fileCount, jobConf); return fileCount > 0; }
From source file:com.transwarp.hbase.bulkload.PutWritableSortReducer.java
License:Apache License
@Override protected void reduce(ImmutableBytesWritable row, java.lang.Iterable<PutWritable> puts, Reducer<ImmutableBytesWritable, PutWritable, ImmutableBytesWritable, KeyValue>.Context context) throws java.io.IOException, InterruptedException { // although reduce() is called per-row, handle pathological case long threshold = context.getConfiguration().getLong("putsortreducer.row.threshold", 2L * (1 << 30)); Iterator<PutWritable> iter = puts.iterator(); while (iter.hasNext()) { TreeSet<KeyValue> map = new TreeSet<KeyValue>(KeyValue.COMPARATOR); long curSize = 0; // stop at the end or the RAM threshold while (iter.hasNext() && curSize < threshold) { PutWritable p = iter.next(); for (KeyValue kv : p.genKvs()) { map.add(kv);//from ww w. j a v a2s .co m curSize += kv.getLength(); } } context.setStatus("Read " + map.size() + " entries of " + map.getClass() + "(" + StringUtils.humanReadableInt(curSize) + ")"); int index = 0; for (KeyValue kv : map) { context.write(row, kv); if (index > 0 && index % 100 == 0) context.setStatus("Wrote " + index); } // if we have more entries to process if (iter.hasNext()) { // force flush because we cannot guarantee intra-row sorted order context.write(null, null); } } }
From source file:com.transwarp.hbase.bulkload.TextSortReducer.java
License:Apache License
@Override protected void reduce(ImmutableBytesWritable rowKey, java.lang.Iterable<Text> lines, Reducer<ImmutableBytesWritable, Text, ImmutableBytesWritable, KeyValue>.Context context) throws java.io.IOException, InterruptedException { // although reduce() is called per-row, handle pathological case long threshold = context.getConfiguration().getLong("reducer.row.threshold", 1L * (1 << 30)); Iterator<Text> iter = lines.iterator(); while (iter.hasNext()) { Set<KeyValue> map = new TreeSet<KeyValue>(KeyValue.COMPARATOR); long curSize = 0; // stop at the end or the RAM threshold while (iter.hasNext() && curSize < threshold) { Text line = iter.next(); String lineStr = line.toString(); try { ArrayList<String> parsedLine = ParsedLine.parse(converter.getRecordSpec(), lineStr); Put p = converter.convert(parsedLine, rowKey.get()); for (List<KeyValue> kvs : p.getFamilyMap().values()) { for (KeyValue kv : kvs) { map.add(kv);//from w w w . j a va2 s . com curSize += kv.getLength(); } } } catch (FormatException badLine) { if (skipBadLines) { System.err.println("Bad line." + badLine.getMessage()); incrementBadLineCount(1); return; } throw new IOException(badLine); } catch (IllegalArgumentException e) { if (skipBadLines) { System.err.println("Bad line." + e.getMessage()); incrementBadLineCount(1); return; } throw new IOException(e); } } context.setStatus("Read " + map.size() + " entries of " + map.getClass() + "(" + StringUtils.humanReadableInt(curSize) + ")"); int index = 0; for (KeyValue kv : map) { if (isDelete) { kv = new KeyValue(kv.getRow(), kv.getFamily(), kv.getQualifier(), 0, KeyValue.Type.Delete, kv.getValue()); } context.write(rowKey, kv); if (++index > 0 && index % 100 == 0) context.setStatus("Wrote " + index + " key values."); } // if we have more entries to process if (iter.hasNext()) { // force flush because we cannot guarantee intra-row sorted order context.write(null, null); } } }
From source file:com.transwarp.hbase.bulkload.withindex.TextWithIndexSortReducer.java
License:Apache License
@Override protected void reduce(ImmutableBytesWritable rowKey, java.lang.Iterable<Text> lines, Reducer<ImmutableBytesWritable, Text, ImmutableBytesWritable, KeyValue>.Context context) throws java.io.IOException, InterruptedException { // although reduce() is called per-row, handle pathological case long threshold = context.getConfiguration().getLong("reducer.row.threshold", 1L * (1 << 30)); Iterator<Text> iter = lines.iterator(); boolean qualifier = context.getConfiguration().getBoolean("indexqualifier", false); while (iter.hasNext()) { // Get the prefix to judge whethre primary table(Prefix == 0) or index table (prefix > 0) int rowkeyPrefix = Bytes.toInt(rowKey.get(), 0, 4); byte[] rowKeyWithoutPrefix = Bytes.tail(rowKey.get(), rowKey.get().length - 4); Set<KeyValue> map = new TreeSet<KeyValue>(KeyValue.COMPARATOR); long curSize = 0; // stop at the end or the RAM threshold while (iter.hasNext() && curSize < threshold) { Text line = iter.next(); String lineStr = line.toString(); try { Put p = null;/*from www. j a va 2 s . com*/ if (rowkeyPrefix == 0) { ArrayList<String> parsedLine = ParsedLine.parse(converter.getRecordSpec(), lineStr); p = converter.convert(parsedLine, rowKeyWithoutPrefix); } else { p = new Put(rowKeyWithoutPrefix); if (qualifier) { p.add(family, line.getBytes(), emptyByte); } else { p.add(family, this.qualifier, line.getBytes()); } } if (p != null) { for (List<KeyValue> kvs : p.getFamilyMap().values()) { for (KeyValue kv : kvs) { map.add(kv); curSize += kv.getLength(); } } } } catch (FormatException badLine) { if (skipBadLines) { System.err.println("Bad line." + badLine.getMessage()); incrementBadLineCount(1); return; } throw new IOException(badLine); } catch (IllegalArgumentException e) { if (skipBadLines) { System.err.println("Bad line." + e.getMessage()); incrementBadLineCount(1); return; } throw new IOException(e); } } context.setStatus("Read " + map.size() + " entries of " + map.getClass() + "(" + StringUtils.humanReadableInt(curSize) + ")"); int index = 0; for (KeyValue kv : map) { context.write(rowKey, kv); if (++index > 0 && index % 100 == 0) context.setStatus("Wrote " + index + " key values."); } // if we have more entries to process if (iter.hasNext()) { // force flush because we cannot guarantee intra-row sorted order context.write(null, null); } } }
From source file:org.jd.copier.mapred.DistCp.java
License:Apache License
/** * Initialize DFSCopyFileMapper specific job-configuration. * @param conf : The dfs/mapred configuration. * @param jobConf : The handle to the jobConf object to be initialized. * @param args Arguments/*from w w w.ja v a 2 s .c om*/ * @return true if it is necessary to launch a job. */ private static boolean setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException { jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString()); //set boolean values final boolean update = args.flags.contains(Options.UPDATE); final boolean skipCRCCheck = args.flags.contains(Options.SKIPCRC); final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE); jobConf.setBoolean(Options.UPDATE.propertyname, update); jobConf.setBoolean(Options.SKIPCRC.propertyname, skipCRCCheck); jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite); jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname, args.flags.contains(Options.IGNORE_READ_FAILURES)); jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname, args.flags.contains(Options.PRESERVE_STATUS)); final String randomId = getRandomId(); JobClient jClient = new JobClient(jobConf); Path stagingArea; try { stagingArea = JobSubmissionFiles.getStagingDir(jClient, conf); } catch (InterruptedException e) { throw new IOException(e); } Path jobDirectory = new Path(stagingArea + NAME + "_" + randomId); FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION); FileSystem.mkdirs(jClient.getFs(), jobDirectory, mapredSysPerms); jobConf.set(JOB_DIR_LABEL, jobDirectory.toString()); long maxBytesPerMap = conf.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP); FileSystem dstfs = args.dst.getFileSystem(conf); // get tokens for all the required FileSystems.. TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), new Path[] { args.dst }, conf); boolean dstExists = dstfs.exists(args.dst); boolean dstIsDir = false; if (dstExists) { dstIsDir = dstfs.getFileStatus(args.dst).isDir(); } // default logPath Path logPath = args.log; if (logPath == null) { String filename = "_distcp_logs_" + randomId; if (!dstExists || !dstIsDir) { Path parent = args.dst.getParent(); if (null == parent) { // If dst is '/' on S3, it might not exist yet, but dst.getParent() // will return null. In this case, use '/' as its own parent to prevent // NPE errors below. parent = args.dst; } if (!dstfs.exists(parent)) { dstfs.mkdirs(parent); } logPath = new Path(parent, filename); } else { logPath = new Path(args.dst, filename); } } FileOutputFormat.setOutputPath(jobConf, logPath); // create src list, dst list FileSystem jobfs = jobDirectory.getFileSystem(jobConf); Path srcfilelist = new Path(jobDirectory, "_distcp_src_files"); jobConf.set(SRC_LIST_LABEL, srcfilelist.toString()); SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class, FilePair.class, SequenceFile.CompressionType.NONE); Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files"); SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class, Text.class, SequenceFile.CompressionType.NONE); Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs"); jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString()); SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class, FilePair.class, SequenceFile.CompressionType.NONE); // handle the case where the destination directory doesn't exist // and we've only a single src directory OR we're updating/overwriting // the contents of the destination directory. final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite; int srcCount = 0, cnsyncf = 0, dirsyn = 0; long fileCount = 0L, byteCount = 0L, cbsyncs = 0L; try { for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) { final Path src = srcItr.next(); FileSystem srcfs = src.getFileSystem(conf); FileStatus srcfilestat = srcfs.getFileStatus(src); Path root = special && srcfilestat.isDir() ? src : src.getParent(); if (srcfilestat.isDir()) { ++srcCount; } Stack<FileStatus> pathstack = new Stack<FileStatus>(); for (pathstack.push(srcfilestat); !pathstack.empty();) { FileStatus cur = pathstack.pop(); FileStatus[] children = srcfs.listStatus(cur.getPath()); for (int i = 0; i < children.length; i++) { boolean skipfile = false; final FileStatus child = children[i]; final String dst = makeRelative(root, child.getPath()); ++srcCount; if (child.isDir()) { pathstack.push(child); } else { //skip file if the src and the dst files are the same. skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst), skipCRCCheck); //skip file if it exceed file limit or size limit skipfile |= fileCount == args.filelimit || byteCount + child.getLen() > args.sizelimit; if (!skipfile) { ++fileCount; byteCount += child.getLen(); if (LOG.isTraceEnabled()) { LOG.trace("adding file " + child.getPath()); } ++cnsyncf; cbsyncs += child.getLen(); if (cnsyncf > SYNC_FILE_MAX || cbsyncs > maxBytesPerMap) { src_writer.sync(); dst_writer.sync(); cnsyncf = 0; cbsyncs = 0L; } } } if (!skipfile) { src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()), new FilePair(child, dst)); } dst_writer.append(new Text(dst), new Text(child.getPath().toString())); } if (cur.isDir()) { String dst = makeRelative(root, cur.getPath()); dir_writer.append(new Text(dst), new FilePair(cur, dst)); if (++dirsyn > SYNC_FILE_MAX) { dirsyn = 0; dir_writer.sync(); } } } } } finally { checkAndClose(src_writer); checkAndClose(dst_writer); checkAndClose(dir_writer); } FileStatus dststatus = null; try { dststatus = dstfs.getFileStatus(args.dst); } catch (FileNotFoundException fnfe) { LOG.info(args.dst + " does not exist."); } // create dest path dir if copying > 1 file if (dststatus == null) { if (srcCount > 1 && !dstfs.mkdirs(args.dst)) { throw new IOException("Failed to create" + args.dst); } } final Path sorted = new Path(jobDirectory, "_distcp_sorted"); checkDuplication(jobfs, dstfilelist, sorted, conf); if (dststatus != null && args.flags.contains(Options.DELETE)) { deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf); } Path tmpDir = new Path( (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst, "_distcp_tmp_" + randomId); jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString()); // Explicitly create the tmpDir to ensure that it can be cleaned // up by fullyDelete() later. tmpDir.getFileSystem(conf).mkdirs(tmpDir); LOG.info("sourcePathsCount=" + srcCount); LOG.info("filesToCopyCount=" + fileCount); LOG.info("bytesToCopyCount=" + StringUtils.humanReadableInt(byteCount)); jobConf.setInt(SRC_COUNT_LABEL, srcCount); jobConf.setLong(TOTAL_SIZE_LABEL, byteCount); setMapCount(byteCount, jobConf); return fileCount > 0; }