List of usage examples for org.apache.hadoop.fs.Path.getFileSystem
public FileSystem getFileSystem(Configuration conf) throws IOException
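Before the project examples below, a minimal sketch of the call itself; the URI and paths are illustrative. getFileSystem resolves the FileSystem implementation from the Path's URI scheme, falling back to fs.defaultFS for unqualified paths.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();

        // Scheme-qualified path: resolves to the filesystem named by the URI.
        Path qualified = new Path("hdfs://namenode:8020/user/data/input.txt"); // illustrative URI
        FileSystem hdfs = qualified.getFileSystem(conf);
        System.out.println(hdfs.getUri());

        // Unqualified path: falls back to fs.defaultFS from the configuration.
        Path unqualified = new Path("/tmp/example");
        FileSystem defaultFs = unqualified.getFileSystem(conf);
        System.out.println(defaultFs.getUri());
    }
}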
From source file: com.kadwa.hadoop.DistExec.java
License: Open Source License
private static List<Path> fetchFileList(Configuration conf, Path srcList) throws IOException {
    List<Path> result = new ArrayList<Path>();
    FileSystem fs = srcList.getFileSystem(conf);
    BufferedReader input = null;
    try {
        input = new BufferedReader(new InputStreamReader(fs.open(srcList)));
        String line = input.readLine();
        while (line != null) {
            result.add(new Path(line));
            line = input.readLine();
        }
    } finally {
        checkAndClose(input);
    }
    return result;
}
From source file: com.kadwa.hadoop.DistExec.java
License: Open Source License
/**
 * Sanity check for srcPath
 */
private static void checkSrcPath(JobConf jobConf, List<Path> srcPaths) throws IOException {
    List<IOException> rslt = new ArrayList<IOException>();

    Path[] ps = new Path[srcPaths.size()];
    ps = srcPaths.toArray(ps);
    TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), ps, jobConf);

    for (Path p : srcPaths) {
        FileSystem fs = p.getFileSystem(jobConf);
        if (!fs.exists(p)) {
            rslt.add(new IOException("Input source " + p + " does not exist."));
        }
    }
    if (!rslt.isEmpty()) {
        throw new InvalidInputException(rslt);
    }
}
From source file: com.kadwa.hadoop.DistExec.java
License: Open Source License
/**
 * Fully delete dir
 */
static void fullyDelete(String dir, Configuration conf) throws IOException {
    if (dir != null) {
        Path tmp = new Path(dir);
        tmp.getFileSystem(conf).delete(tmp, true);
    }
}
From source file: com.kadwa.hadoop.DistExec.java
License: Open Source License
/**
 * Initialize ExecFilesMapper specific job-configuration.
 *
 * @param conf    : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args    Arguments
 * @return true if it is necessary to launch a job.
 */
private static boolean setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());
    jobConf.set(EXEC_CMD_LABEL, args.execCmd);

    // set boolean values
    jobConf.setBoolean(Options.REDIRECT_ERROR_TO_OUT.propertyname,
            args.flags.contains(Options.REDIRECT_ERROR_TO_OUT));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path stagingArea;
    try {
        stagingArea = JobSubmissionFiles.getStagingDir(jClient, conf);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
    Path jobDirectory = new Path(stagingArea + NAME + "_" + randomId);
    FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
    FileSystem.mkdirs(FileSystem.get(jobDirectory.toUri(), conf), jobDirectory, mapredSysPerms);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    FileSystem dstfs = args.dst.getFileSystem(conf);

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), new Path[] { args.dst }, conf);

    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_" + NAME + "_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_" + NAME + "_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist,
            LongWritable.class, FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_" + NAME + "_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist,
            Text.class, Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_" + NAME + "_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist,
            Text.class, FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists);
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<FileStatus>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());

                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {
                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            if (LOG.isTraceEnabled()) {
                                LOG.trace("adding file " + child.getPath());
                            }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        LOG.info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create " + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_" + NAME + "_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_" + NAME + "_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());
    LOG.info("sourcePathsCount=" + srcCount);
    LOG.info("filesToExecCount=" + fileCount);
    LOG.info("bytesToExecCount=" + StringUtils.humanReadableInt(byteCount));
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(fileCount, jobConf);
    return fileCount > 0;
}
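Note how setup() resolves a separate FileSystem for every Path it touches (args.dst, jobDirectory, each src) instead of assuming one default filesystem. A minimal hedged sketch of that per-path pattern (URIs and names are illustrative):

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PerPathFileSystems {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path dst = new Path("hdfs://namenode:8020/output"); // illustrative URI
        FileSystem dstfs = dst.getFileSystem(conf);         // filesystem backing the destination
        List<Path> srcs = Arrays.asList(
                new Path("file:///data/a"),                 // local filesystem
                new Path("hdfs://namenode:8020/data/b"));   // HDFS
        for (Path src : srcs) {
            FileSystem srcfs = src.getFileSystem(conf);     // each source may live on a different filesystem
            FileStatus stat = srcfs.getFileStatus(src);
            System.out.println(stat.getPath() + " on " + srcfs.getUri());
        }
    }
}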
From source file: com.knewton.mapreduce.io.SSTableInputFormat.java
License: Apache License
/**
 * Expands all directories passed as input and keeps only valid data tables.
 *
 * @return A list of all the data tables found under the input directories.
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    Configuration conf = job.getConfiguration();
    List<FileStatus> files = super.listStatus(job);
    DataTablePathFilter dataTableFilter = getDataTableFilter(conf);
    files = cleanUpBackupDir(files);
    for (int i = 0; i < files.size(); i++) {
        FileStatus file = files.get(i);
        Path p = file.getPath();
        // Expand if directory
        if (file.isDirectory() && p != null) {
            LOG.info("Expanding {}", p);
            FileSystem fs = p.getFileSystem(conf);
            FileStatus[] children = fs.listStatus(p);
            List<FileStatus> listChildren = Lists.newArrayList(children);
            listChildren = cleanUpBackupDir(listChildren);
            files.addAll(i + 1, listChildren);
        }
        if (!dataTableFilter.accept(file.getPath())) {
            LOG.info("Removing {}", file.getPath());
            files.remove(i);
            i--;
        }
    }
    return files;
}
From source file: com.knewton.mrtool.io.JsonRecordReader.java
License: Apache License
/**
 * Get the line reader to be used for the file. A <code>LineReader</code> can read a file line
 * by line. This separate method helps with testing too.
 *
 * @param fileSplit
 * @param conf
 * @return
 * @throws IOException
 */
protected LineReader initLineReader(FileSplit fileSplit, Configuration conf) throws IOException {
    final Path file = fileSplit.getPath();
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream fileIn = fs.open(fileSplit.getPath());
    seekableIn = fileIn;
    boolean skipFirstLine = false;
    LineReader lineReader;
    if (codec != null) {
        lineReader = new LineReader(codec.createInputStream(fileIn), conf);
    } else {
        // if the start is not the beginning of the file then skip the first line to get the
        // next complete json record. The previous json record will be read by the record reader
        // that got assigned the previous InputSplit.
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        lineReader = new LineReader(fileIn, conf);
    }
    if (skipFirstLine) {
        start += lineReader.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    return lineReader;
}
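For context, a hedged sketch of how the enclosing record reader might then consume lines up to the split boundary once initLineReader has positioned the stream; start, end, and lineReader are the fields and return value from the method above, and the loop shape follows Hadoop's LineRecordReader:

// Read records until the split boundary; a record that straddles the
// boundary is finished by this reader and skipped by the next one.
Text line = new Text();
long pos = start;                          // assumed field set from the FileSplit
while (pos <= end) {
    int consumed = lineReader.readLine(line);
    if (consumed == 0) {
        break;                             // end of file
    }
    pos += consumed;
    // parse `line` as one complete JSON record here
}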
From source file: com.kylinolap.job.hadoop.cube.RowKeyDistributionCheckerMapper.java
License: Apache License
@SuppressWarnings("deprecation") public byte[][] getSplits(Configuration conf, Path path) { List<byte[]> rowkeyList = new ArrayList<byte[]>(); SequenceFile.Reader reader = null; try {//from w w w . j a va 2 s. co m reader = new SequenceFile.Reader(path.getFileSystem(conf), path, conf); Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf); Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf); while (reader.next(key, value)) { byte[] tmp = ((Text) key).copyBytes(); if (rowkeyList.contains(tmp) == false) { rowkeyList.add(tmp); } } } catch (Exception e) { e.printStackTrace(); } finally { IOUtils.closeStream(reader); } byte[][] retValue = rowkeyList.toArray(new byte[rowkeyList.size()][]); return retValue; }
From source file: com.kylinolap.job.hadoop.hbase.CreateHTableJob.java
License: Apache License
@SuppressWarnings("deprecation") public byte[][] getSplits(Configuration conf, Path path) throws Exception { FileSystem fs = path.getFileSystem(conf); if (fs.exists(path) == false) { System.err.println("Path " + path + " not found, no region split, HTable will be one region"); return null; }/*from w w w . ja v a 2s . c o m*/ List<byte[]> rowkeyList = new ArrayList<byte[]>(); SequenceFile.Reader reader = null; try { reader = new SequenceFile.Reader(fs, path, conf); Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf); Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf); while (reader.next(key, value)) { rowkeyList.add(((Text) key).copyBytes()); } } catch (Exception e) { e.printStackTrace(); throw e; } finally { IOUtils.closeStream(reader); } System.out.println((rowkeyList.size() + 1) + " regions"); System.out.println(rowkeyList.size() + " splits"); for (byte[] split : rowkeyList) { System.out.println(StringUtils.byteToHexString(split)); } byte[][] retValue = rowkeyList.toArray(new byte[rowkeyList.size()][]); return retValue.length == 0 ? null : retValue; }
From source file: com.lightboxtechnologies.spectrum.HDFSArchiver.java
License: Apache License
public static int runPipeline(String src, String dst) throws IOException {
    final Configuration conf = new Configuration();
    final FileSystem fs = FileSystem.get(conf);

    final Path rpath = new Path(src);
    final Path zpath = new Path(dst);

    if (!fs.exists(rpath)) {
        throw new IOException("Source path does not exist.");
    }

    OutputStream out = null;
    try {
        out = zpath.getFileSystem(conf).create(zpath);
        zip(fs, rpath, out);
        out.close();
    } finally {
        IOUtils.closeQuietly(out);
    }

    return 0;
}
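A hedged usage sketch of runPipeline (the paths are illustrative; assumes a reachable default filesystem and the archiver on the classpath):

public class ArchiverUsage {
    public static void main(String[] args) throws java.io.IOException {
        // Zips the source tree into the destination file and returns an exit code.
        int rc = HDFSArchiver.runPipeline("/evidence/image1", "/archives/image1.zip"); // illustrative paths
        System.exit(rc);
    }
}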
From source file: com.lightboxtechnologies.spectrum.SequenceFileExport.java
License: Apache License
public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    final String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    String imageID;
    String outpath;
    String friendlyname;
    final Set<String> exts = new HashSet<String>();

    if ("-f".equals(otherArgs[0])) {
        if (otherArgs.length != 5) { // -f extfile imageID friendlyname outpath
            die();
        }

        // load extensions from file
        final Path extpath = new Path(otherArgs[1]);

        InputStream in = null;
        try {
            in = extpath.getFileSystem(conf).open(extpath);

            Reader r = null;
            try {
                r = new InputStreamReader(in);

                BufferedReader br = null;
                try {
                    br = new BufferedReader(r);

                    String line;
                    while ((line = br.readLine()) != null) {
                        exts.add(line.trim().toLowerCase());
                    }

                    br.close();
                } finally {
                    IOUtils.closeQuietly(br);
                }

                r.close();
            } finally {
                IOUtils.closeQuietly(r);
            }

            in.close();
        } finally {
            IOUtils.closeQuietly(in);
        }

        imageID = otherArgs[2];
        friendlyname = otherArgs[3];
        outpath = otherArgs[4];
    } else {
        if (otherArgs.length < 3) {
            die();
        }

        // read extensions from trailing args
        imageID = otherArgs[0];
        friendlyname = otherArgs[1];
        outpath = otherArgs[2];

        // lowercase all file extensions, which start after outpath
        for (int i = 3; i < otherArgs.length; ++i) {
            exts.add(otherArgs[i].toLowerCase());
        }
    }

    conf.setStrings("extensions", exts.toArray(new String[exts.size()]));

    final Job job = SKJobFactory.createJobFromConf(imageID, friendlyname, "SequenceFileExport", conf);
    job.setJarByClass(SequenceFileExport.class);
    job.setMapperClass(SequenceFileExportMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(MapWritable.class);

    job.setInputFormatClass(FsEntryHBaseInputFormat.class);
    FsEntryHBaseInputFormat.setupJob(job, imageID);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    FileOutputFormat.setOutputPath(job, new Path(outpath));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}