List of usage examples for org.apache.hadoop.fs Path getFileSystem
public FileSystem getFileSystem(Configuration conf) throws IOException
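Path.getFileSystem(Configuration) resolves the FileSystem instance that owns the path, chosen from the path's URI scheme (falling back to fs.defaultFS when the path has no scheme). Before the source-file examples below, here is a minimal usage sketch; the HDFS URI and file name are illustrative assumptions, not taken from any of the listed projects:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Hypothetical path; its scheme ("hdfs://...") determines which FileSystem is returned.
        Path path = new Path("hdfs://localhost:9000/tmp/example.txt");
        FileSystem fs = path.getFileSystem(conf);
        if (fs.exists(path)) {
            FileStatus status = fs.getFileStatus(path);
            System.out.println(path + " has length " + status.getLen());
        }
    }
}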
From source file:com.ibm.jaql.io.hadoop.FileOutputConfigurator.java
License:Apache License
public void setParallel(JobConf conf) throws Exception {
    registerSerializers(conf);
    // For map-reduce, multiple files can be produced, so the location is their
    // parent directory.
    Path outPath = new Path(location);
    FileSystem fs = outPath.getFileSystem(conf);
    fs.delete(outPath, true); // TODO: Jaql currently has overwrite semantics; add flag to control this
    FileOutputFormat.setOutputPath(conf, outPath);
}
From source file:com.ibm.jaql.lang.expr.io.HadoopTempExpr.java
License:Apache License
public JsonRecord eval(Context context) throws Exception {
    String filename = "jaqltemp_" + System.nanoTime(); // FIXME: figure out where this should go
    BufferedJsonRecord r = new BufferedJsonRecord();
    r.add(Adapter.TYPE_NAME, new JsonString("jaqltemp"));
    r.add(Adapter.LOCATION_NAME, new JsonString(filename));
    BufferedJsonRecord options = new BufferedJsonRecord();
    JsonSchema schema = (JsonSchema) exprs[0].eval(context);
    options.add(new JsonString("schema"), schema);
    r.add(Adapter.OPTIONS_NAME, options);
    Configuration conf = new Configuration(); // TODO: where to get this from?
    Path path = new Path(filename);
    FileSystem fs = path.getFileSystem(conf);
    context.doAtReset(new DeleteFileTask(fs, path));
    return r; // TODO: memory
}
From source file:com.ibm.jaql.lang.expr.system.LsFn.java
License:Apache License
@Override
public JsonIterator iter(final Context context) throws Exception {
    JsonString glob = (JsonString) exprs[0].eval(context);
    // Configuration conf = context.getConfiguration();
    Configuration conf = new Configuration(); // TODO: get from context, incl options
    //URI uri;
    //FileSystem fs = FileSystem.get(uri, conf);
    Path inpath = new Path(glob.toString());
    FileSystem fs = inpath.getFileSystem(conf);
    //final FileStatus[] stats = fs.listStatus(path, filter);
    final FileStatus[] stats = fs.globStatus(inpath);
    if (stats == null || stats.length == 0) {
        return JsonIterator.EMPTY;
    }
    final MutableJsonDate accessTime = new MutableJsonDate();
    final MutableJsonDate modifyTime = new MutableJsonDate();
    final MutableJsonLong length = new MutableJsonLong();
    final MutableJsonLong blockSize = new MutableJsonLong();
    final MutableJsonLong replication = new MutableJsonLong();
    final MutableJsonString path = new MutableJsonString();
    final MutableJsonString owner = new MutableJsonString();
    final MutableJsonString group = new MutableJsonString();
    final MutableJsonString permission = new MutableJsonString();
    final JsonValue[] values = new JsonValue[] { accessTime, modifyTime, length, blockSize, replication,
            path, owner, group, permission };
    final BufferedJsonRecord rec = new BufferedJsonRecord();
    rec.set(LsField.names, values, values.length, false);

    return new JsonIterator(rec) {
        int i = 0;

        @Override
        public boolean moveNext() throws Exception {
            if (i >= stats.length) {
                return false;
            }
            FileStatus stat = stats[i++];
            // fs.getUri().toString();
            long x = HadoopShim.getAccessTime(stat);
            if (x <= 0) {
                values[LsField.ACCESS_TIME.ordinal()] = null;
            } else {
                accessTime.set(x);
                values[LsField.ACCESS_TIME.ordinal()] = accessTime;
            }
            modifyTime.set(stat.getModificationTime());
            length.set(stat.getLen());
            blockSize.set(stat.getBlockSize());
            replication.set(stat.getReplication());
            path.setCopy(stat.getPath().toString());
            owner.setCopy(stat.getOwner());
            group.setCopy(stat.getGroup());
            permission.setCopy(stat.getPermission().toString());
            return true;
        }
    };
}
From source file:com.idvp.platform.hdfs.HDFSDataStream.java
License:Apache License
protected FileSystem getDfs(Configuration conf, Path dstPath) throws IOException {
    return dstPath.getFileSystem(conf);
}
From source file:com.ikanow.aleph2.analytics.hadoop.assets.UpdatedFileInputFormat.java
License:Apache License
private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs, PathFilter inputFilter,
        boolean recursive) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    List<IOException> errors = new ArrayList<IOException>();
    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        FileSystem fs = p.getFileSystem(job.getConfiguration());
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDirectory()) {
                    RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(globStat.getPath());
                    while (iter.hasNext()) {
                        LocatedFileStatus stat = iter.next();
                        if (inputFilter.accept(stat.getPath())) {
                            if (recursive && stat.isDirectory()) {
                                addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
                            } else {
                                result.add(stat);
                            }
                        }
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }
    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    return result;
}
From source file:com.ikanow.aleph2.analytics.hadoop.assets.UpdatedFileInputFormat.java
License:Apache License
/**
 * Generate the list of files and make them into FileSplits.
 * @param job the job context
 * @throws IOException
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    Stopwatch sw = Stopwatch.createStarted();
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    for (FileStatus file : files) {
        Path path = file.getPath();
        long length = file.getLen();
        if (length != 0) {
            BlockLocation[] blkLocations;
            if (file instanceof LocatedFileStatus) {
                blkLocations = ((LocatedFileStatus) file).getBlockLocations();
            } else {
                FileSystem fs = path.getFileSystem(job.getConfiguration());
                blkLocations = fs.getFileBlockLocations(file, 0, length);
            }
            if (isSplitable(job, path)) {
                long blockSize = file.getBlockSize();
                long splitSize = computeSplitSize(blockSize, minSize, maxSize);

                long bytesRemaining = length;
                while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                    splits.add(makeSplit(path, length - bytesRemaining, splitSize,
                            blkLocations[blkIndex].getHosts(), blkLocations[blkIndex].getCachedHosts()));
                    bytesRemaining -= splitSize;
                }

                if (bytesRemaining != 0) {
                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                    splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining,
                            blkLocations[blkIndex].getHosts(), blkLocations[blkIndex].getCachedHosts()));
                }
            } else { // not splitable
                splits.add(makeSplit(path, 0, length, blkLocations[0].getHosts(),
                        blkLocations[0].getCachedHosts()));
            }
        } else {
            // Create empty hosts array for zero length files
            splits.add(makeSplit(path, 0, length, new String[0]));
        }
    }
    // Save the number of input files for metrics/loadgen
    job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());
    sw.stop();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Total # of splits generated by getSplits: " + splits.size() + ", TimeTaken: "
                + sw.elapsed(TimeUnit.MILLISECONDS));
    }
    return splits;
}
From source file:com.indeed.imhotep.builder.tsv.EasyIndexBuilderFromTSV.java
License:Apache License
public FileSystem getHDFS(Path inputFilePath) {
    try {
        return inputFilePath.getFileSystem(new org.apache.hadoop.conf.Configuration());
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
}
From source file:com.indeed.imhotep.builder.tsv.TsvConverter.java
License:Apache License
private FileSystem getFS(Path path) {
    try {
        return path.getFileSystem(hdfsConf);
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
}
From source file:com.inmobi.conduit.distcp.ConduitDistCp.java
License:Apache License
@Override
protected Path createInputFileListing(Job job) throws IOException {
    // get the file path where copy listing file has to be saved
    Path fileListingPath = getFileListingPath();
    Configuration config = job.getConfiguration();

    SequenceFile.Writer fileListWriter = null;
    try {
        fileListWriter = SequenceFile.createWriter(fileListingPath.getFileSystem(config), config,
                fileListingPath, Text.class, FileStatus.class, SequenceFile.CompressionType.NONE);
        for (Map.Entry<String, FileStatus> entry : fileListingMap.entrySet()) {
            FileStatus status = FileUtil.getFileStatus(entry.getValue(), buffer, in);
            fileListWriter.append(new Text(entry.getKey()), status);

            // Create a sync point after each entry. This will ensure that SequenceFile
            // Reader can work at file entry level granularity, given that SequenceFile
            // Reader reads from the starting of sync point.
            fileListWriter.sync();

            totalBytesToCopy += entry.getValue().getLen();
            totalPaths++;
        }
    } finally {
        if (fileListWriter != null) {
            fileListWriter.close();
        }
    }

    LOG.info("Number of paths considered for copy: " + totalPaths);
    LOG.info("Number of bytes considered for copy: " + totalBytesToCopy
            + " (Actual number of bytes copied depends on whether any files are "
            + "skipped or overwritten.)");

    // set distcp configurations
    config.set(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, fileListingPath.toString());
    config.setLong(DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED, totalBytesToCopy);
    config.setLong(DistCpConstants.CONF_LABEL_TOTAL_NUMBER_OF_RECORDS, totalPaths);
    return fileListingPath;
}
From source file:com.inmobi.conduit.distcp.tools.CopyListing.java
License:Apache License
/**
 * Validate the final resulting path listing to see if there are any duplicate entries
 *
 * @param pathToListFile - path listing build by doBuildListing
 * @throws IOException - Any issues while checking for duplicates and throws
 * @throws DuplicateFileException - if there are duplicates
 */
protected void checkForDuplicates(Path pathToListFile) throws DuplicateFileException, IOException {
    Configuration config = getConf();
    FileSystem fs = pathToListFile.getFileSystem(config);
    Path sortedList = DistCpUtils.sortListing(fs, config, pathToListFile);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, sortedList, config);
    try {
        Text lastKey = new Text("*"); // source relative path can never hold *
        FileStatus lastFileStatus = new FileStatus();

        Text currentKey = new Text();
        while (reader.next(currentKey)) {
            if (currentKey.equals(lastKey)) {
                FileStatus currentFileStatus = new FileStatus();
                reader.getCurrentValue(currentFileStatus);
                throw new DuplicateFileException("File " + lastFileStatus.getPath() + " and "
                        + currentFileStatus.getPath() + " would cause duplicates. Aborting");
            }
            reader.getCurrentValue(lastFileStatus);
            lastKey.set(currentKey);
        }
    } finally {
        IOUtils.closeStream(reader);
    }
}