List of usage examples for org.apache.hadoop.fs.FileSystem#isFile(Path)
@Deprecated public boolean isFile(Path f) throws IOException
From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils.java
License:LGPL
/** * Check if a file exists/* w w w . j a v a 2 s . c o m*/ * @param file File to test * @param conf Configuration * @param msgFileType message for the description of the file * @throws IOException if the file doesn't exists */ public static final void checkExistingStandardFileOrDirectory(final Path file, final Configuration conf, final String msgFileType) throws IOException { checkExistingDirectoryFile(file, conf, msgFileType); final FileSystem fs = file.getFileSystem(conf); if (!fs.isFile(file) && !fs.getFileStatus(file).isDirectory()) { throw new IOException("The " + msgFileType + " is not a standard file or a directory: " + file); } }
From source file:fuse4j.hadoopfs.HdfsClientImpl.java
License:Apache License
@Override public boolean rename(int uid, String src, String dst) { FileSystem dfs = null; try {/*from w w w .ja va 2 s .c o m*/ dfs = getDfs(uid); Path srcPath = new Path(src); Path dstPath = new Path(dst); if (srcPath.equals(dstPath)) { //source and destination are the same path return false; } if (dfs.isFile(dstPath) && dfs.isFile(srcPath)) { //TODO: temporary fix to overwrite files //delete destination file if exists. //"HDFS-654" fixes the problem allowing atomic rename when dst exists dfs.delete(dstPath); } return dfs.rename(srcPath, dstPath); } catch (Exception ioe) { // fall through to failure System.out.println(ioe); } return false; }
From source file:gobblin.util.HadoopUtils.java
License:Apache License
/** * Copies a src {@link Path} from a srcFs {@link FileSystem} to a dst {@link Path} on a dstFs {@link FileSystem}. If * either the srcFs or dstFs are S3 {@link FileSystem}s (as dictated by {@link #FS_SCHEMES_NON_ATOMIC}) then data is directly * copied from the src to the dst. Otherwise data is first copied to a tmp {@link Path}, which is then renamed to the * dst./* w ww.j a v a 2 s .co m*/ * * @param srcFs the source {@link FileSystem} where the src {@link Path} exists * @param src the {@link Path} to copy from the source {@link FileSystem} * @param dstFs the destination {@link FileSystem} where the dst {@link Path} should be created * @param dst the {@link Path} to copy data to * @param tmp the temporary {@link Path} to use when copying data * @param overwriteDst true if the destination and tmp path should should be overwritten, false otherwise */ public static void copyFile(FileSystem srcFs, Path src, FileSystem dstFs, Path dst, Path tmp, boolean overwriteDst, Configuration conf) throws IOException { Preconditions.checkArgument(srcFs.isFile(src), String.format("Cannot copy from %s to %s because src is not a file", src, dst)); if (FS_SCHEMES_NON_ATOMIC.contains(srcFs.getUri().getScheme()) || FS_SCHEMES_NON_ATOMIC.contains(dstFs.getUri().getScheme())) { copyFile(srcFs, src, dstFs, dst, overwriteDst, conf); } else { copyFile(srcFs, src, dstFs, tmp, overwriteDst, conf); try { boolean renamed = false; if (overwriteDst && dstFs.exists(dst)) { try { deletePath(dstFs, dst, true); } finally { renamePath(dstFs, tmp, dst); renamed = true; } } if (!renamed) { renamePath(dstFs, tmp, dst); } } finally { deletePath(dstFs, tmp, true); } } }
From source file:gobblin.util.HadoopUtils.java
License:Apache License
/** * Copy a file from a srcFs {@link FileSystem} to a dstFs {@link FileSystem}. The src {@link Path} must be a file, * that is {@link FileSystem#isFile(Path)} must return true for src. * * <p>/*from w w w . java 2 s . c o m*/ * If overwrite is specified to true, this method may delete the dst directory even if the copy from src to dst fails. * </p> * * @param srcFs the src {@link FileSystem} to copy the file from * @param src the src {@link Path} to copy * @param dstFs the destination {@link FileSystem} to write to * @param dst the destination {@link Path} to write to * @param overwrite true if the dst {@link Path} should be overwritten, false otherwise */ public static void copyFile(FileSystem srcFs, Path src, FileSystem dstFs, Path dst, boolean overwrite, Configuration conf) throws IOException { Preconditions.checkArgument(srcFs.isFile(src), String.format("Cannot copy from %s to %s because src is not a file", src, dst)); Preconditions.checkArgument(overwrite || !dstFs.exists(dst), String.format("Cannot copy from %s to %s because dst exists", src, dst)); try (InputStream in = srcFs.open(src); OutputStream out = dstFs.create(dst, overwrite)) { IOUtils.copyBytes(in, out, conf, false); } catch (Throwable t1) { try { deleteIfExists(dstFs, dst, true); } catch (Throwable t2) { // Do nothing } throw t1; } }
From source file:gobblin.util.ParallelRunnerTest.java
License:Apache License
/**
 * Runs {@code ParallelRunner.movePath} between two mocked file systems and checks that the
 * bytes served by the source mock end up in the stream handed out by the destination mock.
 */
@Test
public void testMovePath() throws IOException, URISyntaxException {
  final String expected = "test";
  final Path source = new Path("/src/file.txt");
  final Path target = new Path("/dst/file.txt");

  // Source side: an existing regular file whose content is the expected text.
  final FSDataInputStream sourceStream =
      new FSDataInputStream(new SeekableFSInputStream(new ByteArrayInputStream(expected.getBytes())));
  final FileStatus sourceStatus = new FileStatus(1, false, 1, 1, 1, source);

  final FileSystem sourceFs = Mockito.mock(FileSystem.class);
  Mockito.when(sourceFs.exists(source)).thenReturn(true);
  Mockito.when(sourceFs.isFile(source)).thenReturn(true);
  Mockito.when(sourceFs.getUri()).thenReturn(new URI("fs1:////"));
  Mockito.when(sourceFs.getFileStatus(source)).thenReturn(sourceStatus);
  Mockito.when(sourceFs.open(source)).thenReturn(sourceStream);
  Mockito.when(sourceFs.delete(source, true)).thenReturn(true);

  // Destination side: nothing exists yet; writes are captured in memory.
  final ByteArrayOutputStream captured = new ByteArrayOutputStream();
  final FSDataOutputStream targetStream = new FSDataOutputStream(captured, null);

  final FileSystem targetFs = Mockito.mock(FileSystem.class);
  Mockito.when(targetFs.exists(target)).thenReturn(false);
  Mockito.when(targetFs.getUri()).thenReturn(new URI("fs2:////"));
  Mockito.when(targetFs.getConf()).thenReturn(new Configuration());
  Mockito.when(targetFs.create(target, false)).thenReturn(targetStream);

  // Closing the runner happens before the assertion below reads the captured bytes.
  try (ParallelRunner runner = new ParallelRunner(1, sourceFs)) {
    runner.movePath(source, targetFs, target, Optional.<String>absent());
  }

  Assert.assertEquals(captured.toString(), expected);
}
From source file:gr.ntua.h2rdf.LoadTriples.DistinctIds.java
License:Open Source License
public Job createSubmittableJob(String[] args) throws IOException, ClassNotFoundException { //io.compression.codecs Job job = new Job(); job.setInputFormatClass(TextInputFormat.class); Configuration conf = new Configuration(); Path blockProjection = new Path("blockIds/"); Path translations = new Path("translations/"); Path sample = new Path("sample/"); Path temp = new Path("temp/"); Path uniqueIds = new Path("uniqueIds/"); FileSystem fs; try {/*from ww w. j av a 2s.com*/ fs = FileSystem.get(conf); if (fs.exists(uniqueIds)) { fs.delete(uniqueIds, true); } if (fs.exists(translations)) { fs.delete(translations, true); } if (fs.exists(blockProjection)) { fs.delete(blockProjection, true); } if (fs.exists(sample)) { fs.delete(sample, true); } if (fs.exists(temp)) { fs.delete(temp, true); } FileOutputFormat.setOutputPath(job, uniqueIds); Path inp = new Path(args[0]); FileInputFormat.setInputPaths(job, inp); double type = 1; double datasetSize = 0; if (fs.isFile(inp)) { datasetSize = fs.getFileStatus(inp).getLen(); } else if (fs.isDirectory(inp)) { FileStatus[] s = fs.listStatus(inp); for (int i = 0; i < s.length; i++) { if (s[i].getPath().getName().toString().endsWith(".gz")) type = 27; if (s[i].getPath().getName().toString().endsWith(".snappy")) type = 10; datasetSize += s[i].getLen(); } } else { FileStatus[] s = fs.globStatus(inp); for (int i = 0; i < s.length; i++) { if (s[i].getPath().getName().toString().endsWith(".gz")) type = 27; if (s[i].getPath().getName().toString().endsWith(".snappy")) type = 10; datasetSize += s[i].getLen(); } } datasetSize = datasetSize * type; System.out.println("type: " + type); System.out.println("datasetSize: " + datasetSize); samplingRate = (double) sampleChunk / (double) datasetSize; if (samplingRate >= 0.1) { samplingRate = 0.1; } if (samplingRate <= 0.001) { samplingRate = 0.001; } numReducers = (int) (datasetSize / ReducerChunk); if (numReducers == 0) numReducers = 1; numReducers++; } catch (IOException e) { e.printStackTrace(); } 
HBaseAdmin hadmin = new HBaseAdmin(conf); HTableDescriptor desc = new HTableDescriptor(TABLE_NAME); HColumnDescriptor family = new HColumnDescriptor("counter"); desc.addFamily(family); if (!hadmin.tableExists(TABLE_NAME)) { hadmin.createTable(desc); } job.setNumReduceTasks(numReducers); job.setMapOutputKeyClass(ImmutableBytesWritable.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(ImmutableBytesWritable.class); job.setOutputValueClass(ImmutableBytesWritable.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setJarByClass(DistinctIds.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setPartitionerClass(SamplingPartitioner.class); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); job.getConfiguration().set("mapred.compress.map.output", "true"); job.getConfiguration().set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec"); //job.setCombinerClass(Combiner.class); job.setJobName("Distinct Id Wordcount"); job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false); job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false); job.getConfiguration().setInt("io.sort.mb", 100); job.getConfiguration().setInt("io.file.buffer.size", 131072); job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1); return job; }
From source file:hdfs.FileUtil.java
License:Apache License
/** Copy files between FileSystems. */ public static boolean copy(FileSystem srcFS, Path src, FileSystem dstFS, Path dst, boolean deleteSource, boolean overwrite, Configuration conf) throws IOException { dst = checkDest(src.getName(), dstFS, dst, overwrite); if (srcFS.getFileStatus(src).isDir()) { checkDependencies(srcFS, src, dstFS, dst); if (!dstFS.mkdirs(dst)) { return false; }//from w w w.j av a2s . com FileStatus contents[] = srcFS.listStatus(src); for (int i = 0; i < contents.length; i++) { copy(srcFS, contents[i].getPath(), dstFS, new Path(dst, contents[i].getPath().getName()), deleteSource, overwrite, conf); } } else if (srcFS.isFile(src)) { InputStream in = null; OutputStream out = null; try { in = srcFS.open(src); out = dstFS.create(dst, overwrite); IOUtils.copyBytes(in, out, conf, true); } catch (IOException e) { IOUtils.closeStream(out); IOUtils.closeStream(in); throw e; } } else { throw new IOException(src.toString() + ": No such file or directory"); } if (deleteSource) { return srcFS.delete(src, true); } else { return true; } }
From source file:hdfs.FileUtil.java
License:Apache License
/**
 * Copy FileSystem files to local files.
 *
 * <p>A directory is copied recursively, a regular file is copied byte for byte.
 *
 * @param srcFS file system holding {@code src}
 * @param src file or directory to copy
 * @param dst local destination
 * @param deleteSource whether {@code src} is deleted (recursively) after the copy
 * @param conf configuration handed to {@code IOUtils.copyBytes}
 * @return {@code false} if the local directory could not be created; otherwise the
 *         result of deleting the source when {@code deleteSource} is set, else {@code true}
 * @throws IOException if {@code src} is neither a directory nor a file, or on I/O failure
 */
public static boolean copy(FileSystem srcFS, Path src, File dst, boolean deleteSource, Configuration conf)
    throws IOException {
  if (srcFS.getFileStatus(src).isDir()) {
    if (!dst.mkdirs()) {
      return false;
    }
    FileStatus[] contents = srcFS.listStatus(src);
    for (FileStatus child : contents) {
      // The result of the nested copy is not inspected (unchanged behaviour).
      copy(srcFS, child.getPath(), new File(dst, child.getPath().getName()), deleteSource, conf);
    }
  } else if (srcFS.isFile(src)) {
    InputStream in = srcFS.open(src);
    FileOutputStream out;
    try {
      out = new FileOutputStream(dst);
    } catch (IOException e) {
      // The local file could not be opened: do not leak the already opened source stream.
      IOUtils.closeStream(in);
      throw e;
    }
    // NOTE(review): this three-argument copyBytes is expected to close both streams - confirm.
    IOUtils.copyBytes(in, out, conf);
  } else {
    throw new IOException(src.toString() + ": No such file or directory");
  }

  if (deleteSource) {
    return srcFS.delete(src, true);
  } else {
    return true;
  }
}
From source file:hr.fer.tel.rovkp.homework01.task03.Program.java
/**
 * Reports whether the given local path and HDFS path point to an existing file or directory.
 *
 * @param hdfsURI URI of the HDFS instance to query
 * @param hdfsPath path to look up on HDFS
 * @param localPath path to look up on the local file system
 * @return a two-line message stating, for each path, whether it is a valid path
 * @throws URISyntaxException if {@code hdfsURI} is not a valid URI
 * @throws IOException if a file system cannot be obtained or queried
 */
public static String work(String hdfsURI, String hdfsPath, String localPath)
    throws URISyntaxException, IOException {
  Configuration config = new Configuration();
  FileSystem hdfs = FileSystem.get(new URI(hdfsURI), config);
  LocalFileSystem localFileSystem = LocalFileSystem.getLocal(config);

  Path pathLocal = new Path(localPath);
  Path pathHdfs = new Path(hdfsPath);

  // A path counts as valid when it resolves to a file or to a directory.
  boolean localValid = localFileSystem.isFile(pathLocal) || localFileSystem.isDirectory(pathLocal);
  boolean hdfsValid = hdfs.isFile(pathHdfs) || hdfs.isDirectory(pathHdfs);

  // The ternaries used to be inverted, reporting existing paths as "is not ... valid".
  return new StringBuilder()
      .append(localPath)
      .append(localValid ? " is" : " is not")
      .append(" a valid local path.\n")
      .append(hdfsPath)
      .append(hdfsValid ? " is" : " is not")
      .append(" a valid hdfs path.")
      .toString();
}
From source file:hydrograph.engine.cascading.scheme.avro.CustomAvroScheme.java
License:Apache License
/** * This method peeks at the source data to get a schema when none has been * provided./*from w ww .j ava 2 s.co m*/ * * @param flowProcess * The cascading FlowProcess object for this flow. * @param tap * The cascading Tap object. * @return Schema The schema of the peeked at data, or Schema.NULL if none * exists. */ private Schema getSourceSchema(FlowProcess<? extends JobConf> flowProcess, Tap tap) throws IOException { if (tap instanceof CompositeTap) { tap = (Tap) ((CompositeTap) tap).getChildTaps().next(); } final String path = tap.getIdentifier(); Path p = new Path(path); final FileSystem fs = p.getFileSystem(flowProcess.getConfigCopy()); // Get all the input dirs List<FileStatus> statuses = new LinkedList<FileStatus>(Arrays.asList(fs.globStatus(p, filter))); // Now get all the things that are one level down for (FileStatus status : new LinkedList<FileStatus>(statuses)) { if (status.isDir()) for (FileStatus child : Arrays.asList(fs.listStatus(status.getPath(), filter))) { if (child.isDir()) { statuses.addAll(Arrays.asList(fs.listStatus(child.getPath(), filter))); } else if (fs.isFile(child.getPath())) { statuses.add(child); } } } for (FileStatus status : statuses) { Path statusPath = status.getPath(); if (fs.isFile(statusPath)) { // no need to open them all InputStream stream = null; DataFileStream reader = null; try { stream = new BufferedInputStream(fs.open(statusPath)); reader = new DataFileStream(stream, new GenericDatumReader()); return reader.getSchema(); } finally { if (reader == null) { if (stream != null) { stream.close(); } } else { reader.close(); } } } } // couldn't find any Avro files, return null schema return Schema.create(Schema.Type.NULL); }