Example usage for org.apache.hadoop.fs FileSystem isFile

List of usage examples for org.apache.hadoop.fs FileSystem isFile

Introduction

On this page you can find example usages for org.apache.hadoop.fs FileSystem isFile.

Prototype

@Deprecated
public boolean isFile(Path f) throws IOException 

Document

True iff the named path is a regular file.
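
Before the individual examples, here is a minimal sketch of the call itself (the path below is a placeholder). Because the method is deprecated, the sketch also shows the usual non-deprecated equivalent via getFileStatus; note that isFile returns false for a missing path, while getFileStatus throws FileNotFoundException, hence the exists guard.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsFileExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example.txt"); // placeholder path
        FileSystem fs = path.getFileSystem(conf);

        // Deprecated call: true iff the named path is a regular file.
        boolean legacy = fs.isFile(path);

        // Non-deprecated equivalent. isFile(Path) returns false when the
        // path does not exist, so guard getFileStatus with exists().
        boolean current = fs.exists(path) && fs.getFileStatus(path).isFile();

        System.out.println(legacy + " " + current);
    }
}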

Usage

From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils.java

License:LGPL

/**
 * Check if a file exists.
 * @param file File to test
 * @param conf Configuration
 * @param msgFileType message for the description of the file
 * @throws IOException if the file doesn't exist
 */
public static final void checkExistingStandardFileOrDirectory(final Path file, final Configuration conf,
        final String msgFileType) throws IOException {

    checkExistingDirectoryFile(file, conf, msgFileType);

    final FileSystem fs = file.getFileSystem(conf);

    if (!fs.isFile(file) && !fs.getFileStatus(file).isDirectory()) {
        throw new IOException("The " + msgFileType + " is  not a standard file or a directory: " + file);
    }
}

From source file:fuse4j.hadoopfs.HdfsClientImpl.java

License:Apache License

@Override
public boolean rename(int uid, String src, String dst) {
    FileSystem dfs = null;
    try {
        dfs = getDfs(uid);
        Path srcPath = new Path(src);
        Path dstPath = new Path(dst);
        if (srcPath.equals(dstPath)) {
            //source and destination are the same path
            return false;
        }
        if (dfs.isFile(dstPath) && dfs.isFile(srcPath)) {
            //TODO: temporary fix to overwrite files
            //delete destination file if exists.
            //"HDFS-654"  fixes the problem allowing atomic rename when dst exists
            dfs.delete(dstPath);
        }
        return dfs.rename(srcPath, dstPath);
    } catch (Exception ioe) {
        // fall through to failure
        System.out.println(ioe);
    }
    return false;
}
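
The TODO above works around rename failing when the destination already exists. As a hedged sketch of the alternative the comment alludes to, FileContext.rename accepts an Options.Rename.OVERWRITE flag that replaces an existing destination in a single call, making the separate delete unnecessary (paths here are placeholders):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Options;
import org.apache.hadoop.fs.Path;

public class OverwriteRenameExample {
    public static void main(String[] args) throws Exception {
        FileContext fc = FileContext.getFileContext(new Configuration());
        Path src = new Path("/tmp/src.txt"); // placeholder source path
        Path dst = new Path("/tmp/dst.txt"); // placeholder destination path
        // Replaces dst if it already exists; no separate delete needed.
        fc.rename(src, dst, Options.Rename.OVERWRITE);
    }
}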

From source file:gobblin.util.HadoopUtils.java

License:Apache License

/**
 * Copies a src {@link Path} from a srcFs {@link FileSystem} to a dst {@link Path} on a dstFs {@link FileSystem}. If
 * either the srcFs or dstFs are S3 {@link FileSystem}s (as dictated by {@link #FS_SCHEMES_NON_ATOMIC}) then data is directly
 * copied from the src to the dst. Otherwise data is first copied to a tmp {@link Path}, which is then renamed to the
 * dst.
 *
 * @param srcFs the source {@link FileSystem} where the src {@link Path} exists
 * @param src the {@link Path} to copy from the source {@link FileSystem}
 * @param dstFs the destination {@link FileSystem} where the dst {@link Path} should be created
 * @param dst the {@link Path} to copy data to
 * @param tmp the temporary {@link Path} to use when copying data
 * @param overwriteDst true if the destination and tmp paths should be overwritten, false otherwise
 */
public static void copyFile(FileSystem srcFs, Path src, FileSystem dstFs, Path dst, Path tmp,
        boolean overwriteDst, Configuration conf) throws IOException {

    Preconditions.checkArgument(srcFs.isFile(src),
            String.format("Cannot copy from %s to %s because src is not a file", src, dst));

    if (FS_SCHEMES_NON_ATOMIC.contains(srcFs.getUri().getScheme())
            || FS_SCHEMES_NON_ATOMIC.contains(dstFs.getUri().getScheme())) {
        copyFile(srcFs, src, dstFs, dst, overwriteDst, conf);
    } else {
        copyFile(srcFs, src, dstFs, tmp, overwriteDst, conf);
        try {
            boolean renamed = false;
            if (overwriteDst && dstFs.exists(dst)) {
                try {
                    deletePath(dstFs, dst, true);
                } finally {
                    renamePath(dstFs, tmp, dst);
                    renamed = true;
                }
            }
            if (!renamed) {
                renamePath(dstFs, tmp, dst);
            }
        } finally {
            deletePath(dstFs, tmp, true);
        }
    }
}
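
A note on the design choice above: on filesystems with atomic rename, copying to a tmp path and then renaming makes the destination appear all at once, so readers never observe a half-written dst. For the schemes listed in FS_SCHEMES_NON_ATOMIC (S3-style stores, where rename is typically implemented as another copy), the data is written directly to dst instead, avoiding transferring it twice.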

From source file:gobblin.util.HadoopUtils.java

License:Apache License

/**
 * Copy a file from a srcFs {@link FileSystem} to a dstFs {@link FileSystem}. The src {@link Path} must be a file,
 * that is {@link FileSystem#isFile(Path)} must return true for src.
 *
 * <p>
 *   If overwrite is specified to true, this method may delete the dst directory even if the copy from src to dst fails.
 * </p>
 *
 * @param srcFs the src {@link FileSystem} to copy the file from
 * @param src the src {@link Path} to copy
 * @param dstFs the destination {@link FileSystem} to write to
 * @param dst the destination {@link Path} to write to
 * @param overwrite true if the dst {@link Path} should be overwritten, false otherwise
 */
public static void copyFile(FileSystem srcFs, Path src, FileSystem dstFs, Path dst, boolean overwrite,
        Configuration conf) throws IOException {

    Preconditions.checkArgument(srcFs.isFile(src),
            String.format("Cannot copy from %s to %s because src is not a file", src, dst));
    Preconditions.checkArgument(overwrite || !dstFs.exists(dst),
            String.format("Cannot copy from %s to %s because dst exists", src, dst));

    try (InputStream in = srcFs.open(src); OutputStream out = dstFs.create(dst, overwrite)) {
        IOUtils.copyBytes(in, out, conf, false);
    } catch (Throwable t1) {
        try {
            deleteIfExists(dstFs, dst, true);
        } catch (Throwable t2) {
            // Do nothing
        }
        throw t1;
    }
}
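
Two points worth noting in this variant: the try-with-resources block guarantees both streams are closed, and on any failure the partially written dst is deleted best-effort before the original Throwable is rethrown, so a failed copy does not leave a truncated destination behind.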

From source file:gobblin.util.ParallelRunnerTest.java

License:Apache License

@Test
public void testMovePath() throws IOException, URISyntaxException {
    String expected = "test";
    ByteArrayOutputStream actual = new ByteArrayOutputStream();

    Path src = new Path("/src/file.txt");
    Path dst = new Path("/dst/file.txt");
    FileSystem fs1 = Mockito.mock(FileSystem.class);
    Mockito.when(fs1.exists(src)).thenReturn(true);
    Mockito.when(fs1.isFile(src)).thenReturn(true);
    Mockito.when(fs1.getUri()).thenReturn(new URI("fs1:////"));
    Mockito.when(fs1.getFileStatus(src)).thenReturn(new FileStatus(1, false, 1, 1, 1, src));
    Mockito.when(fs1.open(src)).thenReturn(
            new FSDataInputStream(new SeekableFSInputStream(new ByteArrayInputStream(expected.getBytes()))));
    Mockito.when(fs1.delete(src, true)).thenReturn(true);

    FileSystem fs2 = Mockito.mock(FileSystem.class);
    Mockito.when(fs2.exists(dst)).thenReturn(false);
    Mockito.when(fs2.getUri()).thenReturn(new URI("fs2:////"));
    Mockito.when(fs2.getConf()).thenReturn(new Configuration());
    Mockito.when(fs2.create(dst, false)).thenReturn(new FSDataOutputStream(actual, null));

    try (ParallelRunner parallelRunner = new ParallelRunner(1, fs1)) {
        parallelRunner.movePath(src, fs2, dst, Optional.<String>absent());
    }

    Assert.assertEquals(actual.toString(), expected);
}

From source file:gr.ntua.h2rdf.LoadTriples.DistinctIds.java

License:Open Source License

public Job createSubmittableJob(String[] args) throws IOException, ClassNotFoundException {
    //io.compression.codecs
    Job job = new Job();

    job.setInputFormatClass(TextInputFormat.class);
    Configuration conf = new Configuration();
    Path blockProjection = new Path("blockIds/");
    Path translations = new Path("translations/");
    Path sample = new Path("sample/");
    Path temp = new Path("temp/");
    Path uniqueIds = new Path("uniqueIds/");
    FileSystem fs;
    try {
        fs = FileSystem.get(conf);
        if (fs.exists(uniqueIds)) {
            fs.delete(uniqueIds, true);
        }
        if (fs.exists(translations)) {
            fs.delete(translations, true);
        }
        if (fs.exists(blockProjection)) {
            fs.delete(blockProjection, true);
        }
        if (fs.exists(sample)) {
            fs.delete(sample, true);
        }
        if (fs.exists(temp)) {
            fs.delete(temp, true);
        }

        FileOutputFormat.setOutputPath(job, uniqueIds);
        Path inp = new Path(args[0]);
        FileInputFormat.setInputPaths(job, inp);

        double type = 1;
        double datasetSize = 0;
        if (fs.isFile(inp)) {
            datasetSize = fs.getFileStatus(inp).getLen();
        } else if (fs.isDirectory(inp)) {
            FileStatus[] s = fs.listStatus(inp);
            for (int i = 0; i < s.length; i++) {
                if (s[i].getPath().getName().toString().endsWith(".gz"))
                    type = 27;
                if (s[i].getPath().getName().toString().endsWith(".snappy"))
                    type = 10;
                datasetSize += s[i].getLen();
            }
        } else {
            FileStatus[] s = fs.globStatus(inp);
            for (int i = 0; i < s.length; i++) {
                if (s[i].getPath().getName().toString().endsWith(".gz"))
                    type = 27;
                if (s[i].getPath().getName().toString().endsWith(".snappy"))
                    type = 10;
                datasetSize += s[i].getLen();
            }
        }
        datasetSize = datasetSize * type;
        System.out.println("type: " + type);
        System.out.println("datasetSize: " + datasetSize);
        samplingRate = (double) sampleChunk / (double) datasetSize;
        if (samplingRate >= 0.1) {
            samplingRate = 0.1;
        }
        if (samplingRate <= 0.001) {
            samplingRate = 0.001;
        }
        numReducers = (int) (datasetSize / ReducerChunk);
        if (numReducers == 0)
            numReducers = 1;
        numReducers++;
    } catch (IOException e) {
        e.printStackTrace();
    }

    HBaseAdmin hadmin = new HBaseAdmin(conf);
    HTableDescriptor desc = new HTableDescriptor(TABLE_NAME);

    HColumnDescriptor family = new HColumnDescriptor("counter");
    desc.addFamily(family);
    if (!hadmin.tableExists(TABLE_NAME)) {
        hadmin.createTable(desc);
    }

    job.setNumReduceTasks(numReducers);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(ImmutableBytesWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setJarByClass(DistinctIds.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setPartitionerClass(SamplingPartitioner.class);

    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    job.getConfiguration().set("mapred.compress.map.output", "true");
    job.getConfiguration().set("mapred.map.output.compression.codec",
            "org.apache.hadoop.io.compress.SnappyCodec");

    //job.setCombinerClass(Combiner.class);
    job.setJobName("Distinct Id Wordcount");
    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    job.getConfiguration().setInt("io.sort.mb", 100);
    job.getConfiguration().setInt("io.file.buffer.size", 131072);
    job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);

    return job;

}

From source file:hdfs.FileUtil.java

License:Apache License

/** Copy files between FileSystems. */
public static boolean copy(FileSystem srcFS, Path src, FileSystem dstFS, Path dst, boolean deleteSource,
        boolean overwrite, Configuration conf) throws IOException {
    dst = checkDest(src.getName(), dstFS, dst, overwrite);

    if (srcFS.getFileStatus(src).isDir()) {
        checkDependencies(srcFS, src, dstFS, dst);
        if (!dstFS.mkdirs(dst)) {
            return false;
        }
        FileStatus contents[] = srcFS.listStatus(src);
        for (int i = 0; i < contents.length; i++) {
            copy(srcFS, contents[i].getPath(), dstFS, new Path(dst, contents[i].getPath().getName()),
                    deleteSource, overwrite, conf);
        }
    } else if (srcFS.isFile(src)) {
        InputStream in = null;
        OutputStream out = null;
        try {
            in = srcFS.open(src);
            out = dstFS.create(dst, overwrite);
            IOUtils.copyBytes(in, out, conf, true);
        } catch (IOException e) {
            IOUtils.closeStream(out);
            IOUtils.closeStream(in);
            throw e;
        }
    } else {
        throw new IOException(src.toString() + ": No such file or directory");
    }
    if (deleteSource) {
        return srcFS.delete(src, true);
    } else {
        return true;
    }

}

From source file:hdfs.FileUtil.java

License:Apache License

/** Copy FileSystem files to local files. */
public static boolean copy(FileSystem srcFS, Path src, File dst, boolean deleteSource, Configuration conf)
        throws IOException {
    if (srcFS.getFileStatus(src).isDir()) {
        if (!dst.mkdirs()) {
            return false;
        }
        FileStatus contents[] = srcFS.listStatus(src);
        for (int i = 0; i < contents.length; i++) {
            copy(srcFS, contents[i].getPath(), new File(dst, contents[i].getPath().getName()), deleteSource,
                    conf);
        }
    } else if (srcFS.isFile(src)) {
        InputStream in = srcFS.open(src);
        IOUtils.copyBytes(in, new FileOutputStream(dst), conf);
    } else {
        throw new IOException(src.toString() + ": No such file or directory");
    }
    if (deleteSource) {
        return srcFS.delete(src, true);
    } else {
        return true;
    }
}

From source file:hr.fer.tel.rovkp.homework01.task03.Program.java

public static String work(String hdfsURI, String hdfsPath, String localPath)
        throws URISyntaxException, IOException {

    Configuration config = new Configuration();
    FileSystem hdfs = FileSystem.get(new URI(hdfsURI), config);
    LocalFileSystem localFileSystem = LocalFileSystem.getLocal(config);

    Path pathLocal = new Path(localPath);
    Path pathHdfs = new Path(hdfsPath);

    boolean isLocalFile = localFileSystem.isFile(pathLocal) || localFileSystem.isDirectory(pathLocal);
    boolean isHdfsFile = hdfs.isFile(pathHdfs) || hdfs.isDirectory(pathHdfs);

    // Report whether each path exists as a file or directory.
    return new StringBuilder().append(localPath).append(isLocalFile ? " is" : " is not")
            .append(" a valid local path.\n").append(hdfsPath).append(isHdfsFile ? " is" : " is not")
            .append(" a valid hdfs path.").toString();
}

From source file:hydrograph.engine.cascading.scheme.avro.CustomAvroScheme.java

License:Apache License

/**
 * This method peeks at the source data to get a schema when none has been
 * provided.
 *
 * @param flowProcess
 *            The cascading FlowProcess object for this flow.
 * @param tap
 *            The cascading Tap object.
 * @return Schema The schema of the peeked at data, or Schema.NULL if none
 *         exists.
 */
private Schema getSourceSchema(FlowProcess<? extends JobConf> flowProcess, Tap tap) throws IOException {

    if (tap instanceof CompositeTap) {
        tap = (Tap) ((CompositeTap) tap).getChildTaps().next();
    }
    final String path = tap.getIdentifier();
    Path p = new Path(path);
    final FileSystem fs = p.getFileSystem(flowProcess.getConfigCopy());
    // Get all the input dirs
    List<FileStatus> statuses = new LinkedList<FileStatus>(Arrays.asList(fs.globStatus(p, filter)));
    // Now get all the things that are one level down
    for (FileStatus status : new LinkedList<FileStatus>(statuses)) {
        if (status.isDir())
            for (FileStatus child : Arrays.asList(fs.listStatus(status.getPath(), filter))) {
                if (child.isDir()) {
                    statuses.addAll(Arrays.asList(fs.listStatus(child.getPath(), filter)));
                } else if (fs.isFile(child.getPath())) {
                    statuses.add(child);
                }
            }
    }
    for (FileStatus status : statuses) {
        Path statusPath = status.getPath();
        if (fs.isFile(statusPath)) {
            // no need to open them all
            InputStream stream = null;
            DataFileStream reader = null;
            try {
                stream = new BufferedInputStream(fs.open(statusPath));
                reader = new DataFileStream(stream, new GenericDatumReader());
                return reader.getSchema();
            } finally {
                if (reader == null) {
                    if (stream != null) {
                        stream.close();
                    }
                } else {
                    reader.close();
                }
            }

        }
    }
    // couldn't find any Avro files, return null schema
    return Schema.create(Schema.Type.NULL);
}