Example usage for org.apache.hadoop.fs FileSystem isFile

List of usage examples for org.apache.hadoop.fs FileSystem isFile

Introduction

This page collects example usages of the org.apache.hadoop.fs FileSystem isFile method, drawn from open source projects.

Prototype

@Deprecated
public boolean isFile(Path f) throws IOException 

Document

Returns true iff the named path is a regular file. The method is deprecated; the Hadoop javadoc recommends reusing the FileStatus returned by getFileStatus() or listStatus() instead.
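
A minimal, self-contained sketch of the call is shown below, together with the non-deprecated alternative via getFileStatus(); the path /tmp/example.txt is purely illustrative.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsFileExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path("/tmp/example.txt"); // illustrative path

        // Deprecated but still common: true iff p exists and is a regular file.
        boolean viaIsFile = fs.isFile(p);

        // Recommended replacement: reuse the FileStatus instead.
        boolean viaStatus = fs.exists(p) && fs.getFileStatus(p).isFile();

        System.out.println("isFile: " + viaIsFile + ", via FileStatus: " + viaStatus);
    }
}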

Usage

From source file:com.ibm.bi.dml.runtime.io.WriterTextCSV.java

License:Open Source License

/**
 * Method to merge multiple CSV part files on HDFS into a single CSV file on HDFS. 
 * The part files are created by CSV_WRITE MR job. 
 * This method is invoked from CP-write instruction.
 * 
 * @param srcFileName path of the CSV part files (directory or single file) on HDFS
 * @param destFileName path of the merged CSV output file on HDFS
 * @param csvprop CSV format properties (header, delimiter)
 * @param rlen number of rows
 * @param clen number of columns
 * @throws IOException if reading or writing on HDFS fails
 */
public void mergeCSVPartFiles(String srcFileName, String destFileName, CSVFileFormatProperties csvprop,
        long rlen, long clen) throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path srcFilePath = new Path(srcFileName);
    Path mergedFilePath = new Path(destFileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (hdfs.exists(mergedFilePath)) {
        hdfs.delete(mergedFilePath, true);
    }
    OutputStream out = hdfs.create(mergedFilePath, true);

    // write out the header, if needed
    if (csvprop.hasHeader()) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < clen; i++) {
            sb.append("C" + (i + 1));
            if (i < clen - 1)
                sb.append(csvprop.getDelim());
        }
        sb.append('\n');
        out.write(sb.toString().getBytes());
        sb.setLength(0);
    }

    // if the source is a directory
    if (hdfs.isDirectory(srcFilePath)) {
        try {
            FileStatus[] contents = hdfs.listStatus(srcFilePath);
            Path[] partPaths = new Path[contents.length];
            int numPartFiles = 0;
            for (int i = 0; i < contents.length; i++) {
                if (!contents[i].isDirectory()) {
                    // compact the file paths to the front so the sort never sees null holes
                    partPaths[numPartFiles++] = contents[i].getPath();
                }
            }
            Arrays.sort(partPaths, 0, numPartFiles);

            for (int i = 0; i < numPartFiles; i++) {
                InputStream in = hdfs.open(partPaths[i]);
                try {
                    IOUtils.copyBytes(in, out, conf, false);
                    if (i < numPartFiles - 1)
                        out.write('\n');
                } finally {
                    IOUtilFunctions.closeSilently(in);
                }
            }
        } finally {
            IOUtilFunctions.closeSilently(out);
        }
    } else if (hdfs.isFile(srcFilePath)) {
        InputStream in = null;
        try {
            in = hdfs.open(srcFilePath);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(srcFilePath.toString() + ": No such file or directory");
    }
}

From source file:com.ibm.bi.dml.runtime.io.WriterTextCSV.java

License:Open Source License

/**
 * Adds a header line to the CSV data in srcFileName, writing the result to destFileName;
 * if no header is configured in the format properties, the data is simply moved.
 * 
 * @param srcFileName path of the source CSV file (or directory of part files) on HDFS
 * @param destFileName path of the destination CSV file on HDFS
 * @param rlen number of rows
 * @param clen number of columns
 * @throws IOException if reading or writing on HDFS fails
 */
@SuppressWarnings("unchecked")
public void addHeaderToCSV(String srcFileName, String destFileName, long rlen, long clen) throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path srcFilePath = new Path(srcFileName);
    Path destFilePath = new Path(destFileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (!_props.hasHeader()) {
        // simply move srcFile to destFile

        /*
         * TODO: Remove this roundabout way! 
         * For example: destFilePath = /user/biadmin/csv/temp/out/file.csv 
         *              & the only path that exists already on HDFS is /user/biadmin/csv/.
         * In this case: the directory structure /user/biadmin/csv/temp/out must be created. 
         * Simple hdfs.rename() does not seem to create this directory structure.
         */

        // delete the destination file, if exists already
        //boolean ret1 = 
        hdfs.delete(destFilePath, true);

        // Create /user/biadmin/csv/temp/out/file.csv so that ..../temp/out/ is created.
        //boolean ret2 = 
        hdfs.createNewFile(destFilePath);

        // delete the file "file.csv" but preserve the directory structure /user/biadmin/csv/temp/out/
        //boolean ret3 = 
        hdfs.delete(destFilePath, true);

        // finally, move the data to destFilePath = /user/biadmin/csv/temp/out/file.csv
        //boolean ret4 = 
        hdfs.rename(srcFilePath, destFilePath);

        //System.out.println("Return values = del:" + ret1 + ", createNew:" + ret2 + ", del:" + ret3 + ", rename:" + ret4);
        return;
    }

    // construct the header line
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < clen; i++) {
        sb.append("C" + (i + 1));
        if (i < clen - 1)
            sb.append(_props.getDelim());
    }
    sb.append('\n');

    if (hdfs.isDirectory(srcFilePath)) {

        // compute sorted order among part files
        ArrayList<Path> files = new ArrayList<Path>();
        for (FileStatus stat : hdfs.listStatus(srcFilePath, CSVReblockMR.hiddenFileFilter))
            files.add(stat.getPath());
        Collections.sort(files);

        // first part file path
        Path firstpart = files.get(0);

        // create a temp file, and add header and contents of first part
        Path tmp = new Path(firstpart.toString() + ".tmp");
        OutputStream out = hdfs.create(tmp, true);
        out.write(sb.toString().getBytes());
        sb.setLength(0);

        // copy rest of the data from firstpart
        InputStream in = null;
        try {
            in = hdfs.open(firstpart);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }

        // rename tmp to firstpart
        hdfs.delete(firstpart, true);
        hdfs.rename(tmp, firstpart);

        // rename srcfile to destFile
        hdfs.delete(destFilePath, true);
        hdfs.createNewFile(destFilePath); // force the creation of directory structure
        hdfs.delete(destFilePath, true); // delete the file, but preserve the directory structure
        hdfs.rename(srcFilePath, destFilePath); // move the data 

    } else if (hdfs.isFile(srcFilePath)) {
        // create destination file
        OutputStream out = hdfs.create(destFilePath, true);

        // write header
        out.write(sb.toString().getBytes());
        sb.setLength(0);

        // copy the data from srcFile
        InputStream in = null;
        try {
            in = hdfs.open(srcFilePath);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(srcFilePath.toString() + ": No such file or directory");
    }
}
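
A note on the TODO in the no-header branch above: the create-then-delete trick exists only to materialize the missing parent directories before the rename. Assuming the standard FileSystem.mkdirs() semantics (it creates all missing parent directories), a simpler sketch of the same effect would be:

// Sketch of a simpler alternative to the create/delete roundabout:
hdfs.delete(destFilePath, true);
hdfs.mkdirs(destFilePath.getParent());
hdfs.rename(srcFilePath, destFilePath);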

From source file:com.ibm.jaql.io.hadoop.FileOutputConfigurator.java

License:Apache License

public void setSequential(JobConf conf) throws Exception {
    registerSerializers(conf);

    // For an expression, the location is the final file name
    Path outPath = new Path(location);
    FileSystem fs = outPath.getFileSystem(conf);
    outPath = outPath.makeQualified(fs);
    if (fs.exists(outPath)) {
        // TODO: Jaql currently has overwrite semantics; add flag to control this
        if (fs.isFile(outPath)) {
            fs.delete(outPath, false);
        } else {
            // Look for a map-reduce output directory
            FileStatus[] nonMR = fs.listStatus(outPath, new PathFilter() {
                boolean onlyOne = true;

                public boolean accept(Path path) {
                    String name = path.getName();
                    if (name.matches("([.][.]?)|([.]part-[0-9]+.crc)|(part-[0-9]+)")) {
                        return false;
                    }
                    if (onlyOne) {
                        onlyOne = false;
                        return true;
                    }
                    return false;
                }
            });
            if (nonMR.length > 0) {
                throw new IOException(
                        "directory exists and is not a map-reduce output directory: " + nonMR[0].getPath());
            }
            fs.delete(outPath, true);
        }
    }

    // In sequential mode, we will write directly to the output file
    // and bypass the _temporary directory and rename of the standard 
    // FileOutputCommitter by using our own DirectFileOutputCommitter.
    FileOutputFormat.setOutputPath(conf, outPath.getParent());
    conf.setClass("mapred.output.committer.class", DirectFileOutputCommiter.class, OutputCommitter.class);
}

From source file:com.ibm.jaql.lang.expr.system.RUtil.java

License:Apache License

/**
 * Copies a local file into HDFS, overwriting any existing file at the destination.
 * @param localPath path of the local source file
 * @param hdfsPath destination path on HDFS
 * @return true if the copy succeeded, false if an I/O error occurred
 */
public static boolean saveToHDFS(String localPath, String hdfsPath) {
    try {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        int bufferSize = 4 * 1024;
        byte[] buffer = new byte[bufferSize];
        InputStream input = new BufferedInputStream(new FileInputStream(localPath), bufferSize);

        Path outputPath = new Path(hdfsPath);
        if (fs.exists(outputPath)) {
            if (!fs.isFile(outputPath)) {
                throw new IOException("Output path is a directory that already exists.");
            }
            LOG.info("Output path" + outputPath + " already exists. Overwriting it.");
        }
        FSDataOutputStream output = fs.create(outputPath, true);

        int numBytesRead;
        while ((numBytesRead = input.read(buffer)) > 0) {
            output.write(buffer, 0, numBytesRead);
        }
        input.close();
        output.close();
        return true;
    } catch (IOException e) {
        LOG.info("Error in writing file to HDFS.", e);
        return false;
    }
}
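
Note that the core of saveToHDFS can also be expressed with FileSystem's built-in helper; a minimal sketch with illustrative paths:

// Equivalent copy using the built-in copyFromLocalFile helper (paths are illustrative):
FileSystem fs = FileSystem.get(new Configuration());
fs.copyFromLocalFile(new Path("/tmp/local.bin"), new Path("/user/example/remote.bin"));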

From source file:com.idvp.platform.hdfs.HDFSDataStream.java

License:Apache License

protected void doOpen(Configuration conf, Path dstPath, FileSystem hdfs) throws IOException {

    boolean appending = false;
    if (conf.getBoolean("hdfs.append.support", false) && hdfs.isFile(dstPath)) {
        outStream = hdfs.append(dstPath);
        appending = true;
    } else {
        outStream = hdfs.create(dstPath);
    }

    serializer = new BodyTextEventSerializer.Builder().build(outStream);
    if (appending && !serializer.supportsReopen()) {
        outStream.close();
        serializer = null;
        throw new IOException("serializer (" + "TEXT" + ") does not support append");
    }

    // must call superclass to check for replication issues
    registerCurrentStream(outStream, hdfs, dstPath);

    if (appending) {
        serializer.afterReopen();
    } else {
        serializer.afterCreate();
    }
}

From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyMapper.java

License:Apache License

@Test
public void testRun() {
    try {
        deleteState();
        createSourceData();

        FileSystem fs = cluster.getFileSystem();
        CopyMapper copyMapper = new CopyMapper();
        StatusReporter reporter = new StubStatusReporter();
        InMemoryWriter writer = new InMemoryWriter();
        Mapper<Text, FileStatus, NullWritable, Text>.Context context = getMapperContext(copyMapper, reporter,
                writer);
        copyMapper.setup(context);

        for (Path path : pathList) {
            copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
                    fs.getFileStatus(path), context);
        }
        // Check that the maps worked.
        for (Path path : pathList) {
            final Path targetPath = new Path(path.toString().replaceAll(SOURCE_PATH, TARGET_PATH));
            Assert.assertTrue(fs.exists(targetPath));
            Assert.assertTrue(fs.isFile(targetPath) == fs.isFile(path));
            Assert.assertEquals(fs.getFileStatus(path).getReplication(),
                    fs.getFileStatus(targetPath).getReplication());
            Assert.assertEquals(fs.getFileStatus(path).getBlockSize(),
                    fs.getFileStatus(targetPath).getBlockSize());
            Assert.assertTrue(
                    !fs.isFile(targetPath) || fs.getFileChecksum(targetPath).equals(fs.getFileChecksum(path)));
        }

        Assert.assertEquals(pathList.size(), reporter.getCounter(CopyMapper.Counter.PATHS_COPIED).getValue());
        // The files here are compressed, so we compare the total file length
        // with the number of bytes read.
        long totalSize = 0;
        for (Path path : pathList) {
            totalSize += fs.getFileStatus(path).getLen();
        }
        Assert.assertEquals(totalSize, reporter.getCounter(CopyMapper.Counter.BYTES_COPIED).getValue());
        long totalCounterValue = 0;
        for (Text value : writer.values()) {
            String[] tmp = value.toString().split(ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER);
            Assert.assertEquals(4, tmp.length);
            Long numOfMsgs = Long.parseLong(tmp[3]);
            totalCounterValue += numOfMsgs;
        }
        Assert.assertEquals(nFiles * NUMBER_OF_MESSAGES_PER_FILE, totalCounterValue);
        testCopyingExistingFiles(fs, copyMapper, context);
    } catch (Exception e) {
        LOG.error("Unexpected exception: ", e);
        Assert.assertTrue(false);
    }
}

From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyMapper.java

License:Apache License

private static void changeUserGroup(String user, String group) throws IOException {
    FileSystem fs = cluster.getFileSystem();
    FsPermission changedPermission = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL);
    for (Path path : pathList)
        if (fs.isFile(path)) {
            fs.setOwner(path, user, group);
            fs.setPermission(path, changedPermission);
        }
}

From source file:com.inmobi.conduit.distcp.tools.SimpleCopyListing.java

License:Apache License

@Override
protected void validatePaths(DistCpOptions options) throws IOException, InvalidInputException {

    if (options.isSkipPathValidation()) {
        LOG.debug("Skipping Path Validation in distcp");
        return;
    }

    Path targetPath = options.getTargetPath();
    FileSystem targetFS = targetPath.getFileSystem(getConf());
    boolean targetIsFile = targetFS.isFile(targetPath);

    //If target is a file, then source has to be single file
    if (targetIsFile) {
        if (options.getSourcePaths().size() > 1) {
            throw new InvalidInputException("Multiple sources being copied to a file: " + targetPath);
        }

        Path srcPath = options.getSourcePaths().get(0);
        FileSystem sourceFS = srcPath.getFileSystem(getConf());
        if (!sourceFS.isFile(srcPath)) {
            throw new InvalidInputException(
                    "Cannot copy " + srcPath + ", which is not a file, to " + targetPath);
        }
    }

    for (Path path : options.getSourcePaths()) {
        FileSystem fs = path.getFileSystem(getConf());
        if (!fs.exists(path)) {
            throw new InvalidInputException(path + " doesn't exist");
        }
    }

    /* This is required to allow map tasks to access each of the source
       clusters. It retrieves the delegation token for each unique
       file system and adds them to the job's private credential store.
     */
    Credentials credentials = getCredentials();
    if (credentials != null) {
        Path[] inputPaths = options.getSourcePaths().toArray(new Path[1]);
        TokenCache.obtainTokensForNamenodes(credentials, inputPaths, getConf());
    }
}

From source file:com.inmobi.conduit.distcp.tools.SimpleCopyListing.java

License:Apache License

private Path computeSourceRootPath(FileStatus sourceStatus, DistCpOptions options) throws IOException {

    Path target = options.getTargetPath();
    FileSystem targetFS = target.getFileSystem(getConf());

    boolean solitaryFile = options.getSourcePaths().size() == 1 && !sourceStatus.isDir();

    if (solitaryFile) {
        if (targetFS.isFile(target) || !targetFS.exists(target)) {
            return sourceStatus.getPath();
        } else {
            return sourceStatus.getPath().getParent();
        }
    } else {
        boolean specialHandling = (options.getSourcePaths().size() == 1 && !targetFS.exists(target))
                || options.shouldSyncFolder() || options.shouldOverwrite();

        return specialHandling && sourceStatus.isDir() ? sourceStatus.getPath()
                : sourceStatus.getPath().getParent();
    }
}

From source file:com.inmobi.conduit.distcp.tools.TestDistCp.java

License:Apache License

private static void verifyResults() throws Exception {
    for (Path path : pathList) {
        FileSystem fs = cluster.getFileSystem();

        Path sourcePath = path.makeQualified(fs);
        Path targetPath = new Path(sourcePath.toString().replaceAll(SOURCE_PATH, TARGET_PATH));

        Assert.assertTrue(fs.exists(targetPath));
        Assert.assertEquals(fs.isFile(sourcePath), fs.isFile(targetPath));
    }
}