List of usage examples for org.apache.hadoop.fs FileSystem exists
public boolean exists(Path f) throws IOException
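Before the examples, a minimal sketch of the probe-then-create pattern that recurs throughout them may help; the class name, path, and default Configuration here are illustrative assumptions rather than code from any of the source files below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemExistsSketch {
    public static void main(String[] args) throws IOException {
        // Hypothetical output directory, used only for illustration.
        Path dir = new Path("/tmp/example-output");
        FileSystem fs = FileSystem.get(new Configuration());
        // exists() returns true when the path names a file or a directory.
        if (!fs.exists(dir)) {
            fs.mkdirs(dir); // create the directory only when the probe says it is absent
        }
    }
}

Note that probe-then-act is inherently racy on a shared file system, which is why several of the examples below also check the boolean results of delete and rename rather than relying on exists alone.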
From source file: co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java
License: Apache License
/**
 * Merge two paths together. Anything in from will be moved into to; if there
 * are any name conflicts while merging, the files or directories in from win.
 * @param fs the FileSystem to use
 * @param from the path data is coming from.
 * @param to the path data is going to.
 * @throws IOException on any error
 */
private void mergePaths(FileSystem fs, final FileStatus from, final Path to) throws IOException {
    if (from.isFile()) {
        if (fs.exists(to)) {
            if (!fs.delete(to, true)) {
                throw new IOException("Failed to delete " + to);
            }
        }
        if (!fs.rename(from.getPath(), to)) {
            throw new IOException("Failed to rename " + from + " to " + to);
        }
    } else if (from.isDirectory()) {
        if (fs.exists(to)) {
            FileStatus toStat = fs.getFileStatus(to);
            if (!toStat.isDirectory()) {
                if (!fs.delete(to, true)) {
                    throw new IOException("Failed to delete " + to);
                }
                if (!fs.rename(from.getPath(), to)) {
                    throw new IOException("Failed to rename " + from + " to " + to);
                }
            } else {
                // It is a directory, so merge everything in the directories
                for (FileStatus subFrom : fs.listStatus(from.getPath())) {
                    Path subTo = new Path(to, subFrom.getPath().getName());
                    mergePaths(fs, subFrom, subTo);
                }
            }
        } else {
            // It does not exist, so just rename
            if (!fs.rename(from.getPath(), to)) {
                throw new IOException("Failed to rename " + from + " to " + to);
            }
        }
    }
}
From source file: co.cask.hydrator.action.ftp.FTPCopyAction.java
License: Apache License
@Override
public void run(ActionContext context) throws Exception {
    Path destination = new Path(config.getDestDirectory());

    FileSystem fileSystem = FileSystem.get(new Configuration());
    destination = fileSystem.makeQualified(destination);
    if (!fileSystem.exists(destination)) {
        fileSystem.mkdirs(destination);
    }

    FTPClient ftp;
    if ("ftp".equals(config.getProtocol().toLowerCase())) {
        ftp = new FTPClient();
    } else {
        ftp = new FTPSClient();
    }
    ftp.setControlKeepAliveTimeout(5);
    // UNIX type server
    FTPClientConfig ftpConfig = new FTPClientConfig();
    // Set additional parameters required for the ftp,
    // for example config.setServerTimeZoneId("Pacific/Pitcairn")
    ftp.configure(ftpConfig);
    try {
        ftp.connect(config.getHost(), config.getPort());
        ftp.enterLocalPassiveMode();
        String replyString = ftp.getReplyString();
        LOG.info("Connected to server {} and port {} with reply from connect as {}.", config.getHost(),
                config.getPort(), replyString);

        // Check the reply code for actual success
        int replyCode = ftp.getReplyCode();

        if (!FTPReply.isPositiveCompletion(replyCode)) {
            ftp.disconnect();
            throw new RuntimeException(String.format("FTP server refused connection with code %s and reply %s.",
                    replyCode, replyString));
        }

        if (!ftp.login(config.getUserName(), config.getPassword())) {
            LOG.error("login command reply code {}, {}", ftp.getReplyCode(), ftp.getReplyString());
            ftp.logout();
            throw new RuntimeException(String.format(
                    "Login to the FTP server %s and port %s failed. Please check user name and password.",
                    config.getHost(), config.getPort()));
        }

        FTPFile[] ftpFiles = ftp.listFiles(config.getSrcDirectory());
        LOG.info("listFiles command reply code: {}, {}.", ftp.getReplyCode(), ftp.getReplyString());
        // Check the reply code for the listFiles call. If it is "522 Data connections
        // must be encrypted", the data channel also needs to be encrypted.
        if (ftp.getReplyCode() == 522 && "sftp".equalsIgnoreCase(config.getProtocol())) {
            // encrypt data channel and listFiles again
            ((FTPSClient) ftp).execPROT("P");
            LOG.info("Attempting command listFiles on encrypted data channel.");
            ftpFiles = ftp.listFiles(config.getSrcDirectory());
        }

        for (FTPFile file : ftpFiles) {
            String source = config.getSrcDirectory() + "/" + file.getName();
            LOG.info("Current file {}, source {}", file.getName(), source);
            if (config.getExtractZipFiles() && file.getName().endsWith(".zip")) {
                copyZip(ftp, source, fileSystem, destination);
            } else {
                Path destinationPath = fileSystem.makeQualified(new Path(destination, file.getName()));
                LOG.debug("Downloading {} to {}", file.getName(), destinationPath.toString());
                // try-with-resources closes both the HDFS stream and the remote FTP
                // stream (the original left the FTP input stream unclosed).
                try (OutputStream output = fileSystem.create(destinationPath);
                        InputStream is = ftp.retrieveFileStream(source)) {
                    ByteStreams.copy(is, output);
                }
            }
            if (!ftp.completePendingCommand()) {
                LOG.error("Error completing command.");
            }
        }
        ftp.logout();
    } finally {
        if (ftp.isConnected()) {
            try {
                ftp.disconnect();
            } catch (Throwable e) {
                LOG.error("Failure to disconnect the ftp connection.", e);
            }
        }
    }
}
From source file: co.cask.hydrator.plugin.hive.action.HiveExport.java
License: Apache License
@Override
public void configurePipeline(PipelineConfigurer pipelineConfigurer) throws IllegalArgumentException {
    // Validate the hive command. For export we only accept select statements.
    SqlParser parser = SqlParser.create(config.statement);
    try {
        SqlNode sqlNode = parser.parseQuery();
        if (!(sqlNode instanceof SqlSelect)) {
            throw new IllegalArgumentException("Hive Export only uses select statements. Please provide a "
                    + "valid hive select statement.");
        }
    } catch (SqlParseException e) {
        // Chain the parse exception so the caller can see why parsing failed.
        throw new IllegalArgumentException("Error while parsing select statement. Please provide a valid "
                + "hive select statement.", e);
    }

    // Validate whether the directory already exists
    if (config.overwrite.equalsIgnoreCase("no")) {
        Configuration configuration = new Configuration();
        try {
            FileSystem fs = FileSystem.get(configuration);
            if (fs.exists(new Path(config.path))) {
                throw new IllegalArgumentException(String.format("The path %s already exists. Please either "
                        + "delete that path or provide another path.", config.path));
            }
        } catch (IOException e) {
            throw new RuntimeException("Exception occurred while doing directory check", e);
        }
    }
}
From source file: co.nubetech.hiho.common.HihoTestCase.java
License: Apache License
public void createTextFileInHDFS(String inputData, String filePath, String nameOfFile) throws IOException {
    FileSystem fs = getFileSystem();
    FSDataOutputStream out = null;
    Path inputFile = new Path(filePath + "/" + nameOfFile);
    try {
        out = fs.create(inputFile, false);
        out.write(inputData.getBytes(), 0, inputData.getBytes().length);
        out.close();
        out = null;
        // Checking whether the input file exists or not.
        Path inputPath = new Path(fs.getHomeDirectory(), filePath + "/" + nameOfFile);
        assertTrue(fs.exists(inputPath));
    } finally {
        if (out != null) {
            out.close();
        }
    }
}
From source file: co.nubetech.hiho.dedup.TestDedupJob.java
License: Apache License
@Test
public void testDedupByValueWithDelimitedTextInputFormat() throws Exception {
    final String inputData1 = "Xavier Wilson,Mason Holloway,Carlos Johnston,Martin Noel,Drake Mckinney\n"
            + "Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein\n"
            + "Kennedy Bailey,Jerome Perry,David Cabrera,Edan Fleming,Orlando Tyson";
    final String inputData2 = "Zephania Bauer,Jermaine Gordon,Vincent Moon,Steven Pierce,Jasper Campos\n"
            + "Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein\n"
            + "Kennedy Bailey,Plato Atkinson,Stuart Guy,Rooney Levy,Judah Benson";
    createTextFileInHDFS(inputData1, "/input1", "testFile1.txt");
    createTextFileInHDFS(inputData2, "/input2", "testFile2.txt");
    String[] args = new String[] { "-inputFormat", "co.nubetech.hiho.dedup.DelimitedTextInputFormat",
            "-inputKeyClassName", "org.apache.hadoop.io.Text", "-inputValueClassName",
            "org.apache.hadoop.io.Text", "-inputPath", "/input1,/input2", "-outputPath", "output",
            "-delimeter", ",", "-column", "1", "-dedupBy", "value" };
    DedupJob job = runDedupJob(args);
    assertEquals(6, job.getTotalRecordsRead());
    assertEquals(0, job.getBadRecords());
    assertEquals(5, job.getOutput());
    assertEquals(1, job.getDuplicateRecords());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));

    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Xavier Wilson,Mason Holloway,Carlos Johnston,Martin Noel,Drake Mckinney");
    expectedOutput.add("Zephania Bauer,Jermaine Gordon,Vincent Moon,Steven Pierce,Jasper Campos");
    expectedOutput.add("Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein");
    expectedOutput.add("Kennedy Bailey,Jerome Perry,David Cabrera,Edan Fleming,Orlando Tyson");
    expectedOutput.add("Kennedy Bailey,Plato Atkinson,Stuart Guy,Rooney Levy,Judah Benson");

    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(5, count);
}
From source file: co.nubetech.hiho.dedup.TestDedupJob.java
License: Apache License
@Test
public void testDedupByValueWithTextInputFormat() throws Exception {
    final String inputData1 = "Xavier Wilson,Mason Holloway,Carlos Johnston,Martin Noel,Drake Mckinney\n"
            + "Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein\n"
            + "Kennedy Bailey,Jerome Perry,David Cabrera,Edan Fleming,Orlando Tyson";
    final String inputData2 = "Zephania Bauer,Jermaine Gordon,Vincent Moon,Steven Pierce,Jasper Campos\n"
            + "Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein\n"
            + "Kennedy Bailey,Plato Atkinson,Stuart Guy,Rooney Levy,Judah Benson";
    createTextFileInHDFS(inputData1, "/input1", "testFile1.txt");
    createTextFileInHDFS(inputData2, "/input2", "testFile2.txt");
    String[] args = new String[] { "-inputFormat", "org.apache.hadoop.mapreduce.lib.input.TextInputFormat",
            "-inputPath", "/input1,/input2", "-outputPath", "output", "-outputFormat",
            "co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat", "-dedupBy", "value" };
    DedupJob job = runDedupJob(args);
    assertEquals(6, job.getTotalRecordsRead());
    assertEquals(0, job.getBadRecords());
    assertEquals(5, job.getOutput());
    assertEquals(1, job.getDuplicateRecords());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));

    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Xavier Wilson,Mason Holloway,Carlos Johnston,Martin Noel,Drake Mckinney");
    expectedOutput.add("Zephania Bauer,Jermaine Gordon,Vincent Moon,Steven Pierce,Jasper Campos");
    expectedOutput.add("Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein");
    expectedOutput.add("Kennedy Bailey,Jerome Perry,David Cabrera,Edan Fleming,Orlando Tyson");
    expectedOutput.add("Kennedy Bailey,Plato Atkinson,Stuart Guy,Rooney Levy,Judah Benson");

    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(5, count);
}
From source file: co.nubetech.hiho.dedup.TestDedupJob.java
License: Apache License
@Test
public void testDedupByValueWithSequenceFileAsTextInputFormat() throws Exception {
    HashMap<Text, Text> inputData1 = new HashMap<Text, Text>();
    inputData1.put(new Text("1"),
            new Text("Xavier Wilson,Mason Holloway,Carlos Johnston,Martin Noel,Drake Mckinney"));
    inputData1.put(new Text("2"),
            new Text("Kennedy Bailey,Jerome Perry,David Cabrera,Edan Fleming,Orlando Tyson"));
    inputData1.put(new Text("3"),
            new Text("Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein"));
    createSequenceFileInHdfs(inputData1, "/input1", "testFile1.seq");

    HashMap<Text, Text> inputData2 = new HashMap<Text, Text>();
    inputData2.put(new Text("1"),
            new Text("Zephania Bauer,Jermaine Gordon,Vincent Moon,Steven Pierce,Jasper Campos"));
    inputData2.put(new Text("2"),
            new Text("Kennedy Bailey,Plato Atkinson,Stuart Guy,Rooney Levy,Judah Benson"));
    inputData2.put(new Text("4"),
            new Text("Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein"));
    createSequenceFileInHdfs(inputData2, "/input2", "testFile2.seq");

    String[] args = new String[] { "-inputFormat",
            "org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextInputFormat", "-outputFormat",
            "co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat", "-inputPath",
            "/input1,/input2", "-outputPath", "output", "-inputKeyClassName", "org.apache.hadoop.io.Text",
            "-inputValueClassName", "org.apache.hadoop.io.Text", "-dedupBy", "value" };
    DedupJob job = runDedupJob(args);
    assertEquals(6, job.getTotalRecordsRead());
    assertEquals(0, job.getBadRecords());
    assertEquals(5, job.getOutput());
    assertEquals(1, job.getDuplicateRecords());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));

    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Xavier Wilson,Mason Holloway,Carlos Johnston,Martin Noel,Drake Mckinney");
    expectedOutput.add("Kennedy Bailey,Jerome Perry,David Cabrera,Edan Fleming,Orlando Tyson");
    expectedOutput.add("Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein");
    expectedOutput.add("Zephania Bauer,Jermaine Gordon,Vincent Moon,Steven Pierce,Jasper Campos");
    expectedOutput.add("Kennedy Bailey,Plato Atkinson,Stuart Guy,Rooney Levy,Judah Benson");

    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(5, count);
}
From source file: co.nubetech.hiho.job.TestDBQueryInputJobWithCluster.java
License: Apache License
@Test
public void testBasicTableImport() throws Exception {
    DBQueryInputJob job = new DBQueryInputJob();

    String[] args = new String[] { "-jdbcDriver", "org.hsqldb.jdbcDriver", "-jdbcUrl",
            "jdbc:hsqldb:hsql://localhost/URLAccess",
            // "-jdbcUsername", "",
            // "-jdbcPassword", "",
            "-outputPath", "testBasicTableImport", "-outputStrategy", "delimited", "-delimiter", "DELIM",
            "-numberOfMappers", "2", "-inputTableName", "Pageview", "-inputOrderBy", "pageview" };
    int res = ToolRunner.run(createJobConf(), job, args);
    assertEquals(0, res);

    // let's verify the result now
    FileSystem outputFS = getFileSystem();
    // Path outputPath = getOutputDir();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "testBasicTableImport");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));

    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("/aDELIM1000");
    expectedOutput.add("/bDELIM2000");
    expectedOutput.add("/cDELIM3000");
    expectedOutput.add("/dDELIM4000");
    expectedOutput.add("/eDELIM5000");
    expectedOutput.add("/fDELIM6000");
    expectedOutput.add("/gDELIM7000");
    expectedOutput.add("/hDELIM8000");
    expectedOutput.add("/iDELIM9000");
    expectedOutput.add("/jDELIM10000");

    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(10, count);
}
From source file: co.nubetech.hiho.job.TestDBQueryInputJobWithCluster.java
License: Apache License
@Test
public void testBasicAvroTableImport() throws Exception {
    DBQueryInputJob job = new DBQueryInputJob();

    String[] args = new String[] { "-jdbcDriver", "org.hsqldb.jdbcDriver", "-jdbcUrl",
            "jdbc:hsqldb:hsql://localhost/URLAccess", "-outputPath", "testQueryBasedImport", "-inputQuery",
            "select url,pageview,commentCount from Pageview, PageComment where Pageview.url = PageComment.url",
            "-inputBoundingQuery", "select min(commentCount), max(commentCount) from PageComment",
            "-outputStrategy", "AVRO", "-delimiter", "DELIM", "-numberOfMappers", "2", "-inputOrderBy",
            "Pageview.pageview" };
    int res = ToolRunner.run(createJobConf(), job, args);
    assertEquals(0, res);

    // let's verify the result now; the verification path must match the
    // -outputPath argument above (the original checked testBasicTableImport).
    FileSystem outputFS = getFileSystem();
    // Path outputPath = getOutputDir();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "testQueryBasedImport");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));
    /*
    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("/aDELIM1000");
    expectedOutput.add("/bDELIM2000");
    expectedOutput.add("/cDELIM3000");
    expectedOutput.add("/dDELIM4000");
    expectedOutput.add("/eDELIM5000");
    expectedOutput.add("/fDELIM6000");
    expectedOutput.add("/gDELIM7000");
    expectedOutput.add("/hDELIM8000");
    expectedOutput.add("/iDELIM9000");
    expectedOutput.add("/jDELIM10000");
    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(10, count);
    */
}
From source file: co.nubetech.hiho.job.TestDBQueryInputJobWithCluster.java
License: Apache License
@Test
public void testQueryBasedImport() throws Exception {
    DBQueryInputJob job = new DBQueryInputJob();

    String[] args = new String[] { "-jdbcDriver", "org.hsqldb.jdbcDriver", "-jdbcUrl",
            "jdbc:hsqldb:hsql://localhost/URLAccess", "-outputPath", "testQueryBasedImport", "-inputQuery",
            "select url,pageview,commentCount from Pageview, PageComment where Pageview.url = PageComment.url",
            "-inputBoundingQuery", "select min(commentCount), max(commentCount) from PageComment",
            "-outputStrategy", "delimited", "-delimiter", "DELIM", "-numberOfMappers", "2", "-inputOrderBy",
            "Pageview.pageview" };
    int res = ToolRunner.run(createJobConf(), job, args);
    assertEquals(0, res);

    // let's verify the result now
    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "testQueryBasedImport");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));

    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("/aDELIM1000DELIM10");
    expectedOutput.add("/bDELIM2000DELIM10");
    expectedOutput.add("/cDELIM3000DELIM10");
    expectedOutput.add("/dDELIM4000DELIM10");
    expectedOutput.add("/eDELIM5000DELIM10");
    expectedOutput.add("/fDELIM6000DELIM10");
    expectedOutput.add("/gDELIM7000DELIM10");
    expectedOutput.add("/hDELIM8000DELIM10");
    expectedOutput.add("/iDELIM9000DELIM10");
    expectedOutput.add("/jDELIM10000DELIM10");

    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(10, count);
}