List of usage examples for org.apache.hadoop.fs FileSystem exists
public boolean exists(Path f) throws IOException
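Before the examples, a minimal sketch of the probe-then-create pattern that recurs throughout them may help; the class name, path, and default Configuration here are illustrative assumptions rather than code from any of the source files below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemExistsSketch {
    public static void main(String[] args) throws IOException {
        // Hypothetical output directory, used only for illustration.
        Path dir = new Path("/tmp/example-output");
        FileSystem fs = FileSystem.get(new Configuration());
        // exists() returns true when the path names a file or a directory.
        if (!fs.exists(dir)) {
            fs.mkdirs(dir); // create the directory only when the probe says it is absent
        }
    }
}

Note that probe-then-act is inherently racy on a shared file system, which is why several of the examples below also check the boolean results of delete and rename rather than relying on exists alone.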
From source file: co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java
License: Apache License
/**
 * Merge two paths together. Anything in from will be moved into to; if there
 * are any name conflicts while merging, the files or directories in from win.
 * @param fs the FileSystem to use
 * @param from the path data is coming from.
 * @param to the path data is going to.
 * @throws IOException on any error
 */
private void mergePaths(FileSystem fs, final FileStatus from, final Path to) throws IOException {
    if (from.isFile()) {
        if (fs.exists(to)) {
            if (!fs.delete(to, true)) {
                throw new IOException("Failed to delete " + to);
            }
        }
        if (!fs.rename(from.getPath(), to)) {
            throw new IOException("Failed to rename " + from + " to " + to);
        }
    } else if (from.isDirectory()) {
        if (fs.exists(to)) {
            FileStatus toStat = fs.getFileStatus(to);
            if (!toStat.isDirectory()) {
                if (!fs.delete(to, true)) {
                    throw new IOException("Failed to delete " + to);
                }
                if (!fs.rename(from.getPath(), to)) {
                    throw new IOException("Failed to rename " + from + " to " + to);
                }
            } else {
                // It is a directory, so merge everything in the directories
                for (FileStatus subFrom : fs.listStatus(from.getPath())) {
                    Path subTo = new Path(to, subFrom.getPath().getName());
                    mergePaths(fs, subFrom, subTo);
                }
            }
        } else {
            // It does not exist, so just rename
            if (!fs.rename(from.getPath(), to)) {
                throw new IOException("Failed to rename " + from + " to " + to);
            }
        }
    }
}
From source file: co.cask.hydrator.action.ftp.FTPCopyAction.java
License: Apache License
@Override
public void run(ActionContext context) throws Exception {
    Path destination = new Path(config.getDestDirectory());

    FileSystem fileSystem = FileSystem.get(new Configuration());
    destination = fileSystem.makeQualified(destination);
    if (!fileSystem.exists(destination)) {
        fileSystem.mkdirs(destination);
    }

    FTPClient ftp;
    if ("ftp".equals(config.getProtocol().toLowerCase())) {
        ftp = new FTPClient();
    } else {
        ftp = new FTPSClient();
    }
    ftp.setControlKeepAliveTimeout(5);
    // UNIX type server
    FTPClientConfig ftpConfig = new FTPClientConfig();
    // Set additional parameters required for the ftp,
    // for example config.setServerTimeZoneId("Pacific/Pitcairn")
    ftp.configure(ftpConfig);
    try {
        ftp.connect(config.getHost(), config.getPort());
        ftp.enterLocalPassiveMode();
        String replyString = ftp.getReplyString();
        LOG.info("Connected to server {} and port {} with reply from connect as {}.", config.getHost(),
                config.getPort(), replyString);

        // Check the reply code for actual success
        int replyCode = ftp.getReplyCode();

        if (!FTPReply.isPositiveCompletion(replyCode)) {
            ftp.disconnect();
            throw new RuntimeException(String.format("FTP server refused connection with code %s and reply %s.",
                    replyCode, replyString));
        }

        if (!ftp.login(config.getUserName(), config.getPassword())) {
            LOG.error("login command reply code {}, {}", ftp.getReplyCode(), ftp.getReplyString());
            ftp.logout();
            throw new RuntimeException(String.format(
                    "Login to the FTP server %s and port %s failed. Please check user name and password.",
                    config.getHost(), config.getPort()));
        }

        FTPFile[] ftpFiles = ftp.listFiles(config.getSrcDirectory());
        LOG.info("listFiles command reply code: {}, {}.", ftp.getReplyCode(), ftp.getReplyString());
        // Check the reply code for the listFiles call. If it is "522 Data connections
        // must be encrypted", the data channel also needs to be encrypted.
        if (ftp.getReplyCode() == 522 && "sftp".equalsIgnoreCase(config.getProtocol())) {
            // encrypt data channel and listFiles again
            ((FTPSClient) ftp).execPROT("P");
            LOG.info("Attempting command listFiles on encrypted data channel.");
            ftpFiles = ftp.listFiles(config.getSrcDirectory());
        }

        for (FTPFile file : ftpFiles) {
            String source = config.getSrcDirectory() + "/" + file.getName();
            LOG.info("Current file {}, source {}", file.getName(), source);
            if (config.getExtractZipFiles() && file.getName().endsWith(".zip")) {
                copyZip(ftp, source, fileSystem, destination);
            } else {
                Path destinationPath = fileSystem.makeQualified(new Path(destination, file.getName()));
                LOG.debug("Downloading {} to {}", file.getName(), destinationPath.toString());
                // try-with-resources closes both the HDFS stream and the remote FTP
                // stream (the original left the FTP input stream unclosed).
                try (OutputStream output = fileSystem.create(destinationPath);
                        InputStream is = ftp.retrieveFileStream(source)) {
                    ByteStreams.copy(is, output);
                }
            }
            if (!ftp.completePendingCommand()) {
                LOG.error("Error completing command.");
            }
        }
        ftp.logout();
    } finally {
        if (ftp.isConnected()) {
            try {
                ftp.disconnect();
            } catch (Throwable e) {
                LOG.error("Failure to disconnect the ftp connection.", e);
            }
        }
    }
}
From source file: co.cask.hydrator.plugin.hive.action.HiveExport.java
License: Apache License
@Override
public void configurePipeline(PipelineConfigurer pipelineConfigurer) throws IllegalArgumentException {
    // Validate the hive command. For export we only accept select statements.
    SqlParser parser = SqlParser.create(config.statement);
    try {
        SqlNode sqlNode = parser.parseQuery();
        if (!(sqlNode instanceof SqlSelect)) {
            throw new IllegalArgumentException("Hive Export only uses select statements. Please provide a "
                    + "valid hive select statement.");
        }
    } catch (SqlParseException e) {
        // Chain the parse exception so the caller can see why parsing failed.
        throw new IllegalArgumentException("Error while parsing select statement. Please provide a valid "
                + "hive select statement.", e);
    }

    // Validate whether the directory already exists
    if (config.overwrite.equalsIgnoreCase("no")) {
        Configuration configuration = new Configuration();
        try {
            FileSystem fs = FileSystem.get(configuration);
            if (fs.exists(new Path(config.path))) {
                throw new IllegalArgumentException(String.format("The path %s already exists. Please either "
                        + "delete that path or provide another path.", config.path));
            }
        } catch (IOException e) {
            throw new RuntimeException("Exception occurred while doing directory check", e);
        }
    }
}
From source file: co.nubetech.hiho.common.HihoTestCase.java
License: Apache License
public void createTextFileInHDFS(String inputData, String filePath, String nameOfFile) throws IOException {
    FileSystem fs = getFileSystem();
    FSDataOutputStream out = null;
    Path inputFile = new Path(filePath + "/" + nameOfFile);
    try {
        out = fs.create(inputFile, false);
        out.write(inputData.getBytes(), 0, inputData.getBytes().length);
        out.close();
        out = null;
        // Checking whether the input file exists or not.
        Path inputPath = new Path(fs.getHomeDirectory(), filePath + "/" + nameOfFile);
        assertTrue(fs.exists(inputPath));
    } finally {
        if (out != null) {
            out.close();
        }
    }
}
From source file: co.nubetech.hiho.dedup.TestDedupJob.java
License: Apache License
@Test
public void testDedupByValueWithDelimitedTextInputFormat() throws Exception {
    final String inputData1 = "Xavier Wilson,Mason Holloway,Carlos Johnston,Martin Noel,Drake Mckinney\n"
            + "Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein\n"
            + "Kennedy Bailey,Jerome Perry,David Cabrera,Edan Fleming,Orlando Tyson";
    final String inputData2 = "Zephania Bauer,Jermaine Gordon,Vincent Moon,Steven Pierce,Jasper Campos\n"
            + "Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein\n"
            + "Kennedy Bailey,Plato Atkinson,Stuart Guy,Rooney Levy,Judah Benson";
    createTextFileInHDFS(inputData1, "/input1", "testFile1.txt");
    createTextFileInHDFS(inputData2, "/input2", "testFile2.txt");
    String[] args = new String[] { "-inputFormat", "co.nubetech.hiho.dedup.DelimitedTextInputFormat",
            "-inputKeyClassName", "org.apache.hadoop.io.Text", "-inputValueClassName",
            "org.apache.hadoop.io.Text", "-inputPath", "/input1,/input2", "-outputPath", "output",
            "-delimeter", ",", "-column", "1", "-dedupBy", "value" };
    DedupJob job = runDedupJob(args);
    assertEquals(6, job.getTotalRecordsRead());
    assertEquals(0, job.getBadRecords());
    assertEquals(5, job.getOutput());
    assertEquals(1, job.getDuplicateRecords());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));

    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Xavier Wilson,Mason Holloway,Carlos Johnston,Martin Noel,Drake Mckinney");
    expectedOutput.add("Zephania Bauer,Jermaine Gordon,Vincent Moon,Steven Pierce,Jasper Campos");
    expectedOutput.add("Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein");
    expectedOutput.add("Kennedy Bailey,Jerome Perry,David Cabrera,Edan Fleming,Orlando Tyson");
    expectedOutput.add("Kennedy Bailey,Plato Atkinson,Stuart Guy,Rooney Levy,Judah Benson");

    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(5, count);
}
From source file: co.nubetech.hiho.dedup.TestDedupJob.java
License: Apache License
@Test
public void testDedupByValueWithTextInputFormat() throws Exception {
    final String inputData1 = "Xavier Wilson,Mason Holloway,Carlos Johnston,Martin Noel,Drake Mckinney\n"
            + "Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein\n"
            + "Kennedy Bailey,Jerome Perry,David Cabrera,Edan Fleming,Orlando Tyson";
    final String inputData2 = "Zephania Bauer,Jermaine Gordon,Vincent Moon,Steven Pierce,Jasper Campos\n"
            + "Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein\n"
            + "Kennedy Bailey,Plato Atkinson,Stuart Guy,Rooney Levy,Judah Benson";
    createTextFileInHDFS(inputData1, "/input1", "testFile1.txt");
    createTextFileInHDFS(inputData2, "/input2", "testFile2.txt");
    String[] args = new String[] { "-inputFormat", "org.apache.hadoop.mapreduce.lib.input.TextInputFormat",
            "-inputPath", "/input1,/input2", "-outputPath", "output", "-outputFormat",
            "co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat", "-dedupBy", "value" };
    DedupJob job = runDedupJob(args);
    assertEquals(6, job.getTotalRecordsRead());
    assertEquals(0, job.getBadRecords());
    assertEquals(5, job.getOutput());
    assertEquals(1, job.getDuplicateRecords());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));

    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Xavier Wilson,Mason Holloway,Carlos Johnston,Martin Noel,Drake Mckinney");
    expectedOutput.add("Zephania Bauer,Jermaine Gordon,Vincent Moon,Steven Pierce,Jasper Campos");
    expectedOutput.add("Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein");
    expectedOutput.add("Kennedy Bailey,Jerome Perry,David Cabrera,Edan Fleming,Orlando Tyson");
    expectedOutput.add("Kennedy Bailey,Plato Atkinson,Stuart Guy,Rooney Levy,Judah Benson");

    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(5, count);
}
From source file: co.nubetech.hiho.dedup.TestDedupJob.java
License: Apache License
@Test
public void testDedupByValueWithSequenceFileAsTextInputFormat() throws Exception {
    HashMap<Text, Text> inputData1 = new HashMap<Text, Text>();
    inputData1.put(new Text("1"),
            new Text("Xavier Wilson,Mason Holloway,Carlos Johnston,Martin Noel,Drake Mckinney"));
    inputData1.put(new Text("2"),
            new Text("Kennedy Bailey,Jerome Perry,David Cabrera,Edan Fleming,Orlando Tyson"));
    inputData1.put(new Text("3"),
            new Text("Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein"));
    createSequenceFileInHdfs(inputData1, "/input1", "testFile1.seq");

    HashMap<Text, Text> inputData2 = new HashMap<Text, Text>();
    inputData2.put(new Text("1"),
            new Text("Zephania Bauer,Jermaine Gordon,Vincent Moon,Steven Pierce,Jasper Campos"));
    inputData2.put(new Text("2"),
            new Text("Kennedy Bailey,Plato Atkinson,Stuart Guy,Rooney Levy,Judah Benson"));
    inputData2.put(new Text("4"),
            new Text("Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein"));
    createSequenceFileInHdfs(inputData2, "/input2", "testFile2.seq");

    String[] args = new String[] { "-inputFormat",
            "org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextInputFormat", "-outputFormat",
            "co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat", "-inputPath",
            "/input1,/input2", "-outputPath", "output", "-inputKeyClassName", "org.apache.hadoop.io.Text",
            "-inputValueClassName", "org.apache.hadoop.io.Text", "-dedupBy", "value" };
    DedupJob job = runDedupJob(args);
    assertEquals(6, job.getTotalRecordsRead());
    assertEquals(0, job.getBadRecords());
    assertEquals(5, job.getOutput());
    assertEquals(1, job.getDuplicateRecords());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));

    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Xavier Wilson,Mason Holloway,Carlos Johnston,Martin Noel,Drake Mckinney");
    expectedOutput.add("Kennedy Bailey,Jerome Perry,David Cabrera,Edan Fleming,Orlando Tyson");
    expectedOutput.add("Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein");
    expectedOutput.add("Zephania Bauer,Jermaine Gordon,Vincent Moon,Steven Pierce,Jasper Campos");
    expectedOutput.add("Kennedy Bailey,Plato Atkinson,Stuart Guy,Rooney Levy,Judah Benson");

    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(5, count);
}
From source file: co.nubetech.hiho.job.TestDBQueryInputJobWithCluster.java
License: Apache License
@Test
public void testBasicTableImport() throws Exception {
    DBQueryInputJob job = new DBQueryInputJob();

    String[] args = new String[] { "-jdbcDriver", "org.hsqldb.jdbcDriver", "-jdbcUrl",
            "jdbc:hsqldb:hsql://localhost/URLAccess",
            // "-jdbcUsername", "",
            // "-jdbcPassword", "",
            "-outputPath", "testBasicTableImport", "-outputStrategy", "delimited", "-delimiter", "DELIM",
            "-numberOfMappers", "2", "-inputTableName", "Pageview", "-inputOrderBy", "pageview" };
    int res = ToolRunner.run(createJobConf(), job, args);
    assertEquals(0, res);

    // let's verify the result now
    FileSystem outputFS = getFileSystem();
    // Path outputPath = getOutputDir();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "testBasicTableImport");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));

    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("/aDELIM1000");
    expectedOutput.add("/bDELIM2000");
    expectedOutput.add("/cDELIM3000");
    expectedOutput.add("/dDELIM4000");
    expectedOutput.add("/eDELIM5000");
    expectedOutput.add("/fDELIM6000");
    expectedOutput.add("/gDELIM7000");
    expectedOutput.add("/hDELIM8000");
    expectedOutput.add("/iDELIM9000");
    expectedOutput.add("/jDELIM10000");

    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(10, count);
}
From source file: co.nubetech.hiho.job.TestDBQueryInputJobWithCluster.java
License: Apache License
@Test
public void testBasicAvroTableImport() throws Exception {
    DBQueryInputJob job = new DBQueryInputJob();

    String[] args = new String[] { "-jdbcDriver", "org.hsqldb.jdbcDriver", "-jdbcUrl",
            "jdbc:hsqldb:hsql://localhost/URLAccess", "-outputPath", "testQueryBasedImport", "-inputQuery",
            "select url,pageview,commentCount from Pageview, PageComment where Pageview.url = PageComment.url",
            "-inputBoundingQuery", "select min(commentCount), max(commentCount) from PageComment",
            "-outputStrategy", "AVRO", "-delimiter", "DELIM", "-numberOfMappers", "2", "-inputOrderBy",
            "Pageview.pageview" };
    int res = ToolRunner.run(createJobConf(), job, args);
    assertEquals(0, res);

    // let's verify the result now; the verification path must match the
    // -outputPath argument above (the original checked testBasicTableImport).
    FileSystem outputFS = getFileSystem();
    // Path outputPath = getOutputDir();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "testQueryBasedImport");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));
    /*
    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("/aDELIM1000");
    expectedOutput.add("/bDELIM2000");
    expectedOutput.add("/cDELIM3000");
    expectedOutput.add("/dDELIM4000");
    expectedOutput.add("/eDELIM5000");
    expectedOutput.add("/fDELIM6000");
    expectedOutput.add("/gDELIM7000");
    expectedOutput.add("/hDELIM8000");
    expectedOutput.add("/iDELIM9000");
    expectedOutput.add("/jDELIM10000");
    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(10, count);
    */
}
From source file: co.nubetech.hiho.job.TestDBQueryInputJobWithCluster.java
License: Apache License
@Test
public void testQueryBasedImport() throws Exception {
    DBQueryInputJob job = new DBQueryInputJob();

    String[] args = new String[] { "-jdbcDriver", "org.hsqldb.jdbcDriver", "-jdbcUrl",
            "jdbc:hsqldb:hsql://localhost/URLAccess", "-outputPath", "testQueryBasedImport", "-inputQuery",
            "select url,pageview,commentCount from Pageview, PageComment where Pageview.url = PageComment.url",
            "-inputBoundingQuery", "select min(commentCount), max(commentCount) from PageComment",
            "-outputStrategy", "delimited", "-delimiter", "DELIM", "-numberOfMappers", "2", "-inputOrderBy",
            "Pageview.pageview" };
    int res = ToolRunner.run(createJobConf(), job, args);
    assertEquals(0, res);

    // let's verify the result now
    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "testQueryBasedImport");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));

    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("/aDELIM1000DELIM10");
    expectedOutput.add("/bDELIM2000DELIM10");
    expectedOutput.add("/cDELIM3000DELIM10");
    expectedOutput.add("/dDELIM4000DELIM10");
    expectedOutput.add("/eDELIM5000DELIM10");
    expectedOutput.add("/fDELIM6000DELIM10");
    expectedOutput.add("/gDELIM7000DELIM10");
    expectedOutput.add("/hDELIM8000DELIM10");
    expectedOutput.add("/iDELIM9000DELIM10");
    expectedOutput.add("/jDELIM10000DELIM10");

    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(10, count);
}