List of usage examples for org.apache.hadoop.fs.FileSystem.open
public FSDataInputStream open(PathHandle fd) throws IOException
From source file:com.inmobi.conduit.distcp.tools.mapred.RetriableFileCopyCommand.java
License:Apache License
/**
 * Opens {@code path} for reading and wraps the resulting stream in a
 * {@link BufferedInputStream} and then a {@code ThrottledInputStream}.
 *
 * @param path file to read; its file system is resolved from {@code conf}
 * @param conf configuration used to resolve the file system and the allowed bandwidth
 * @return the throttled stream over the buffered file contents
 * @throws IOException declared by the signature; any {@link IOException} raised while
 *         resolving the file system or opening the file is rethrown wrapped in a
 *         {@code CopyReadException}
 */
private static ThrottledInputStream getInputStream(Path path, Configuration conf)
    throws IOException {
  try {
    final FileSystem sourceFs = path.getFileSystem(conf);
    // Kept as a long before scaling so the KB -> bytes multiplication is done in 64 bits.
    final long allowedKB = getAllowedBandwidth(conf);
    final BufferedInputStream buffered = new BufferedInputStream(sourceFs.open(path));
    return new ThrottledInputStream(buffered, allowedKB * 1024);
  } catch (IOException ioe) {
    throw new CopyReadException(ioe);
  }
}
From source file:com.inmobi.conduit.local.LocalStreamService.java
License:Apache License
private boolean isEmptyFile(FileStatus fileStatus, FileSystem fs) { boolean retVal = false; FSDataInputStream in = null;// www .ja v a 2s . co m try { in = fs.open(fileStatus.getPath()); byte[] data = new byte[1]; // try reading 1 byte int bytesRead = in.read(data); if (bytesRead == 1) { // not empty file retVal = false; } else { // not able to read 1 bytes also then empty file retVal = true; } } catch (IOException e) { LOG.error("Unable to find if file is empty or not [" + fileStatus.getPath() + "]", e); } finally { if (in != null) { try { in.close(); } catch (IOException e1) { LOG.error("Error in closing file [" + fileStatus.getPath() + "]", e1); } } } return retVal; }
From source file:com.inmobi.conduit.local.LocalStreamServiceTest.java
License:Apache License
private void testClusterName(String configName, String currentClusterName) throws Exception { ConduitConfigParser parser = new ConduitConfigParser(configName); ConduitConfig config = parser.getConfig(); Set<String> streamsToProcess = new HashSet<String>(); streamsToProcess.addAll(config.getSourceStreams().keySet()); Set<String> clustersToProcess = new HashSet<String>(); Set<TestLocalStreamService> services = new HashSet<TestLocalStreamService>(); Cluster currentCluster = null;/*ww w. j a va2 s . c o m*/ for (SourceStream sStream : config.getSourceStreams().values()) { for (String cluster : sStream.getSourceClusters()) { clustersToProcess.add(cluster); } } if (currentClusterName != null) { currentCluster = config.getClusters().get(currentClusterName); } for (String clusterName : clustersToProcess) { Cluster cluster = config.getClusters().get(clusterName); cluster.getHadoopConf().set("mapred.job.tracker", super.CreateJobConf().get("mapred.job.tracker")); TestLocalStreamService service = new TestLocalStreamService(config, cluster, currentCluster, new NullCheckPointProvider(), streamsToProcess); services.add(service); } for (TestLocalStreamService service : services) { FileSystem fs = service.getFileSystem(); service.preExecute(); if (currentClusterName != null) Assert.assertEquals(service.getCurrentCluster().getName(), currentClusterName); // creating a job with empty input path Path tmpJobInputPath = new Path("/tmp/job/input/path"); Map<FileStatus, String> fileListing = new TreeMap<FileStatus, String>(); Set<FileStatus> trashSet = new HashSet<FileStatus>(); // checkpointKey, CheckPointPath Table<String, String, String> checkpointPaths = HashBasedTable.create(); service.createMRInput(tmpJobInputPath, fileListing, trashSet, checkpointPaths); Job testJobConf = service.createJob(tmpJobInputPath, 1000); testJobConf.waitForCompletion(true); int numberOfCountersPerFile = 0; long sumOfCounterValues = 0; Path outputCounterPath = new Path(new 
Path(service.getCluster().getTmpPath(), service.getName()), "counters"); FileStatus[] statuses = fs.listStatus(outputCounterPath, new PathFilter() { public boolean accept(Path path) { return path.toString().contains("part"); } }); for (FileStatus fileSt : statuses) { Scanner scanner = new Scanner(fs.open(fileSt.getPath())); while (scanner.hasNext()) { String counterNameValue = null; try { counterNameValue = scanner.next(); String tmp[] = counterNameValue.split(ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER); Assert.assertEquals(4, tmp.length); Long numOfMsgs = Long.parseLong(tmp[3]); numberOfCountersPerFile++; sumOfCounterValues += numOfMsgs; } catch (Exception e) { LOG.error("Counters file has malformed line with counter name =" + counterNameValue + "..skipping the line", e); } } } // Should have 2 counters for each file Assert.assertEquals(NUMBER_OF_FILES * 2, numberOfCountersPerFile); // sum of all counter values should be equal to total number of messages Assert.assertEquals(NUMBER_OF_FILES * 3, sumOfCounterValues); Assert.assertEquals(testJobConf.getConfiguration().get(FS_DEFAULT_NAME_KEY), service.getCurrentCluster().getHadoopConf().get(FS_DEFAULT_NAME_KEY)); Assert.assertEquals(testJobConf.getConfiguration().get(SRC_FS_DEFAULT_NAME_KEY), service.getCluster().getHadoopConf().get(FS_DEFAULT_NAME_KEY)); if (currentCluster == null) Assert.assertEquals(testJobConf.getConfiguration().get(FS_DEFAULT_NAME_KEY), testJobConf.getConfiguration().get(SRC_FS_DEFAULT_NAME_KEY)); service.getFileSystem().delete(new Path(service.getCluster().getRootDir()), true); } }
From source file:com.inmobi.conduit.utils.CollapseFilesInDir.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration configuration = new Configuration(); configuration.set("fs.default.name", args[0]); String dir = args[1];//from w w w . j a va 2s . c o m FileSystem fs = FileSystem.get(configuration); FileStatus[] fileList; try { fileList = fs.listStatus(new Path(dir)); } catch (FileNotFoundException fe) { fileList = null; } if (fileList != null) { if (fileList.length > 1) { Set<Path> sourceFiles = new HashSet<Path>(); Set<String> consumePaths = new HashSet<String>(); //inputPath has have multiple files due to backlog //read all and create a tmp file for (int i = 0; i < fileList.length; i++) { Path consumeFilePath = fileList[i].getPath().makeQualified(fs); sourceFiles.add(consumeFilePath); FSDataInputStream fsDataInputStream = fs.open(consumeFilePath); try { while (fsDataInputStream.available() > 0) { String fileName = fsDataInputStream.readLine(); if (fileName != null) { consumePaths.add(fileName.trim()); System.out.println("Adding [" + fileName + "] to pull"); } } } finally { fsDataInputStream.close(); } } Path finalPath = new Path(dir, new Long(System.currentTimeMillis()).toString()); FSDataOutputStream out = fs.create(finalPath); BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out)); try { for (String consumePath : consumePaths) { System.out.println("Adding sourceFile [" + consumePath + "] to" + " distcp " + "FinalList"); writer.write(consumePath); writer.write("\n"); } } finally { writer.close(); } LOG.warn("Final File - [" + finalPath + "]"); for (Path deletePath : sourceFiles) { System.out.println("Deleting - [" + deletePath + "]"); fs.delete(deletePath); } } } }
From source file:com.inmobi.databus.distcp.DistcpBaseService.java
License:Apache License
private void readConsumePath(FileSystem fs, Path consumePath, Set<String> minFilesSet) throws IOException { BufferedReader reader = null; try {// ww w . jav a 2 s.c o m FSDataInputStream fsDataInputStream = fs.open(consumePath); reader = new BufferedReader(new InputStreamReader(fsDataInputStream)); String minFileName = null; do { minFileName = reader.readLine(); if (minFileName != null) { /* * To avoid data-loss in all services we publish the paths to * consumers directory first before publishing on HDFS for * finalConsumption. In a distributed transaction failure it's * possible that some of these paths do not exist. Do isExistence * check before adding them as DISTCP input otherwise DISTCP * jobs can fail continously thereby blocking Merge/Mirror * stream to run further */ Path p = new Path(minFileName); if (fs.exists(p)) { LOG.info("Adding sourceFile [" + minFileName + "] to distcp " + "FinalList"); minFilesSet.add(minFileName.trim()); } else { LOG.info("Skipping [" + minFileName + "] to pull as it's an " + "INVALID PATH"); } } } while (minFileName != null); } finally { if (reader != null) reader.close(); } }
From source file:com.inmobi.databus.local.LocalStreamService.java
License:Apache License
private boolean isEmptyFile(FileStatus fileStatus, FileSystem fs) { boolean retVal = false; FSDataInputStream in = null;//from ww w. j a v a 2s. com try { in = fs.open(fileStatus.getPath()); byte[] data = new byte[1]; //try reading 1 byte int bytesRead = in.read(data); if (bytesRead == 1) { //not empty file retVal = false; } else { //not able to read 1 bytes also then empty file retVal = true; } } catch (IOException e) { LOG.error("Unable to find if file is empty or not [" + fileStatus.getPath() + "]", e); } finally { if (in != null) { try { in.close(); } catch (IOException e1) { LOG.error("Error in closing file [" + fileStatus.getPath() + "]", e1); } } } return retVal; }
From source file:com.inmobi.databus.utils.CollapseFilesInDir.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration configuration = new Configuration(); configuration.set("fs.default.name", args[0]); String dir = args[1];/*from ww w.ja v a 2s . c o m*/ FileSystem fs = FileSystem.get(configuration); FileStatus[] fileList = fs.listStatus(new Path(dir)); if (fileList != null) { if (fileList.length > 1) { Set<Path> sourceFiles = new HashSet<Path>(); Set<String> consumePaths = new HashSet<String>(); //inputPath has have multiple files due to backlog //read all and create a tmp file for (int i = 0; i < fileList.length; i++) { Path consumeFilePath = fileList[i].getPath().makeQualified(fs); sourceFiles.add(consumeFilePath); FSDataInputStream fsDataInputStream = fs.open(consumeFilePath); try { while (fsDataInputStream.available() > 0) { String fileName = fsDataInputStream.readLine(); if (fileName != null) { consumePaths.add(fileName.trim()); System.out.println("Adding [" + fileName + "] to pull"); } } } finally { fsDataInputStream.close(); } } Path finalPath = new Path(dir, new Long(System.currentTimeMillis()).toString()); FSDataOutputStream out = fs.create(finalPath); BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out)); try { for (String consumePath : consumePaths) { System.out.println("Adding sourceFile [" + consumePath + "] to" + " distcp " + "FinalList"); writer.write(consumePath); writer.write("\n"); } } finally { writer.close(); } LOG.warn("Final File - [" + finalPath + "]"); for (Path deletePath : sourceFiles) { System.out.println("Deleting - [" + deletePath + "]"); fs.delete(deletePath); } } } }
From source file:com.inmobi.grid.fs.s4fs.NativeS4FileSystem.java
License:Apache License
private String getCredentialFromFile(FileSystem fs, Path credFile) throws IOException { BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(credFile))); String line = br.readLine();//from ww w .ja v a 2 s. c o m if (line == null) { throw new IOException("Access:Secret not found in file: " + credFile); } return line; }
From source file:com.inmobi.grill.driver.hive.TestHiveDriver.java
License:Apache License
private void validatePersistentResult(GrillResultSet resultSet, String dataFile, String outptuDir, boolean formatNulls) throws Exception { assertTrue(resultSet instanceof HivePersistentResultSet); HivePersistentResultSet persistentResultSet = (HivePersistentResultSet) resultSet; String path = persistentResultSet.getOutputPath(); QueryHandle handle = persistentResultSet.getQueryHandle(); Path actualPath = new Path(path); FileSystem fs = actualPath.getFileSystem(conf); assertEquals(actualPath, fs.makeQualified(new Path(outptuDir, handle.toString()))); List<String> actualRows = new ArrayList<String>(); for (FileStatus stat : fs.listStatus(actualPath)) { FSDataInputStream in = fs.open(stat.getPath()); BufferedReader br = null; try {//from ww w .j a v a 2 s. c o m br = new BufferedReader(new InputStreamReader(in)); String line = ""; while ((line = br.readLine()) != null) { System.out.println("Actual:" + line); actualRows.add(line.trim()); } } finally { if (br != null) { br.close(); } } } BufferedReader br = null; List<String> expectedRows = new ArrayList<String>(); try { br = new BufferedReader(new FileReader(new File(dataFile))); String line = ""; while ((line = br.readLine()) != null) { String row = line.trim(); if (formatNulls) { row += ",-NA-,"; row += line.trim(); } expectedRows.add(row); } } finally { if (br != null) { br.close(); } } assertEquals(actualRows, expectedRows); }
From source file:com.inmobi.grill.server.GrillServices.java
License:Apache License
/**
 * Restores each service from its persisted state when recovery on restart is enabled
 * in the configuration.
 *
 * <p>A service whose persist path does not exist is skipped with a warning; any other
 * failure is propagated.
 *
 * @throws IOException if a persisted state file cannot be opened, read or closed
 * @throws ClassNotFoundException declared for {@code readExternal}
 */
private void setupPersistedState() throws IOException, ClassNotFoundException {
  final boolean recoverOnRestart = conf.getBoolean(
      GrillConfConstants.GRILL_SERVER_RECOVER_ON_RESTART,
      GrillConfConstants.DEFAULT_GRILL_SERVER_RECOVER_ON_RESTART);
  if (!recoverOnRestart) {
    return;
  }

  FileSystem persistFs = persistDir.getFileSystem(conf);
  for (GrillService service : grillServices) {
    ObjectInputStream stateIn;
    try {
      stateIn = new ObjectInputStream(persistFs.open(getServicePersistPath(service)));
    } catch (FileNotFoundException fe) {
      LOG.warn("No persist path available for service:" + service.getName());
      continue;
    }
    try {
      service.readExternal(stateIn);
      LOG.info("Recovered service " + service.getName() + " from persisted state");
    } finally {
      stateIn.close();
    }
  }
}