Example usage for org.apache.hadoop.fs FileSystem open

Introduction

This page collects usage examples for org.apache.hadoop.fs.FileSystem#open.

Prototype

public FSDataInputStream open(Path f) throws IOException

Document

Opens an FSDataInputStream at the indicated Path.
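
Before the project examples below, here is a minimal, self-contained sketch of the call in isolation. The path is a placeholder for illustration; try-with-resources ensures the stream is closed.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OpenExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Placeholder path; point this at a file that exists in your cluster.
        Path path = new Path("/tmp/example.txt");
        FileSystem fs = path.getFileSystem(conf);
        // open() returns a seekable FSDataInputStream; wrap it for line-oriented reads.
        try (FSDataInputStream in = fs.open(path);
                BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}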

Usage

From source file:com.inmobi.conduit.distcp.tools.mapred.RetriableFileCopyCommand.java

License:Apache License

private static ThrottledInputStream getInputStream(Path path, Configuration conf) throws IOException {
    try {
        FileSystem fs = path.getFileSystem(conf);
        long bandwidthKB = getAllowedBandwidth(conf);
        return new ThrottledInputStream(new BufferedInputStream(fs.open(path)), bandwidthKB * 1024);
    } catch (IOException e) {
        throw new CopyReadException(e);
    }
}
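
Note the unit conversion in this example: getAllowedBandwidth(conf) returns a value in KB, while the ThrottledInputStream wrapper (as in upstream DistCp) takes its limit in bytes per second, hence the multiplication by 1024. Wrapping fs.open(path) in a BufferedInputStream first also reduces the number of short reads issued against the underlying HDFS stream.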

From source file:com.inmobi.conduit.local.LocalStreamService.java

License:Apache License

private boolean isEmptyFile(FileStatus fileStatus, FileSystem fs) {
    boolean retVal = false;
    FSDataInputStream in = null;
    try {
        in = fs.open(fileStatus.getPath());
        byte[] data = new byte[1];
        // try reading 1 byte
        int bytesRead = in.read(data);
        if (bytesRead == 1) {
            // read succeeded, so the file is not empty
            retVal = false;
        } else {
            // could not read even one byte, so the file is empty
            retVal = true;
        }
    } catch (IOException e) {
        LOG.error("Unable to find if file is empty or not [" + fileStatus.getPath() + "]", e);
    } finally {
        if (in != null) {
            try {
                in.close();
            } catch (IOException e1) {
                LOG.error("Error in closing file [" + fileStatus.getPath() + "]", e1);
            }
        }
    }
    return retVal;
}
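
When a FileStatus is already in hand, the length it reports can answer the same question without opening a stream at all. A minimal alternative sketch, assuming the reported length is current (which may not hold for a file still being written):

// Alternative sketch: trust the length reported by the FileStatus.
private boolean isEmptyFile(FileStatus fileStatus) {
    return fileStatus.getLen() == 0;
}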

From source file:com.inmobi.conduit.local.LocalStreamServiceTest.java

License:Apache License

private void testClusterName(String configName, String currentClusterName) throws Exception {
    ConduitConfigParser parser = new ConduitConfigParser(configName);
    ConduitConfig config = parser.getConfig();
    Set<String> streamsToProcess = new HashSet<String>();
    streamsToProcess.addAll(config.getSourceStreams().keySet());
    Set<String> clustersToProcess = new HashSet<String>();
    Set<TestLocalStreamService> services = new HashSet<TestLocalStreamService>();
    Cluster currentCluster = null;
    for (SourceStream sStream : config.getSourceStreams().values()) {
        for (String cluster : sStream.getSourceClusters()) {
            clustersToProcess.add(cluster);
        }
    }
    if (currentClusterName != null) {
        currentCluster = config.getClusters().get(currentClusterName);
    }
    for (String clusterName : clustersToProcess) {
        Cluster cluster = config.getClusters().get(clusterName);
        cluster.getHadoopConf().set("mapred.job.tracker", super.CreateJobConf().get("mapred.job.tracker"));
        TestLocalStreamService service = new TestLocalStreamService(config, cluster, currentCluster,
                new NullCheckPointProvider(), streamsToProcess);
        services.add(service);
    }

    for (TestLocalStreamService service : services) {
        FileSystem fs = service.getFileSystem();
        service.preExecute();
        if (currentClusterName != null)
            Assert.assertEquals(service.getCurrentCluster().getName(), currentClusterName);
        // creating a job with empty input path
        Path tmpJobInputPath = new Path("/tmp/job/input/path");
        Map<FileStatus, String> fileListing = new TreeMap<FileStatus, String>();
        Set<FileStatus> trashSet = new HashSet<FileStatus>();
        // checkpointKey, CheckPointPath
        Table<String, String, String> checkpointPaths = HashBasedTable.create();
        service.createMRInput(tmpJobInputPath, fileListing, trashSet, checkpointPaths);
        Job testJobConf = service.createJob(tmpJobInputPath, 1000);
        testJobConf.waitForCompletion(true);

        int numberOfCountersPerFile = 0;
        long sumOfCounterValues = 0;
        Path outputCounterPath = new Path(new Path(service.getCluster().getTmpPath(), service.getName()),
                "counters");
        FileStatus[] statuses = fs.listStatus(outputCounterPath, new PathFilter() {
            public boolean accept(Path path) {
                return path.toString().contains("part");
            }
        });
        for (FileStatus fileSt : statuses) {
            Scanner scanner = new Scanner(fs.open(fileSt.getPath()));
            while (scanner.hasNext()) {
                String counterNameValue = null;
                try {
                    counterNameValue = scanner.next();
                    String tmp[] = counterNameValue.split(ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER);
                    Assert.assertEquals(4, tmp.length);
                    Long numOfMsgs = Long.parseLong(tmp[3]);
                    numberOfCountersPerFile++;
                    sumOfCounterValues += numOfMsgs;
                } catch (Exception e) {
                    LOG.error("Counters file has malformed line with counter name =" + counterNameValue
                            + "..skipping the line", e);
                }
            }
        }
        // Should have 2 counters for each file
        Assert.assertEquals(NUMBER_OF_FILES * 2, numberOfCountersPerFile);
        // sum of all counter values should be equal to total number of messages
        Assert.assertEquals(NUMBER_OF_FILES * 3, sumOfCounterValues);

        Assert.assertEquals(testJobConf.getConfiguration().get(FS_DEFAULT_NAME_KEY),
                service.getCurrentCluster().getHadoopConf().get(FS_DEFAULT_NAME_KEY));
        Assert.assertEquals(testJobConf.getConfiguration().get(SRC_FS_DEFAULT_NAME_KEY),
                service.getCluster().getHadoopConf().get(FS_DEFAULT_NAME_KEY));
        if (currentCluster == null)
            Assert.assertEquals(testJobConf.getConfiguration().get(FS_DEFAULT_NAME_KEY),
                    testJobConf.getConfiguration().get(SRC_FS_DEFAULT_NAME_KEY));
        service.getFileSystem().delete(new Path(service.getCluster().getRootDir()), true);
    }

}

From source file:com.inmobi.conduit.utils.CollapseFilesInDir.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration configuration = new Configuration();
    configuration.set("fs.default.name", args[0]);
    String dir = args[1];
    FileSystem fs = FileSystem.get(configuration);
    FileStatus[] fileList;
    try {
        fileList = fs.listStatus(new Path(dir));
    } catch (FileNotFoundException fe) {
        fileList = null;
    }
    if (fileList != null) {
        if (fileList.length > 1) {
            Set<Path> sourceFiles = new HashSet<Path>();
            Set<String> consumePaths = new HashSet<String>();
            // the input path can have multiple files due to backlog;
            // read them all and create a tmp file
            for (int i = 0; i < fileList.length; i++) {
                Path consumeFilePath = fileList[i].getPath().makeQualified(fs);
                sourceFiles.add(consumeFilePath);
                FSDataInputStream fsDataInputStream = fs.open(consumeFilePath);
                try {
                    while (fsDataInputStream.available() > 0) {
                        String fileName = fsDataInputStream.readLine();
                        if (fileName != null) {
                            consumePaths.add(fileName.trim());
                            System.out.println("Adding [" + fileName + "] to pull");
                        }
                    }
                } finally {
                    fsDataInputStream.close();
                }
            }
            Path finalPath = new Path(dir, new Long(System.currentTimeMillis()).toString());
            FSDataOutputStream out = fs.create(finalPath);
            BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
            try {
                for (String consumePath : consumePaths) {
                    System.out.println("Adding sourceFile [" + consumePath + "] to" + " distcp " + "FinalList");
                    writer.write(consumePath);
                    writer.write("\n");
                }
            } finally {
                writer.close();
            }
            LOG.warn("Final File - [" + finalPath + "]");
            for (Path deletePath : sourceFiles) {
                System.out.println("Deleting - [" + deletePath + "]");
                fs.delete(deletePath);
            }
        }
    }
}
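
The read loop above pairs FSDataInputStream.available() with the deprecated DataInputStream.readLine(). A sketch of the same step using a BufferedReader instead, reusing the variable names from the example above (the UTF-8 charset is an assumption):

// Sketch: read the backlog file list line by line without the deprecated readLine().
try (BufferedReader reader = new BufferedReader(
        new InputStreamReader(fs.open(consumeFilePath), StandardCharsets.UTF_8))) {
    String fileName;
    while ((fileName = reader.readLine()) != null) {
        consumePaths.add(fileName.trim());
        System.out.println("Adding [" + fileName + "] to pull");
    }
}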

From source file:com.inmobi.databus.distcp.DistcpBaseService.java

License:Apache License

private void readConsumePath(FileSystem fs, Path consumePath, Set<String> minFilesSet) throws IOException {
    BufferedReader reader = null;
    try {
        FSDataInputStream fsDataInputStream = fs.open(consumePath);
        reader = new BufferedReader(new InputStreamReader(fsDataInputStream));
        String minFileName = null;
        do {
            minFileName = reader.readLine();
            if (minFileName != null) {
                /*
                 * To avoid data loss, all services publish paths to the
                 * consumers directory first, before publishing them on HDFS
                 * for final consumption. After a distributed transaction
                 * failure it is possible that some of these paths do not
                 * exist. Check for existence before adding them as DistCp
                 * input; otherwise DistCp jobs can fail continuously,
                 * blocking the Merge/Mirror streams from running further.
                 */
                Path p = new Path(minFileName);
                if (fs.exists(p)) {
                    LOG.info("Adding sourceFile [" + minFileName + "] to distcp " + "FinalList");
                    minFilesSet.add(minFileName.trim());
                } else {
                    LOG.info("Skipping [" + minFileName + "] to pull as it's an " + "INVALID PATH");
                }
            }
        } while (minFileName != null);
    } finally {
        if (reader != null)
            reader.close();
    }
}

From source file:com.inmobi.databus.local.LocalStreamService.java

License:Apache License

private boolean isEmptyFile(FileStatus fileStatus, FileSystem fs) {
    boolean retVal = false;
    FSDataInputStream in = null;
    try {
        in = fs.open(fileStatus.getPath());
        byte[] data = new byte[1];
        // try reading 1 byte
        int bytesRead = in.read(data);
        if (bytesRead == 1) {
            // read succeeded, so the file is not empty
            retVal = false;
        } else {
            // could not read even one byte, so the file is empty
            retVal = true;
        }
    } catch (IOException e) {
        LOG.error("Unable to find if file is empty or not [" + fileStatus.getPath() + "]", e);
    } finally {
        if (in != null) {
            try {
                in.close();
            } catch (IOException e1) {
                LOG.error("Error in closing file [" + fileStatus.getPath() + "]", e1);
            }
        }
    }
    return retVal;
}

From source file:com.inmobi.databus.utils.CollapseFilesInDir.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration configuration = new Configuration();
    configuration.set("fs.default.name", args[0]);
    String dir = args[1];
    FileSystem fs = FileSystem.get(configuration);
    FileStatus[] fileList = fs.listStatus(new Path(dir));
    if (fileList != null) {
        if (fileList.length > 1) {
            Set<Path> sourceFiles = new HashSet<Path>();
            Set<String> consumePaths = new HashSet<String>();
            // the input path can have multiple files due to backlog;
            // read them all and create a tmp file
            for (int i = 0; i < fileList.length; i++) {
                Path consumeFilePath = fileList[i].getPath().makeQualified(fs);
                sourceFiles.add(consumeFilePath);
                FSDataInputStream fsDataInputStream = fs.open(consumeFilePath);
                try {
                    while (fsDataInputStream.available() > 0) {
                        String fileName = fsDataInputStream.readLine();
                        if (fileName != null) {
                            consumePaths.add(fileName.trim());
                            System.out.println("Adding [" + fileName + "] to pull");
                        }
                    }
                } finally {
                    fsDataInputStream.close();
                }
            }
            Path finalPath = new Path(dir, new Long(System.currentTimeMillis()).toString());
            FSDataOutputStream out = fs.create(finalPath);
            BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
            try {
                for (String consumePath : consumePaths) {
                    System.out.println("Adding sourceFile [" + consumePath + "] to" + " distcp " + "FinalList");
                    writer.write(consumePath);
                    writer.write("\n");
                }
            } finally {
                writer.close();
            }
            LOG.warn("Final File - [" + finalPath + "]");
            for (Path deletePath : sourceFiles) {
                System.out.println("Deleting - [" + deletePath + "]");
                fs.delete(deletePath);
            }
        }
    }
}

From source file:com.inmobi.grid.fs.s4fs.NativeS4FileSystem.java

License:Apache License

private String getCredentialFromFile(FileSystem fs, Path credFile) throws IOException {
    // try-with-resources closes the reader on every path; the original leaked it.
    try (BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(credFile)))) {
        String line = br.readLine();
        if (line == null) {
            throw new IOException("Access:Secret not found in file: " + credFile);
        }
        return line;
    }
}

From source file:com.inmobi.grill.driver.hive.TestHiveDriver.java

License:Apache License

private void validatePersistentResult(GrillResultSet resultSet, String dataFile, String outputDir,
        boolean formatNulls) throws Exception {
    assertTrue(resultSet instanceof HivePersistentResultSet);
    HivePersistentResultSet persistentResultSet = (HivePersistentResultSet) resultSet;
    String path = persistentResultSet.getOutputPath();
    QueryHandle handle = persistentResultSet.getQueryHandle();

    Path actualPath = new Path(path);
    FileSystem fs = actualPath.getFileSystem(conf);
    assertEquals(actualPath, fs.makeQualified(new Path(outputDir, handle.toString())));
    List<String> actualRows = new ArrayList<String>();
    for (FileStatus stat : fs.listStatus(actualPath)) {
        FSDataInputStream in = fs.open(stat.getPath());
        BufferedReader br = null;
        try {
            br = new BufferedReader(new InputStreamReader(in));
            String line = "";

            while ((line = br.readLine()) != null) {
                System.out.println("Actual:" + line);
                actualRows.add(line.trim());
            }
        } finally {
            if (br != null) {
                br.close();
            }
        }
    }

    BufferedReader br = null;
    List<String> expectedRows = new ArrayList<String>();

    try {
        br = new BufferedReader(new FileReader(new File(dataFile)));
        String line = "";
        while ((line = br.readLine()) != null) {
            String row = line.trim();
            if (formatNulls) {
                row += ",-NA-,";
                row += line.trim();
            }
            expectedRows.add(row);
        }
    } finally {
        if (br != null) {
            br.close();
        }
    }
    assertEquals(actualRows, expectedRows);
}

From source file:com.inmobi.grill.server.GrillServices.java

License:Apache License

private void setupPersistedState() throws IOException, ClassNotFoundException {
    if (conf.getBoolean(GrillConfConstants.GRILL_SERVER_RECOVER_ON_RESTART,
            GrillConfConstants.DEFAULT_GRILL_SERVER_RECOVER_ON_RESTART)) {
        FileSystem fs = persistDir.getFileSystem(conf);

        for (GrillService service : grillServices) {
            ObjectInputStream in = null;
            try {
                try {
                    in = new ObjectInputStream(fs.open(getServicePersistPath(service)));
                } catch (FileNotFoundException fe) {
                    LOG.warn("No persist path available for service:" + service.getName());
                    continue;
                }
                service.readExternal(in);
                LOG.info("Recovered service " + service.getName() + " from persisted state");
            } finally {
                if (in != null) {
                    in.close();
                }
            }
        }
    }
}
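
Recovery here wraps fs.open(...) in an ObjectInputStream and hands it to the service's readExternal. For context, a hedged sketch of the complementary write path; the persistState name is hypothetical, and it assumes each GrillService implements Externalizable and that getServicePersistPath is the same helper used above:

// Hypothetical persist step mirroring the recovery code above.
private void persistState(GrillService service) throws IOException {
    FileSystem fs = persistDir.getFileSystem(conf);
    // Overwrite any previous snapshot for this service.
    try (ObjectOutputStream out = new ObjectOutputStream(
            fs.create(getServicePersistPath(service), true))) {
        service.writeExternal(out);
    }
}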