List of usage examples for org.apache.hadoop.fs.Path.getName()
public String getName()
From source file:com.indeed.imhotep.builder.tsv.EasyIndexBuilderFromTSV.java
License:Apache License
protected InputReader getInputReader() { final Path inputFile = getInputFilePath(); final BufferedReader fileReader = getInputFileReader(inputFile); final String fileName = inputFile.getName(); if (fileName.endsWith(".csv") || fileName.endsWith(".csv.gz")) { return new CSVInputReader(fileReader); } else {//from w ww . j a v a 2s .c o m // Should we also using the escaped TSV in some situations? What would be the trigger? return new TSVInputReaderNoEscaping(fileReader); } }
From source file:com.indeed.imhotep.builder.tsv.EasyIndexBuilderFromTSV.java
License:Apache License
private BufferedReader getInputFileReader(Path inputFile) { try {//from w w w . j a v a2 s . c om final FileSystem hdfs = getHDFS(inputFile); final Path qualifiedInputFile = inputFile.makeQualified(hdfs); if (!hdfs.exists(inputFile)) { throw new RuntimeException("The provided input file doesn't exist " + qualifiedInputFile + "\nFor hdfs files use 'hdfs:' prefix like hdfs:/tmp/file.tsv"); } log.info("Reading TSV data from " + qualifiedInputFile); InputStream inputStream = hdfs.open(inputFile); if (inputFile.getName().endsWith(".gz")) { inputStream = new GZIPInputStream(inputStream); } return new BufferedReader(new InputStreamReader(inputStream, Charsets.UTF_8)); } catch (IOException e) { throw Throwables.propagate(e); } }
From source file:com.indeed.imhotep.builder.tsv.TsvConverter.java
License:Apache License
/**
 * Scans the sub-directories of {@code toIndexPath} for files that should be indexed.
 *
 * Only directories whose name matches {@code ALLOWED_INDEX_NAMES} are considered, and
 * within them only plain files whose name ends with one of {@code ALLOWED_FILE_EXT}.
 * Skipped directories and files with unsupported extensions are logged at info level.
 *
 * @return one {@code FileToIndex} per accepted file, carrying the file name with the
 *         matched extension stripped, the index (directory) name and the file path
 * @throws RuntimeException wrapping any {@link IOException} raised while listing
 */
private List<FileToIndex> findNewFilesToIndex() {
    try {
        final List<FileToIndex> files = Lists.newArrayList();
        for (FileStatus dir : inputFS.listStatus(toIndexPath)) {
            if (!dir.isDir()) {
                continue;
            }
            final Path indexPath = dir.getPath();
            final String indexName = indexPath.getName();
            if (!indexName.matches(ALLOWED_INDEX_NAMES)) {
                log.info("Skipped directory " + indexPath + ". Index names should match regex "
                        + ALLOWED_INDEX_NAMES);
                continue;
            }

            for (FileStatus file : inputFS.listStatus(indexPath)) {
                if (file.isDir()) {
                    continue;
                }
                final Path filePath = file.getPath();
                final String fullName = filePath.getName();

                // First allowed extension that the file name ends with, if any.
                String matchedExt = null;
                for (String allowedExt : ALLOWED_FILE_EXT) {
                    if (fullName.endsWith(allowedExt)) {
                        matchedExt = allowedExt;
                        break;
                    }
                }

                if (matchedExt == null) {
                    log.info("Not one of supported extensions (" + StringUtils.join(ALLOWED_FILE_EXT, ", ")
                            + ") file: " + filePath);
                } else {
                    final String baseName = fullName.substring(0, fullName.length() - matchedExt.length());
                    files.add(new FileToIndex(baseName, indexName, filePath));
                }
            }
        }
        return files;
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
}
From source file:com.inmobi.conduit.AbstractService.java
License:Apache License
private long getPreviousRuntime(FileSystem fs, String destDir, String category) throws Exception { String localDestDir = destDir + File.separator + category; LOG.warn("Querying Directory [" + localDestDir + "]"); Path latestyeardir = getLatestDir(fs, new Path(localDestDir)); int latestyear = 0, latestmonth = 0, latestday = 0, latesthour = 0, latestminute = 0; if (latestyeardir != null) { latestyear = Integer.parseInt(latestyeardir.getName()); Path latestmonthdir = getLatestDir(fs, latestyeardir); if (latestmonthdir != null) { latestmonth = Integer.parseInt(latestmonthdir.getName()); Path latestdaydir = getLatestDir(fs, latestmonthdir); if (latestdaydir != null) { latestday = Integer.parseInt(latestdaydir.getName()); Path latesthourdir = getLatestDir(fs, latestdaydir); if (latesthourdir != null) { latesthour = Integer.parseInt(latesthourdir.getName()); Path latestminutedir = getLatestDir(fs, latesthourdir); if (latestminutedir != null) { latestminute = Integer.parseInt(latestminutedir.getName()); }//from ww w . jav a 2 s. c om } } } } else return -1; LOG.debug("Date Found " + latestyear + File.separator + latestmonth + File.separator + latestday + File.separator + latesthour + File.separator + latestminute); return CalendarHelper.getDateHourMinute(latestyear, latestmonth, latestday, latesthour, latestminute) .getTimeInMillis(); }
From source file:com.inmobi.conduit.distcp.DistcpBaseService.java
License:Apache License
/**
 * Extracts the first path component that follows the destination cluster's final
 * destination root in the given path.
 *
 * @param destnPath a path whose string form starts with the final destination root
 *                  (the root's string length is stripped from the front)
 * @return the name of the depth-1 ancestor of the remaining relative path
 */
protected String getTopicNameFromDestnPath(Path destnPath) {
    final String fullPath = destnPath.toString();
    final String rootPrefix = new Path(destCluster.getFinalDestDirRoot()).toString();

    // Climb from the remainder of the path up to its top-level component.
    Path cursor = new Path(fullPath.substring(rootPrefix.length()));
    while (cursor.depth() != 1) {
        cursor = cursor.getParent();
    }
    return cursor.getName();
}
From source file:com.inmobi.conduit.distcp.MergedStreamService.java
License:Apache License
public Map<Path, Path> createLocalCommitPaths(Path tmpOut, long commitTime, Map<String, List<Path>> categoriesToCommit) throws Exception { // find final destination paths Map<Path, Path> mvPaths = new LinkedHashMap<Path, Path>(); Set<Map.Entry<String, List<Path>>> commitEntries = categoriesToCommit.entrySet(); Iterator<Entry<String, List<Path>>> it = commitEntries.iterator(); while (it.hasNext()) { Map.Entry<String, List<Path>> entry = (Map.Entry<String, List<Path>>) it.next(); String category = entry.getKey(); List<Path> filesInCategory = entry.getValue(); for (Path filePath : filesInCategory) { Path destParentPath = new Path(getDestCluster().getFinalDestDir(category, commitTime)); Path commitPath = new Path(destParentPath, filePath.getName()); mvPaths.put(filePath, commitPath); }//from w w w .ja va 2 s . co m } return mvPaths; }
From source file:com.inmobi.conduit.distcp.MirrorStreamService.java
License:Apache License
private void createCommitPaths(LinkedHashMap<FileStatus, Path> commitPaths, List<FileStatus> streamPaths) { /* Path eg in streamPaths - * /conduit/system/distcp_mirror_<srcCluster>_<destCluster>/conduit/streams * /<streamName>/2012/1/13/15/7/<hostname>-<streamName>-2012-01-16-07 * -21_00000.gz// w w w . j av a 2 s . c om * * or it could be an emptyDir like * /* Path eg in streamPaths - * /conduit/system/distcp_mirror_<srcCluster>_<destCluster>/conduit/streams * /<streamName>/2012/1/13/15/7/ * */ for (FileStatus fileStatus : streamPaths) { String fileName = null; Path prefixDir = null; if (fileStatus.isDir()) { //empty directory prefixDir = fileStatus.getPath(); } else { fileName = fileStatus.getPath().getName(); prefixDir = fileStatus.getPath().getParent(); } Path min = prefixDir; Path hr = min.getParent(); Path day = hr.getParent(); Path month = day.getParent(); Path year = month.getParent(); Path streamName = year.getParent(); String finalPath = getDestCluster().getFinalDestDirRoot() + File.separator + streamName.getName() + File.separator + year.getName() + File.separator + month.getName() + File.separator + day.getName() + File.separator + hr.getName() + File.separator + min.getName(); if (fileName != null) { finalPath += File.separator + fileName; } commitPaths.put(fileStatus, new Path(finalPath)); LOG.debug("Going to commit [" + fileStatus.getPath() + "] to [" + finalPath + "]"); } }
From source file:com.inmobi.conduit.distcp.tools.DistCp.java
License:Apache License
/**
 * Setup ssl configuration on the job configuration to enable hsftp access
 * from map job. Also copy the ssl configuration file to Distributed cache
 *
 * @param configuration - Reference to job's configuration handle
 * @throws java.io.IOException - Exception if unable to locate ssl config file
 */
private void setupSSLConfig(Configuration configuration) throws IOException {
    final String sslConfigFile = inputOptions.getSslConfigurationFile();

    // getResource yields null when the resource cannot be found; fail with the
    // documented IOException instead of a NullPointerException on toString().
    final java.net.URL sslConfigUrl = configuration.getResource(sslConfigFile);
    if (sslConfigUrl == null) {
        throw new IOException("Unable to locate SSL configuration file: " + sslConfigFile);
    }
    Path sslConfigPath = new Path(sslConfigUrl.toString());

    addSSLFilesToDistCache(configuration, sslConfigPath);
    configuration.set(DistCpConstants.CONF_LABEL_SSL_CONF, sslConfigPath.getName());
    // NOTE(review): the keystore label receives the SSL config file name as well,
    // exactly as before - confirm this is intended.
    configuration.set(DistCpConstants.CONF_LABEL_SSL_KEYSTORE, sslConfigPath.getName());
}
From source file:com.inmobi.conduit.distcp.tools.DistCp.java
License:Apache License
/**
 * Registers the SSL trust store, key store and SSL config xml with the distributed
 * cache, and rewrites the two store-location settings on the job configuration to
 * the bare file names of the stores.
 *
 * @param configuration - Job configuration
 * @param sslConfigPath - ssl Configuration file specified through options
 * @throws IOException - If any
 */
private void addSSLFilesToDistCache(Configuration configuration, Path sslConfigPath) throws IOException {
    final FileSystem localFS = FileSystem.getLocal(configuration);

    final Configuration sslConf = new Configuration(false);
    sslConf.addResource(sslConfigPath);

    // Trust store first, then key store.
    final String[] storeLocationKeys = {
        "ssl.client.truststore.location",
        "ssl.client.keystore.location"
    };
    for (String locationKey : storeLocationKeys) {
        final Path storePath = getLocalStorePath(sslConf, locationKey);
        DistributedCache.addCacheFile(storePath.makeQualified(localFS).toUri(), configuration);
        configuration.set(locationKey, storePath.getName());
    }

    DistributedCache.addCacheFile(sslConfigPath.makeQualified(localFS).toUri(), configuration);
}
From source file:com.inmobi.conduit.distcp.tools.DistCp.java
License:Apache License
/** * Setup output format appropriately/*from w w w . j a va 2 s. c o m*/ * * @param job - Job handle * @throws IOException - Exception if any */ private void configureOutputFormat(Job job) throws IOException { final Configuration configuration = job.getConfiguration(); Path targetPath = inputOptions.getTargetPath(); targetPath = targetPath.makeQualified(targetPath.getFileSystem(configuration)); if (inputOptions.shouldAtomicCommit()) { Path workDir = inputOptions.getAtomicWorkPath(); if (workDir == null) { workDir = targetPath.getParent(); } workDir = new Path(workDir, WIP_PREFIX + targetPath.getName() + rand.nextInt()); FileSystem workFS = workDir.getFileSystem(configuration); FileSystem targetFS = targetPath.getFileSystem(configuration); if (!DistCpUtils.compareFs(targetFS, workFS)) { throw new IllegalArgumentException("Work path " + workDir + " and target path " + targetPath + " are in different file system"); } CopyOutputFormat.setWorkingDirectory(job, workDir); } else { CopyOutputFormat.setWorkingDirectory(job, targetPath); } CopyOutputFormat.setCommitDirectory(job, targetPath); Path counterFilePath = inputOptions.getOutPutDirectory(); if (counterFilePath == null) { LOG.error("Output directory is null for distcp"); } else { LOG.info("DistCp output directory path: " + counterFilePath); CopyOutputFormat.setOutputPath(job, counterFilePath); } }