List of usage examples for org.apache.hadoop.fs.Path#getName()
public String getName()
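getName() returns the final component of the path, i.e. everything after the last "/". A minimal standalone sketch of that behavior (the class name PathGetNameDemo and the sample path are illustrative only, not taken from the projects below):

import org.apache.hadoop.fs.Path;

public class PathGetNameDemo {
  public static void main(String[] args) {
    Path p = new Path("/databus/streams/mystream/2012/01/13/15/07/file-00000.gz");
    // Final path component of the file itself
    System.out.println(p.getName());              // file-00000.gz
    // getName() of the parent gives the enclosing directory's name
    System.out.println(p.getParent().getName());  // 07
  }
}

The examples below use exactly these two idioms: taking the file name of a Path, and walking up with getParent() to read directory names.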
From source file:com.inmobi.databus.distcp.MirrorStreamService.java
License:Apache License
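Here getName() is called on the file itself and on each ancestor directory (minute, hour, day, month, year, stream name) to rebuild the destination commit path component by component.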
private void createCommitPaths(LinkedHashMap<FileStatus, Path> commitPaths, List<FileStatus> streamPaths) {
  /* Path eg in streamPaths -
   * /databus/system/distcp_mirror_<srcCluster>_<destCluster>/databus/streams
   * /<streamName>/2012/1/13/15/7/<hostname>-<streamName>-2012-01-16-07-21_00000.gz
   *
   * or it could be an emptyDir like
   * /databus/system/distcp_mirror_<srcCluster>_<destCluster>/databus/streams
   * /<streamName>/2012/1/13/15/7/
   */
  for (FileStatus fileStatus : streamPaths) {
    String fileName = null;
    Path prefixDir = null;
    if (fileStatus.isDir()) {
      // empty directory
      prefixDir = fileStatus.getPath();
    } else {
      fileName = fileStatus.getPath().getName();
      prefixDir = fileStatus.getPath().getParent();
    }
    Path min = prefixDir;
    Path hr = min.getParent();
    Path day = hr.getParent();
    Path month = day.getParent();
    Path year = month.getParent();
    Path streamName = year.getParent();
    String finalPath = getDestCluster().getFinalDestDirRoot() + File.separator + streamName.getName()
        + File.separator + year.getName() + File.separator + month.getName() + File.separator
        + day.getName() + File.separator + hr.getName() + File.separator + min.getName();
    if (fileName != null) {
      finalPath += File.separator + fileName;
    }
    commitPaths.put(fileStatus, new Path(finalPath));
    LOG.debug("Going to commit [" + fileStatus.getPath() + "] to [" + finalPath + "]");
  }
}
From source file:com.inmobi.databus.local.CopyMapper.java
License:Apache License
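The mapper reads the collector and category directory names with getParent().getName(), then uses src.getName() to build the gzipped destination file name.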
@Override
public void map(Text key, Text value, Context context) throws IOException, InterruptedException {
  Path src = new Path(key.toString());
  String dest = value.toString();
  String collector = src.getParent().getName();
  String category = src.getParent().getParent().getName();

  FileSystem fs = FileSystem.get(context.getConfiguration());
  Path target = getTempPath(context, src, category, collector);
  FileUtil.gzip(src, target, context.getConfiguration());

  // move to final destination
  fs.mkdirs(new Path(dest).makeQualified(fs));
  Path destPath = new Path(dest + File.separator + collector + "-" + src.getName() + ".gz");
  LOG.info("Renaming file " + target + " to " + destPath);
  fs.rename(target, destPath);
}
From source file:com.inmobi.databus.purge.DataPurgerServiceTest.java
License:Apache License
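A test helper that creates dummy stream, merge, and trash files; getName() appears when writing the trash file contents and in the commented-out log statements that identify the file being created.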
private void createTestPurgefiles(FileSystem fs, Cluster cluster, Calendar date) throws Exception {
  for (String streamname : cluster.getSourceStreams()) {
    String[] files = new String[NUM_OF_FILES];
    String datapath = Cluster.getDateAsYYYYMMDDHHMNPath(date.getTime());
    String commitpath = cluster.getLocalFinalDestDirRoot() + File.separator + streamname + File.separator
        + datapath;
    String mergecommitpath = cluster.getFinalDestDirRoot() + File.separator + streamname + File.separator
        + datapath;
    String trashpath = cluster.getTrashPath() + File.separator + CalendarHelper.getDateAsString(date)
        + File.separator;
    fs.mkdirs(new Path(commitpath));

    for (int j = 0; j < NUM_OF_FILES; ++j) {
      files[j] = new String(cluster.getName() + "-"
          + TestLocalStreamService.getDateAsYYYYMMDDHHmm(new Date()) + "_" + idFormat.format(j));
      {
        Path path = new Path(commitpath + File.separator + files[j]);
        // LOG.info("Creating streams_local File " + path.getName());
        FSDataOutputStream streamout = fs.create(path);
        streamout.writeBytes("Creating Test data for teststream " + path.toString());
        streamout.close();
        Assert.assertTrue(fs.exists(path));
      }
      {
        Path path = new Path(mergecommitpath + File.separator + files[j]);
        // LOG.info("Creating streams File " + path.getName());
        FSDataOutputStream streamout = fs.create(path);
        streamout.writeBytes("Creating Test data for teststream " + path.toString());
        streamout.close();
        Assert.assertTrue(fs.exists(path));
      }
      {
        Path path = new Path(trashpath + File.separator + String.valueOf(date.get(Calendar.HOUR_OF_DAY))
            + File.separator + files[j]);
        // LOG.info("Creating trash File " + path.toString());
        FSDataOutputStream streamout = fs.create(path);
        streamout.writeBytes("Creating Test trash data for teststream " + path.getName());
        streamout.close();
        Assert.assertTrue(fs.exists(path));
      }
    }
  }
}
From source file:com.inmobi.databus.readers.CollectorStreamReader.java
License:Apache License
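The PathFilter uses getName() to skip files ending in "_current" or "_stats", and buildList() parses the collector timestamp from file.getPath().getName() to decide when to stop listing.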
protected FileMap<CollectorFile> createFileMap() throws IOException {
  return new FileMap<CollectorFile>() {
    @Override
    protected PathFilter createPathFilter() {
      return new PathFilter() {
        @Override
        public boolean accept(Path p) {
          if (p.getName().endsWith("_current") || p.getName().endsWith("_stats")) {
            return false;
          }
          return true;
        }
      };
    }

    /*
     * prepare a fileMap with files which are beyond the stopTime
     */
    @Override
    protected void buildList() throws IOException {
      if (fsIsPathExists(streamDir)) {
        FileStatus[] fileStatuses = fsListFileStatus(streamDir, pathFilter);
        if (fileStatuses == null || fileStatuses.length == 0) {
          LOG.info("No files in directory:" + streamDir);
          return;
        }
        if (stopTime == null) {
          for (FileStatus file : fileStatuses) {
            addPath(file);
          }
        } else {
          for (FileStatus file : fileStatuses) {
            Date currentTimeStamp = getDateFromCollectorFile(file.getPath().getName());
            if (stopTime.before(currentTimeStamp)) {
              stopListing();
              continue;
            }
            addPath(file);
          }
        }
      } else {
        LOG.info("Collector directory does not exist");
      }
    }

    @Override
    protected TreeMap<CollectorFile, FileStatus> createFilesMap() {
      return new TreeMap<CollectorFile, FileStatus>();
    }

    @Override
    protected CollectorFile getStreamFile(String fileName) {
      return CollectorFile.create(fileName);
    }

    @Override
    protected CollectorFile getStreamFile(FileStatus file) {
      return CollectorFile.create(file.getPath().getName());
    }
  };
}
From source file:com.inmobi.databus.readers.DatabusStreamWaitingReader.java
License:Apache License
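The PathFilter rejects hidden Hadoop files by checking path.getName().startsWith("_").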
@Override
protected FileMap<HadoopStreamFile> createFileMap() throws IOException {
  return new FileMap<HadoopStreamFile>() {
    @Override
    protected void buildList() throws IOException {
      buildListing(this, pathFilter);
    }

    @Override
    protected TreeMap<HadoopStreamFile, FileStatus> createFilesMap() {
      return new TreeMap<HadoopStreamFile, FileStatus>();
    }

    @Override
    protected HadoopStreamFile getStreamFile(String fileName) {
      throw new RuntimeException("Not implemented");
    }

    @Override
    protected HadoopStreamFile getStreamFile(FileStatus file) {
      return HadoopStreamFile.create(file);
    }

    @Override
    protected PathFilter createPathFilter() {
      return new PathFilter() {
        @Override
        public boolean accept(Path path) {
          if (path.getName().startsWith("_")) {
            return false;
          }
          return true;
        }
      };
    }
  };
}
From source file:com.inmobi.databus.readers.LocalStreamCollectorReader.java
License:Apache License
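getName() is used both to build DatabusStreamFile instances from file.getPath().getName() and, in the PathFilter, to accept only files whose name starts with the collector prefix.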
public FileMap<DatabusStreamFile> createFileMap() throws IOException {
  return new FileMap<DatabusStreamFile>() {
    @Override
    protected void buildList() throws IOException {
      buildListing(this, pathFilter);
    }

    @Override
    protected TreeMap<DatabusStreamFile, FileStatus> createFilesMap() {
      return new TreeMap<DatabusStreamFile, FileStatus>();
    }

    @Override
    protected DatabusStreamFile getStreamFile(String fileName) {
      return DatabusStreamFile.create(streamName, fileName);
    }

    @Override
    protected DatabusStreamFile getStreamFile(FileStatus file) {
      return DatabusStreamFile.create(streamName, file.getPath().getName());
    }

    @Override
    protected PathFilter createPathFilter() {
      return new PathFilter() {
        @Override
        public boolean accept(Path p) {
          if (p.getName().startsWith(collector)) {
            return true;
          }
          return false;
        }
      };
    }
  };
}
From source file:com.intel.hibench.datagen.streaming.util.SourceFileReader.java
License:Apache License
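getName() filters the directory listing down to files whose name starts with "part-" before the parts are chained into a single SequenceInputStream.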
static private InputStream openMultipleParts(FileSystem fs, Path pt, long offset) throws IOException {
  System.out.println("opening all parts in path: " + pt + ", from offset: " + offset);

  // list all files in given path
  RemoteIterator<LocatedFileStatus> rit = fs.listFiles(pt, false);
  Vector<FSDataInputStream> fileHandleList = new Vector<FSDataInputStream>();
  while (rit.hasNext()) {
    Path path = rit.next().getPath();

    // Only read those files start with "part-"
    if (path.getName().startsWith("part-")) {
      long fileSize = fs.getFileStatus(path).getLen();
      if (offset < fileSize) {
        FSDataInputStream inputStream = fs.open(path);
        if (offset > 0) {
          inputStream.seek(offset);
        }
        fileHandleList.add(inputStream);
      }
      offset -= fileSize;
    }
  }

  if (!fileHandleList.isEmpty()) {
    return new SequenceInputStream(fileHandleList.elements());
  } else {
    System.err.println("Error, no source file loaded. run genSeedDataset.sh first!");
    return null;
  }
}
From source file:com.jkoolcloud.tnt4j.streams.inputs.HdfsFileLineStream.java
License:Apache License
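The PathFilter calls path.getName() to get the bare file name and matches it against a wildcard pattern with FilenameUtils.wildcardMatch.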
/**
 * Searches for files matching name pattern. Name pattern also may contain path of directory, where file search
 * should be performed, e.g., C:/Tomcat/logs/localhost_access_log.*.txt. If no path is defined (just file name
 * pattern) then files are searched in {@code System.getProperty("user.dir")}. Files array is ordered by file
 * create timestamp in descending order.
 *
 * @param path
 *            path of file
 * @param fs
 *            file system
 *
 * @return array of found files paths.
 * @throws IOException
 *             if files can't be listed by file system.
 *
 * @see FileSystem#listStatus(Path, PathFilter)
 * @see FilenameUtils#wildcardMatch(String, String, IOCase)
 */
public static Path[] searchFiles(Path path, FileSystem fs) throws IOException {
  FileStatus[] dir = fs.listStatus(path.getParent(), new PathFilter() {
    @Override
    public boolean accept(Path path) {
      String name = path.getName();
      return FilenameUtils.wildcardMatch(name, "*", IOCase.INSENSITIVE); // NON-NLS
    }
  });

  Path[] activityFiles = new Path[dir == null ? 0 : dir.length];
  if (dir != null) {
    Arrays.sort(dir, new Comparator<FileStatus>() {
      @Override
      public int compare(FileStatus o1, FileStatus o2) {
        return Long.valueOf(o1.getModificationTime()).compareTo(o2.getModificationTime()) * (-1);
      }
    });

    for (int i = 0; i < dir.length; i++) {
      activityFiles[i] = dir[i].getPath();
    }
  }

  return activityFiles;
}
From source file:com.knewton.mapreduce.SSTableRecordReader.java
License:Apache License
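When the local target is a directory, remote.getName() supplies the file name so the copy lands inside that directory, mirroring what 'cp /path/file /tmp/' would do.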
/**
 * Copies a remote path to the local filesystem, while updating hadoop that we're making
 * progress. Doesn't support directories.
 */
@VisibleForTesting
void copyToLocalFile(FileSystem remoteFS, FileSystem localFS, Path remote, Path local) throws IOException {
  // don't support transferring from remote directories
  FileStatus remoteStat = remoteFS.getFileStatus(remote);
  Preconditions.checkArgument(!remoteStat.isDirectory(), String.format("Path %s is directory!", remote));

  // if local is a dir, copy to inside that dir, like 'cp /path/file /tmp/' would do
  if (localFS.exists(local)) {
    FileStatus localStat = localFS.getFileStatus(local);
    if (localStat.isDirectory()) {
      local = new Path(local, remote.getName());
    }
  }
  long remoteFileSize = remoteStat.getLen();

  // do actual copy
  InputStream in = null;
  OutputStream out = null;
  try {
    long startTime = System.currentTimeMillis();
    long lastLogTime = 0;
    long bytesCopied = 0;
    in = remoteFS.open(remote);
    out = localFS.create(local, true);
    int buffSize = this.conf.getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY,
        CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_DEFAULT);
    byte[] buf = new byte[buffSize];
    int bytesRead = in.read(buf);
    while (bytesRead >= 0) {
      long now = System.currentTimeMillis();
      // log transfer rate once per min, starting 1 min after transfer began
      if (now - lastLogTime > 60000L && now - startTime > 60000L) {
        double elapsedSec = (now - startTime) / 1000D;
        double bytesPerSec = bytesCopied / elapsedSec;
        LOG.info("Transferred {} of {} bytes at {} bytes per second", bytesCopied, remoteFileSize, bytesPerSec);
        lastLogTime = now;
      }
      this.ctx.progress();
      out.write(buf, 0, bytesRead);
      bytesCopied += bytesRead;
      bytesRead = in.read(buf);
    }
    // try to close these outside of finally so we receive exception on failure
    out.close();
    out = null;
    in.close();
    in = null;
  } finally {
    // make sure everything's closed
    IOUtils.closeStream(out);
    IOUtils.closeStream(in);
  }
}
From source file:com.lightboxtechnologies.spectrum.ExtractData.java
License:Apache License
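extp.getName() stores just the file name of the extents file in the job configuration (com.lbt.extentsname) after the full path has been added to the DistributedCache.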
public int run(String[] args) throws Exception {
  if (args.length != 4) {
    System.err.println("Usage: ExtractData <imageID> <friendly_name> <extents_file> <evidence file>");
    return 2;
  }

  final String imageID = args[0];
  final String friendlyName = args[1];
  final String extentsPath = args[2];
  final String image = args[3];

  Configuration conf = getConf();
  final Job job = SKJobFactory.createJobFromConf(imageID, friendlyName, "ExtractData", conf);
  job.setJarByClass(ExtractData.class);
  job.setMapperClass(ExtractDataMapper.class);
  job.setReducerClass(KeyValueSortReducer.class);
  job.setNumReduceTasks(1);

  // job ctor copies the Configuration we pass it, get the real one
  conf = job.getConfiguration();
  conf.setLong("timestamp", System.currentTimeMillis());

  job.setInputFormatClass(RawFileInputFormat.class);
  RawFileInputFormat.addInputPath(job, new Path(image));

  job.setOutputFormatClass(HFileOutputFormat.class);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(KeyValue.class);

  conf.setInt("mapreduce.job.jvm.numtasks", -1);

  final FileSystem fs = FileSystem.get(conf);
  Path hfileDir = new Path("/texaspete/ev/tmp", UUID.randomUUID().toString());
  hfileDir = hfileDir.makeQualified(fs);
  LOG.info("Hashes will be written temporarily to " + hfileDir);

  HFileOutputFormat.setOutputPath(job, hfileDir);

  final Path extp = new Path(extentsPath);
  final URI extents = extp.toUri();
  LOG.info("extents file is " + extents);

  DistributedCache.addCacheFile(extents, conf);
  conf.set("com.lbt.extentsname", extp.getName());

  // job.getConfiguration().setBoolean("mapred.task.profile", true);
  // job.getConfiguration().setBoolean("mapreduce.task.profile", true);

  HBaseTables.summon(conf, HBaseTables.HASH_TBL_B, HBaseTables.HASH_COLFAM_B);
  HBaseTables.summon(conf, HBaseTables.ENTRIES_TBL_B, HBaseTables.ENTRIES_COLFAM_B);

  final boolean result = job.waitForCompletion(true);

  if (result) {
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
    HBaseConfiguration.addHbaseResources(conf);
    loader.setConf(conf);
    LOG.info("Loading hashes into hbase");
    chmodR(fs, hfileDir);
    loader.doBulkLoad(hfileDir, new HTable(conf, HBaseTables.HASH_TBL_B));
    // result = fs.delete(hfileDir, true);
  }
  return result ? 0 : 1;
}