List of usage examples for org.apache.hadoop.fs Path getName
public String getName()
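Path.getName() returns the final component of the path, with any scheme, authority, and parent directories stripped. A minimal sketch, using a hypothetical HDFS URI:

import org.apache.hadoop.fs.Path;

public class GetNameDemo {
    public static void main(String[] args) {
        Path p = new Path("hdfs://namenode:8020/user/data/part-00000.gz");
        System.out.println(p.getName());             // part-00000.gz
        System.out.println(p.getParent().getName()); // data
    }
}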
From source file:cmd.download.java
License:Apache License
private void mergeToLocalFile2(FileSystem fs, Path src, String outPath, Configuration configuration)
        throws FileNotFoundException, IOException {
    // Find all the right paths and copy .gz files locally
    FileStatus[] status = fs.listStatus(src);
    Map<String, Path> paths = new TreeMap<String, Path>();
    for (FileStatus fileStatus : status) {
        Path path = fileStatus.getPath();
        String pathName = path.getName();
        if (pathName.startsWith(Constants.NAME_FOURTH)) {
            paths.put(pathName, path);
        }
    }

    for (String pathName : paths.keySet()) {
        Path path = new Path(src, paths.get(pathName));
        status = fs.listStatus(path);
        for (FileStatus fileStatus : status) {
            Path p = fileStatus.getPath();
            log.debug("Copying {} to {}...", p.toUri(), outPath);
            fs.copyToLocalFile(p, new Path(outPath, p.getName()));
        }
    }

    // Merge .gz files into indexName.gz
    File fileOutputPath = new File(outPath);
    File[] files = fileOutputPath.listFiles(new FileFilter() {
        @Override
        public boolean accept(File pathname) {
            return pathname.getName().endsWith(".gz");
        }
    });
    Arrays.sort(files);
    String prevIndexName = null;
    OutputStream out = null;
    for (File file : files) {
        log.debug("Processing {}... ", file.getName());
        String indexName = file.getName().substring(0, file.getName().indexOf("_"));
        if (prevIndexName == null)
            prevIndexName = indexName;
        if (out == null)
            out = new GZIPOutputStream(new FileOutputStream(new File(outPath, indexName + ".gz")));
        if (!prevIndexName.equals(indexName)) {
            if (out != null)
                out.close();
            log.debug("Index name set to {}", indexName);
            out = new GZIPOutputStream(new FileOutputStream(new File(outPath, indexName + ".gz")));
        }
        InputStream in = new GZIPInputStream(new FileInputStream(file));
        log.debug("Copying {} into {}.gz ...", file.getName(), indexName);
        IOUtils.copyBytes(in, out, 8192, false);
        in.close();
        file.delete();
        prevIndexName = indexName;
    }
    if (out != null)
        out.close();

    // build B+Tree indexes
    Location location = new Location(outPath);
    for (String idxName : Constants.indexNames) {
        log.debug("Creating {} index...", idxName);
        String indexFilename = location.absolute(idxName, "gz");
        if (new File(indexFilename).exists()) {
            new File(outPath, idxName + ".dat").delete();
            new File(outPath, idxName + ".idn").delete();
            CmdIndexBuild.main(location.getDirectoryPath(), idxName, indexFilename);
            // To save some disk space
            new File(indexFilename).delete();
        }
    }
}
From source file:cmd.tdbloader4.java
License:Apache License
private void createOffsetsFile(FileSystem fs, String input, String output) throws IOException {
    log.debug("Creating offsets file...");
    Map<Long, Long> offsets = new TreeMap<Long, Long>();
    FileStatus[] status = fs.listStatus(new Path(input));
    for (FileStatus fileStatus : status) {
        Path file = fileStatus.getPath();
        if (file.getName().startsWith("part-r-")) {
            log.debug("Processing: {}", file.getName());
            BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(file)));
            String line = in.readLine();
            String[] tokens = line.split("\\s");
            long partition = Long.valueOf(tokens[0]);
            long offset = Long.valueOf(tokens[1]);
            log.debug("Partition {} has offset {}", partition, offset);
            offsets.put(partition, offset);
        }
    }

    Path outputPath = new Path(output, Constants.OFFSETS_FILENAME);
    PrintWriter out = new PrintWriter(new OutputStreamWriter(fs.create(outputPath)));
    for (Long partition : offsets.keySet()) {
        out.println(partition + "\t" + offsets.get(partition));
    }
    out.close();
    log.debug("Offset file created.");
}
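The offsets file written above is plain text with one tab-separated partition/offset pair per line, e.g. (values hypothetical):

0	0
1	104857
2	209714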
From source file:cn.spark.Case.MyMultipleOutputFormat.java
License:Apache License
/**
 * Generate the output file name based on a given name and the input file name.
 * If the map input file does not exist (i.e. this is not for a map-only job),
 * the given name is returned unchanged. If the config value for
 * "mapred.outputformat.numOfTrailingLegs" is not set, or is set to 0 or a
 * negative value, the given name is returned unchanged. Otherwise, the
 * returned file name consists of the N trailing legs of the input file name,
 * where N is that config value.
 *
 * @param job
 *            the job config
 * @param name
 *            the output file name
 * @return the output file name based on the given name and the input file name.
 */
protected String getInputFileBasedOutputFileName(JobConf job, String name) {
    String infilepath = job.get("map.input.file");
    if (infilepath == null) {
        // if the map input file does not exist, then return the given name
        return name;
    }
    int numOfTrailingLegsToUse = job.getInt("mapred.outputformat.numOfTrailingLegs", 0);
    if (numOfTrailingLegsToUse <= 0) {
        return name;
    }
    Path infile = new Path(infilepath);
    Path parent = infile.getParent();
    String midName = infile.getName();
    Path outPath = new Path(midName);
    for (int i = 1; i < numOfTrailingLegsToUse; i++) {
        if (parent == null)
            break;
        midName = parent.getName();
        if (midName.length() == 0)
            break;
        parent = parent.getParent();
        outPath = new Path(midName, outPath);
    }
    return outPath.toString();
}
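A worked trace of the trailing-legs logic above, using a hypothetical input path and mapred.outputformat.numOfTrailingLegs set to 2:

import org.apache.hadoop.fs.Path;

public class TrailingLegsTrace {
    public static void main(String[] args) {
        Path infile = new Path("/data/2016/07/part-00000"); // hypothetical map.input.file
        Path parent = infile.getParent();                   // /data/2016/07
        String midName = infile.getName();                  // "part-00000"
        Path outPath = new Path(midName);
        for (int i = 1; i < 2; i++) {                       // numOfTrailingLegsToUse = 2
            midName = parent.getName();                     // "07"
            parent = parent.getParent();
            outPath = new Path(midName, outPath);
        }
        System.out.println(outPath);                        // 07/part-00000
    }
}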
From source file:cn.uc.hadoop.mapreduce.lib.input.FileNameLineRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // ADD by qiujw: set the key to the file name
    key = new Text(file.getName());

    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(cIn, job);
            } else {
                in = new LineReader(cIn, job, this.recordDelimiterBytes);
            }
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
            } else {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job,
                        this.recordDelimiterBytes);
            }
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        if (null == this.recordDelimiterBytes) {
            in = new LineReader(fileIn, job);
        } else {
            in = new LineReader(fileIn, job, this.recordDelimiterBytes);
        }
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away the first record
    // because we always (except for the last split) read one extra line in
    // the next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
From source file:co.cask.cdap.template.etl.common.BatchFileFilter.java
License:Apache License
@Override
public boolean accept(Path path) {
    String filePathName = path.toString();
    // The path filter will first check the directory if a directory is given
    if (filePathName.equals(pathName) || filePathName.equals(pathName + "/")) {
        return true;
    }

    // filter by file name using regex from configuration
    if (!useTimeFilter) {
        Matcher matcher = regex.matcher(filePathName);
        return matcher.matches();
    }

    // use hourly time filter
    if (lastRead.equals("-1")) {
        String currentTime = sdf.format(prevHour);
        return filePathName.contains(currentTime);
    }

    // use stateful time filter
    Date fileDate;
    String filename = path.getName();
    try {
        fileDate = sdf.parse(filename.substring(0, DATE_LENGTH));
    } catch (Exception pe) {
        // Try to parse the CloudFront format
        try {
            int startIndex = filename.indexOf(".") + 1;
            fileDate = sdf.parse(filename.substring(startIndex, startIndex + DATE_LENGTH));
        } catch (Exception e) {
            LOG.warn("Couldn't parse file: " + filename);
            return false;
        }
    }
    return isWithinRange(fileDate);
}
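The stateful time filter above accepts two filename shapes: a date prefix of DATE_LENGTH characters, or a CloudFront-style name where the date follows the first dot. A minimal sketch of both parses, assuming a yyyy-MM-dd-HH pattern and a DATE_LENGTH of 13 (the actual sdf pattern and length come from the plugin configuration):

import java.text.SimpleDateFormat;

public class TimeFilterParseSketch {
    public static void main(String[] args) throws Exception {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd-HH"); // assumed pattern
        int dateLength = 13;                                          // assumed DATE_LENGTH

        // Shape 1: the date leads the file name.
        System.out.println(sdf.parse("2015-06-01-12.avro".substring(0, dateLength)));

        // Shape 2: CloudFront style, the date follows the first '.'.
        String cf = "E123ABC.2015-06-01-12.deadbeef.gz";
        int start = cf.indexOf('.') + 1;
        System.out.println(sdf.parse(cf.substring(start, start + dateLength)));
    }
}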
From source file:co.cask.hydrator.plugin.batch.action.FileAction.java
License:Apache License
@SuppressWarnings("ConstantConditions")
@Override
public void run(BatchActionContext context) throws Exception {
    if (!config.shouldRun(context)) {
        return;
    }
    config.substituteMacros(context);

    Job job = JobUtils.createInstance();
    Configuration conf = job.getConfiguration();
    FileSystem fileSystem = FileSystem.get(conf);
    Path[] paths;
    Path sourcePath = new Path(config.path);
    if (fileSystem.isDirectory(sourcePath)) {
        FileStatus[] status = fileSystem.listStatus(sourcePath);
        paths = FileUtil.stat2Paths(status);
    } else {
        paths = new Path[] { sourcePath };
    }

    // get regex pattern for file name filtering.
    boolean patternSpecified = !Strings.isNullOrEmpty(config.pattern);
    if (patternSpecified) {
        regex = Pattern.compile(config.pattern);
    }

    switch (config.action.toLowerCase()) {
    case "delete":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                fileSystem.delete(path, true);
            }
        }
        break;
    case "move":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                Path targetFileMovePath = new Path(config.targetFolder, path.getName());
                fileSystem.rename(path, targetFileMovePath);
            }
        }
        break;
    case "archive":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                try (FSDataOutputStream archivedStream = fileSystem
                        .create(new Path(config.targetFolder, path.getName() + ".zip"));
                        ZipOutputStream zipArchivedStream = new ZipOutputStream(archivedStream);
                        FSDataInputStream fdDataInputStream = fileSystem.open(path)) {
                    zipArchivedStream.putNextEntry(new ZipEntry(path.getName()));
                    int length;
                    byte[] buffer = new byte[1024];
                    while ((length = fdDataInputStream.read(buffer)) > 0) {
                        zipArchivedStream.write(buffer, 0, length);
                    }
                    zipArchivedStream.closeEntry();
                }
                fileSystem.delete(path, true);
            }
        }
        break;
    default:
        LOG.warn("No action required on the file.");
        break;
    }
}
From source file:co.cask.hydrator.plugin.common.BatchFileFilter.java
License:Apache License
@Override
public boolean accept(Path path) {
    String filePathName = path.toString();
    // The path filter will first check the directory if a directory is given
    if (filePathName.equals(pathName) || filePathName.equals(pathName + "/")) {
        return true;
    }

    // filter by file name using regex from configuration
    if (!useTimeFilter) {
        String fileName = path.getName();
        Matcher matcher = regex.matcher(fileName);
        return matcher.matches();
    }

    // use hourly time filter
    if (lastRead.equals("-1")) {
        String currentTime = sdf.format(prevHour);
        return filePathName.contains(currentTime);
    }

    // use stateful time filter
    Date fileDate;
    String filename = path.getName();
    try {
        fileDate = sdf.parse(filename.substring(0, DATE_LENGTH));
    } catch (Exception pe) {
        // Try to parse the CloudFront format
        try {
            int startIndex = filename.indexOf(".") + 1;
            fileDate = sdf.parse(filename.substring(startIndex, startIndex + DATE_LENGTH));
        } catch (Exception e) {
            LOG.warn("Couldn't parse file: " + filename);
            return false;
        }
    }
    return isWithinRange(fileDate);
}
From source file:co.cask.hydrator.plugin.common.BatchXMLFileFilter.java
License:Apache License
@Override
public boolean accept(Path path) {
    String filePathName = path.toString();
    // The path filter will first check the directory if a directory is given
    if (filePathName.equals(pathName)) {
        return true;
    }

    Matcher matcher = regex.matcher(path.getName());
    boolean patternMatch = matcher.find();
    if (patternMatch && CollectionUtils.isNotEmpty(preProcessedFileList)) {
        patternMatch = !preProcessedFileList.contains(filePathName);
    }
    return patternMatch;
}
From source file:co.nubetech.hiho.job.TestExportToOracleDb.java
License:Apache License
@Test
public void testAlterTableDMl() throws HIHOException, IOException {
    Configuration conf = mock(Configuration.class);
    Path path = mock(Path.class);
    FileStatus status1 = mock(FileStatus.class);
    Path path1 = mock(Path.class);
    when(path1.getName()).thenReturn("part-xxxxx");
    when(status1.getPath()).thenReturn(path1);
    FileStatus status2 = mock(FileStatus.class);
    Path path2 = mock(Path.class);
    when(path2.getName()).thenReturn("part-yyyyy");
    when(status2.getPath()).thenReturn(path2);
    FileSystem fs = mock(FileSystem.class);
    when(fs.listStatus(path)).thenReturn(new FileStatus[] { status1, status2 });
    when(path.getFileSystem(conf)).thenReturn(fs);
    when(conf.get(HIHOConf.EXTERNAL_TABLE_DML)).thenReturn(
            "create table age( i Number, n Varchar(20), a Number)organization external ( type oracle_loader default directory ext_dir access parameters (records delimited by newlinefields terminated by ','missing field values are null )location (/home/nube/:file.txt) reject' limit unlimited;");
    String dml = ExportToOracleDb.getAlterTableDML(path, conf);
    assertEquals(" ALTER TABLE age LOCATION ('part-xxxxx','part-yyyyy')", dml);
}
From source file:co.nubetech.hiho.mapred.input.FileStreamRecordReader.java
License:Apache License
@Override
public FSDataInputStream createValue() {
    logger.debug("Creating value");
    FSDataInputStream stream = null;
    Path file = split.getPath();
    logger.debug("Path is " + file);
    fileName = file.getName();
    try {
        FileSystem fs = file.getFileSystem(configuration);
        stream = new FSDataInputStream(fs.open(file));
    } catch (IOException e) {
        e.printStackTrace();
    }
    logger.debug("Opened stream");
    return stream;
}