List of usage examples for org.apache.hadoop.fs Path getName
public String getName()
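Path.getName() returns the final component of the path, with any scheme, authority, and parent directories stripped. A minimal sketch, using a hypothetical HDFS URI:

import org.apache.hadoop.fs.Path;

public class GetNameDemo {
    public static void main(String[] args) {
        Path p = new Path("hdfs://namenode:8020/user/data/part-00000.gz");
        System.out.println(p.getName());             // part-00000.gz
        System.out.println(p.getParent().getName()); // data
    }
}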
From source file:cmd.download.java
License:Apache License
private void mergeToLocalFile2(FileSystem fs, Path src, String outPath, Configuration configuration)
        throws FileNotFoundException, IOException {
    // Find all the right paths and copy .gz files locally
    FileStatus[] status = fs.listStatus(src);
    Map<String, Path> paths = new TreeMap<String, Path>();
    for (FileStatus fileStatus : status) {
        Path path = fileStatus.getPath();
        String pathName = path.getName();
        if (pathName.startsWith(Constants.NAME_FOURTH)) {
            paths.put(pathName, path);
        }
    }

    for (String pathName : paths.keySet()) {
        Path path = new Path(src, paths.get(pathName));
        status = fs.listStatus(path);
        for (FileStatus fileStatus : status) {
            Path p = fileStatus.getPath();
            log.debug("Copying {} to {}...", p.toUri(), outPath);
            fs.copyToLocalFile(p, new Path(outPath, p.getName()));
        }
    }

    // Merge .gz files into indexName.gz
    File fileOutputPath = new File(outPath);
    File[] files = fileOutputPath.listFiles(new FileFilter() {
        @Override
        public boolean accept(File pathname) {
            return pathname.getName().endsWith(".gz");
        }
    });
    Arrays.sort(files);
    String prevIndexName = null;
    OutputStream out = null;
    for (File file : files) {
        log.debug("Processing {}... ", file.getName());
        String indexName = file.getName().substring(0, file.getName().indexOf("_"));
        if (prevIndexName == null)
            prevIndexName = indexName;
        if (out == null)
            out = new GZIPOutputStream(new FileOutputStream(new File(outPath, indexName + ".gz")));
        if (!prevIndexName.equals(indexName)) {
            if (out != null)
                out.close();
            log.debug("Index name set to {}", indexName);
            out = new GZIPOutputStream(new FileOutputStream(new File(outPath, indexName + ".gz")));
        }
        InputStream in = new GZIPInputStream(new FileInputStream(file));
        log.debug("Copying {} into {}.gz ...", file.getName(), indexName);
        IOUtils.copyBytes(in, out, 8192, false);
        in.close();
        file.delete();
        prevIndexName = indexName;
    }
    if (out != null)
        out.close();

    // build B+Tree indexes
    Location location = new Location(outPath);
    for (String idxName : Constants.indexNames) {
        log.debug("Creating {} index...", idxName);
        String indexFilename = location.absolute(idxName, "gz");
        if (new File(indexFilename).exists()) {
            new File(outPath, idxName + ".dat").delete();
            new File(outPath, idxName + ".idn").delete();
            CmdIndexBuild.main(location.getDirectoryPath(), idxName, indexFilename);
            // To save some disk space
            new File(indexFilename).delete();
        }
    }
}
From source file:cmd.tdbloader4.java
License:Apache License
private void createOffsetsFile(FileSystem fs, String input, String output) throws IOException {
    log.debug("Creating offsets file...");
    Map<Long, Long> offsets = new TreeMap<Long, Long>();
    FileStatus[] status = fs.listStatus(new Path(input));
    for (FileStatus fileStatus : status) {
        Path file = fileStatus.getPath();
        if (file.getName().startsWith("part-r-")) {
            log.debug("Processing: {}", file.getName());
            BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(file)));
            String line = in.readLine();
            String[] tokens = line.split("\\s");
            long partition = Long.valueOf(tokens[0]);
            long offset = Long.valueOf(tokens[1]);
            log.debug("Partition {} has offset {}", partition, offset);
            offsets.put(partition, offset);
        }
    }

    Path outputPath = new Path(output, Constants.OFFSETS_FILENAME);
    PrintWriter out = new PrintWriter(new OutputStreamWriter(fs.create(outputPath)));
    for (Long partition : offsets.keySet()) {
        out.println(partition + "\t" + offsets.get(partition));
    }
    out.close();
    log.debug("Offset file created.");
}
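The offsets file written above is plain text with one tab-separated partition/offset pair per line, e.g. (values hypothetical):

0	0
1	104857
2	209714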
From source file:cn.spark.Case.MyMultipleOutputFormat.java
License:Apache License
/**
 * Generate the output file name based on a given name and the input file name.
 * If the map input file does not exist (i.e. this is not for a map-only job),
 * the given name is returned unchanged. If the config value for
 * "mapred.outputformat.numOfTrailingLegs" is not set, or is set to 0 or a
 * negative value, the given name is returned unchanged. Otherwise, the
 * returned file name consists of the N trailing legs of the input file name,
 * where N is that config value.
 *
 * @param job
 *            the job config
 * @param name
 *            the output file name
 * @return the output file name based on the given name and the input file name.
 */
protected String getInputFileBasedOutputFileName(JobConf job, String name) {
    String infilepath = job.get("map.input.file");
    if (infilepath == null) {
        // if the map input file does not exist, then return the given name
        return name;
    }
    int numOfTrailingLegsToUse = job.getInt("mapred.outputformat.numOfTrailingLegs", 0);
    if (numOfTrailingLegsToUse <= 0) {
        return name;
    }
    Path infile = new Path(infilepath);
    Path parent = infile.getParent();
    String midName = infile.getName();
    Path outPath = new Path(midName);
    for (int i = 1; i < numOfTrailingLegsToUse; i++) {
        if (parent == null)
            break;
        midName = parent.getName();
        if (midName.length() == 0)
            break;
        parent = parent.getParent();
        outPath = new Path(midName, outPath);
    }
    return outPath.toString();
}
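A worked trace of the trailing-legs logic above, using a hypothetical input path and mapred.outputformat.numOfTrailingLegs set to 2:

import org.apache.hadoop.fs.Path;

public class TrailingLegsTrace {
    public static void main(String[] args) {
        Path infile = new Path("/data/2016/07/part-00000"); // hypothetical map.input.file
        Path parent = infile.getParent();                   // /data/2016/07
        String midName = infile.getName();                  // "part-00000"
        Path outPath = new Path(midName);
        for (int i = 1; i < 2; i++) {                       // numOfTrailingLegsToUse = 2
            midName = parent.getName();                     // "07"
            parent = parent.getParent();
            outPath = new Path(midName, outPath);
        }
        System.out.println(outPath);                        // 07/part-00000
    }
}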
From source file:cn.uc.hadoop.mapreduce.lib.input.FileNameLineRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // ADD by qiujw: set the key to the file name
    key = new Text(file.getName());

    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(cIn, job);
            } else {
                in = new LineReader(cIn, job, this.recordDelimiterBytes);
            }
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
            } else {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job,
                        this.recordDelimiterBytes);
            }
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        if (null == this.recordDelimiterBytes) {
            in = new LineReader(fileIn, job);
        } else {
            in = new LineReader(fileIn, job, this.recordDelimiterBytes);
        }
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away the first record
    // because we always (except for the last split) read one extra line in
    // the next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
From source file:co.cask.cdap.template.etl.common.BatchFileFilter.java
License:Apache License
@Override
public boolean accept(Path path) {
    String filePathName = path.toString();
    // The path filter will first check the directory if a directory is given
    if (filePathName.equals(pathName) || filePathName.equals(pathName + "/")) {
        return true;
    }

    // filter by file name using regex from configuration
    if (!useTimeFilter) {
        Matcher matcher = regex.matcher(filePathName);
        return matcher.matches();
    }

    // use hourly time filter
    if (lastRead.equals("-1")) {
        String currentTime = sdf.format(prevHour);
        return filePathName.contains(currentTime);
    }

    // use stateful time filter
    Date fileDate;
    String filename = path.getName();
    try {
        fileDate = sdf.parse(filename.substring(0, DATE_LENGTH));
    } catch (Exception pe) {
        // Try to parse the CloudFront format
        try {
            int startIndex = filename.indexOf(".") + 1;
            fileDate = sdf.parse(filename.substring(startIndex, startIndex + DATE_LENGTH));
        } catch (Exception e) {
            LOG.warn("Couldn't parse file: " + filename);
            return false;
        }
    }
    return isWithinRange(fileDate);
}
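The stateful time filter above accepts two filename shapes: a date prefix of DATE_LENGTH characters, or a CloudFront-style name where the date follows the first dot. A minimal sketch of both parses, assuming a yyyy-MM-dd-HH pattern and a DATE_LENGTH of 13 (the actual sdf pattern and length come from the plugin configuration):

import java.text.SimpleDateFormat;

public class TimeFilterParseSketch {
    public static void main(String[] args) throws Exception {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd-HH"); // assumed pattern
        int dateLength = 13;                                          // assumed DATE_LENGTH

        // Shape 1: the date leads the file name.
        System.out.println(sdf.parse("2015-06-01-12.avro".substring(0, dateLength)));

        // Shape 2: CloudFront style, the date follows the first '.'.
        String cf = "E123ABC.2015-06-01-12.deadbeef.gz";
        int start = cf.indexOf('.') + 1;
        System.out.println(sdf.parse(cf.substring(start, start + dateLength)));
    }
}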
From source file:co.cask.hydrator.plugin.batch.action.FileAction.java
License:Apache License
@SuppressWarnings("ConstantConditions")
@Override
public void run(BatchActionContext context) throws Exception {
    if (!config.shouldRun(context)) {
        return;
    }
    config.substituteMacros(context);

    Job job = JobUtils.createInstance();
    Configuration conf = job.getConfiguration();
    FileSystem fileSystem = FileSystem.get(conf);
    Path[] paths;
    Path sourcePath = new Path(config.path);
    if (fileSystem.isDirectory(sourcePath)) {
        FileStatus[] status = fileSystem.listStatus(sourcePath);
        paths = FileUtil.stat2Paths(status);
    } else {
        paths = new Path[] { sourcePath };
    }

    // get regex pattern for file name filtering.
    boolean patternSpecified = !Strings.isNullOrEmpty(config.pattern);
    if (patternSpecified) {
        regex = Pattern.compile(config.pattern);
    }

    switch (config.action.toLowerCase()) {
    case "delete":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                fileSystem.delete(path, true);
            }
        }
        break;
    case "move":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                Path targetFileMovePath = new Path(config.targetFolder, path.getName());
                fileSystem.rename(path, targetFileMovePath);
            }
        }
        break;
    case "archive":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                try (FSDataOutputStream archivedStream = fileSystem
                        .create(new Path(config.targetFolder, path.getName() + ".zip"));
                        ZipOutputStream zipArchivedStream = new ZipOutputStream(archivedStream);
                        FSDataInputStream fdDataInputStream = fileSystem.open(path)) {
                    zipArchivedStream.putNextEntry(new ZipEntry(path.getName()));
                    int length;
                    byte[] buffer = new byte[1024];
                    while ((length = fdDataInputStream.read(buffer)) > 0) {
                        zipArchivedStream.write(buffer, 0, length);
                    }
                    zipArchivedStream.closeEntry();
                }
                fileSystem.delete(path, true);
            }
        }
        break;
    default:
        LOG.warn("No action required on the file.");
        break;
    }
}
From source file:co.cask.hydrator.plugin.common.BatchFileFilter.java
License:Apache License
@Override
public boolean accept(Path path) {
    String filePathName = path.toString();
    // The path filter will first check the directory if a directory is given
    if (filePathName.equals(pathName) || filePathName.equals(pathName + "/")) {
        return true;
    }

    // filter by file name using regex from configuration
    if (!useTimeFilter) {
        String fileName = path.getName();
        Matcher matcher = regex.matcher(fileName);
        return matcher.matches();
    }

    // use hourly time filter
    if (lastRead.equals("-1")) {
        String currentTime = sdf.format(prevHour);
        return filePathName.contains(currentTime);
    }

    // use stateful time filter
    Date fileDate;
    String filename = path.getName();
    try {
        fileDate = sdf.parse(filename.substring(0, DATE_LENGTH));
    } catch (Exception pe) {
        // Try to parse the CloudFront format
        try {
            int startIndex = filename.indexOf(".") + 1;
            fileDate = sdf.parse(filename.substring(startIndex, startIndex + DATE_LENGTH));
        } catch (Exception e) {
            LOG.warn("Couldn't parse file: " + filename);
            return false;
        }
    }
    return isWithinRange(fileDate);
}
From source file:co.cask.hydrator.plugin.common.BatchXMLFileFilter.java
License:Apache License
@Override
public boolean accept(Path path) {
    String filePathName = path.toString();
    // The path filter will first check the directory if a directory is given
    if (filePathName.equals(pathName)) {
        return true;
    }

    Matcher matcher = regex.matcher(path.getName());
    boolean patternMatch = matcher.find();
    if (patternMatch && CollectionUtils.isNotEmpty(preProcessedFileList)) {
        patternMatch = !preProcessedFileList.contains(filePathName);
    }
    return patternMatch;
}
From source file:co.nubetech.hiho.job.TestExportToOracleDb.java
License:Apache License
@Test
public void testAlterTableDMl() throws HIHOException, IOException {
    Configuration conf = mock(Configuration.class);
    Path path = mock(Path.class);
    FileStatus status1 = mock(FileStatus.class);
    Path path1 = mock(Path.class);
    when(path1.getName()).thenReturn("part-xxxxx");
    when(status1.getPath()).thenReturn(path1);
    FileStatus status2 = mock(FileStatus.class);
    Path path2 = mock(Path.class);
    when(path2.getName()).thenReturn("part-yyyyy");
    when(status2.getPath()).thenReturn(path2);
    FileSystem fs = mock(FileSystem.class);
    when(fs.listStatus(path)).thenReturn(new FileStatus[] { status1, status2 });
    when(path.getFileSystem(conf)).thenReturn(fs);
    when(conf.get(HIHOConf.EXTERNAL_TABLE_DML)).thenReturn(
            "create table age( i Number, n Varchar(20), a Number)organization external ( type oracle_loader default directory ext_dir access parameters (records delimited by newlinefields terminated by ','missing field values are null )location (/home/nube/:file.txt) reject' limit unlimited;");
    String dml = ExportToOracleDb.getAlterTableDML(path, conf);
    assertEquals(" ALTER TABLE age LOCATION ('part-xxxxx','part-yyyyy')", dml);
}
From source file:co.nubetech.hiho.mapred.input.FileStreamRecordReader.java
License:Apache License
@Override
public FSDataInputStream createValue() {
    logger.debug("Creating value");
    FSDataInputStream stream = null;
    Path file = split.getPath();
    logger.debug("Path is " + file);
    fileName = file.getName();
    try {
        FileSystem fs = file.getFileSystem(configuration);
        stream = new FSDataInputStream(fs.open(file));
    } catch (IOException e) {
        e.printStackTrace();
    }
    logger.debug("Opened stream");
    return stream;
}