Example usage for org.apache.hadoop.fs Path getName

Introduction

This page collects example usages of org.apache.hadoop.fs.Path#getName drawn from open-source projects.

Prototype

public String getName() 

Document

Returns the final component of this path.
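
A minimal, self-contained sketch of this behavior (the path strings below are hypothetical):

import org.apache.hadoop.fs.Path;

public class PathGetNameDemo {
    public static void main(String[] args) {
        Path p = new Path("/user/data/logs/app-2012-01-16.gz");
        // getName() returns only the final component, without any parent directories
        System.out.println(p.getName()); // prints: app-2012-01-16.gz
        // the root path has an empty final component
        System.out.println(new Path("/").getName()); // prints an empty string
    }
}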

Usage

From source file:com.inmobi.databus.distcp.MirrorStreamService.java

License:Apache License

private void createCommitPaths(LinkedHashMap<FileStatus, Path> commitPaths, List<FileStatus> streamPaths) {
    /*  Path eg in streamPaths -
     *  /databus/system/distcp_mirror_<srcCluster>_<destCluster>/databus/streams
     *  /<streamName>/2012/1/13/15/7/<hostname>-<streamName>-2012-01-16-07
     *  -21_00000.gz
     *
     * or it could be an emptyDir like
     *  /databus/system/distcp_mirror_<srcCluster>_<destCluster>/databus/streams
     *  /<streamName>/2012/1/13/15/7/
     *
     */

    for (FileStatus fileStatus : streamPaths) {
        String fileName = null;

        Path prefixDir = null;
        if (fileStatus.isDir()) {
            //empty directory
            prefixDir = fileStatus.getPath();
        } else {
            fileName = fileStatus.getPath().getName();
            prefixDir = fileStatus.getPath().getParent();
        }

        Path min = prefixDir;
        Path hr = min.getParent();
        Path day = hr.getParent();
        Path month = day.getParent();
        Path year = month.getParent();
        Path streamName = year.getParent();

        String finalPath = getDestCluster().getFinalDestDirRoot() + File.separator + streamName.getName()
                + File.separator + year.getName() + File.separator + month.getName() + File.separator
                + day.getName() + File.separator + hr.getName() + File.separator + min.getName();

        if (fileName != null) {
            finalPath += File.separator + fileName;
        }

        commitPaths.put(fileStatus, new Path(finalPath));
        LOG.debug("Going to commit [" + fileStatus.getPath() + "] to [" + finalPath + "]");
    }

}
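
The method above recovers the stream name and each date component by walking up the path with getParent() and reading each level with getName(). A minimal sketch of that decomposition, using a hypothetical minute-level directory:

import org.apache.hadoop.fs.Path;

public class ParentWalkDemo {
    public static void main(String[] args) {
        // hypothetical minute-level directory, matching the layout in the comment above
        Path min = new Path("/databus/streams/myStream/2012/1/13/15/7");
        Path hr = min.getParent();
        Path day = hr.getParent();
        Path month = day.getParent();
        Path year = month.getParent();
        Path streamName = year.getParent();
        // prints: myStream/2012/1/13/15/7
        System.out.println(streamName.getName() + "/" + year.getName() + "/" + month.getName()
                + "/" + day.getName() + "/" + hr.getName() + "/" + min.getName());
    }
}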

From source file:com.inmobi.databus.local.CopyMapper.java

License:Apache License

@Override
public void map(Text key, Text value, Context context) throws IOException, InterruptedException {
    Path src = new Path(key.toString());
    String dest = value.toString();
    String collector = src.getParent().getName();
    String category = src.getParent().getParent().getName();

    FileSystem fs = FileSystem.get(context.getConfiguration());
    Path target = getTempPath(context, src, category, collector);
    FileUtil.gzip(src, target, context.getConfiguration());
    // move to final destination
    fs.mkdirs(new Path(dest).makeQualified(fs));
    Path destPath = new Path(dest + File.separator + collector + "-" + src.getName() + ".gz");
    LOG.info("Renaming file " + target + " to " + destPath);
    fs.rename(target, destPath);

}

From source file:com.inmobi.databus.purge.DataPurgerServiceTest.java

License:Apache License

private void createTestPurgefiles(FileSystem fs, Cluster cluster, Calendar date) throws Exception {
    for (String streamname : cluster.getSourceStreams()) {
        String[] files = new String[NUM_OF_FILES];
        String datapath = Cluster.getDateAsYYYYMMDDHHMNPath(date.getTime());
        String commitpath = cluster.getLocalFinalDestDirRoot() + File.separator + streamname + File.separator
                + datapath;
        String mergecommitpath = cluster.getFinalDestDirRoot() + File.separator + streamname + File.separator
                + datapath;
        String trashpath = cluster.getTrashPath() + File.separator + CalendarHelper.getDateAsString(date)
                + File.separator;
        fs.mkdirs(new Path(commitpath));

        for (int j = 0; j < NUM_OF_FILES; ++j) {
            files[j] = new String(cluster.getName() + "-"
                    + TestLocalStreamService.getDateAsYYYYMMDDHHmm(new Date()) + "_" + idFormat.format(j));
            {
                Path path = new Path(commitpath + File.separator + files[j]);
                // LOG.info("Creating streams_local File " + path.getName());
                FSDataOutputStream streamout = fs.create(path);
                streamout.writeBytes("Creating Test data for teststream " + path.toString());
                streamout.close();
                Assert.assertTrue(fs.exists(path));
            }
            {
                Path path = new Path(mergecommitpath + File.separator + files[j]);
                // LOG.info("Creating streams File " + path.getName());
                FSDataOutputStream streamout = fs.create(path);
                streamout.writeBytes("Creating Test data for teststream " + path.toString());
                streamout.close();
                Assert.assertTrue(fs.exists(path));
            }

            {
                Path path = new Path(trashpath + File.separator + String.valueOf(date.get(Calendar.HOUR_OF_DAY))
                        + File.separator + files[j]);
                // LOG.info("Creating trash File " + path.toString());
                FSDataOutputStream streamout = fs.create(path);
                streamout.writeBytes("Creating Test trash data for teststream " + path.getName());
                streamout.close();
                Assert.assertTrue(fs.exists(path));
            }
        }
    }

}

From source file:com.inmobi.databus.readers.CollectorStreamReader.java

License:Apache License

protected FileMap<CollectorFile> createFileMap() throws IOException {
    return new FileMap<CollectorFile>() {

        @Override
        protected PathFilter createPathFilter() {
            return new PathFilter() {
                @Override
                public boolean accept(Path p) {
                    if (p.getName().endsWith("_current") || p.getName().endsWith("_stats")) {
                        return false;
                    }
                    return true;
                }
            };
        }

        /*
         * prepare a fileMap with files which are beyond the stopTime
         */
        @Override
        protected void buildList() throws IOException {
            if (fsIsPathExists(streamDir)) {
                FileStatus[] fileStatuses = fsListFileStatus(streamDir, pathFilter);
                if (fileStatuses == null || fileStatuses.length == 0) {
                    LOG.info("No files in directory:" + streamDir);
                    return;
                }
                if (stopTime == null) {
                    for (FileStatus file : fileStatuses) {
                        addPath(file);
                    }
                } else {
                    for (FileStatus file : fileStatuses) {
                        Date currentTimeStamp = getDateFromCollectorFile(file.getPath().getName());
                        if (stopTime.before(currentTimeStamp)) {
                            stopListing();
                            continue;
                        }
                        addPath(file);
                    }
                }
            } else {
                LOG.info("Collector directory does not exist");
            }
        }

        @Override
        protected TreeMap<CollectorFile, FileStatus> createFilesMap() {
            return new TreeMap<CollectorFile, FileStatus>();
        }

        @Override
        protected CollectorFile getStreamFile(String fileName) {
            return CollectorFile.create(fileName);
        }

        @Override
        protected CollectorFile getStreamFile(FileStatus file) {
            return CollectorFile.create(file.getPath().getName());
        }
    };
}

From source file:com.inmobi.databus.readers.DatabusStreamWaitingReader.java

License:Apache License

@Override
protected FileMap<HadoopStreamFile> createFileMap() throws IOException {
    return new FileMap<HadoopStreamFile>() {
        @Override
        protected void buildList() throws IOException {
            buildListing(this, pathFilter);
        }

        @Override
        protected TreeMap<HadoopStreamFile, FileStatus> createFilesMap() {
            return new TreeMap<HadoopStreamFile, FileStatus>();
        }

        @Override
        protected HadoopStreamFile getStreamFile(String fileName) {
            throw new RuntimeException("Not implemented");
        }

        @Override
        protected HadoopStreamFile getStreamFile(FileStatus file) {
            return HadoopStreamFile.create(file);
        }

        @Override
        protected PathFilter createPathFilter() {
            return new PathFilter() {
                @Override
                public boolean accept(Path path) {
                    if (path.getName().startsWith("_")) {
                        return false;
                    }
                    return true;
                }
            };
        }
    };
}

From source file:com.inmobi.databus.readers.LocalStreamCollectorReader.java

License:Apache License

public FileMap<DatabusStreamFile> createFileMap() throws IOException {
    return new FileMap<DatabusStreamFile>() {
        @Override
        protected void buildList() throws IOException {
            buildListing(this, pathFilter);
        }

        @Override
        protected TreeMap<DatabusStreamFile, FileStatus> createFilesMap() {
            return new TreeMap<DatabusStreamFile, FileStatus>();
        }

        @Override
        protected DatabusStreamFile getStreamFile(String fileName) {
            return DatabusStreamFile.create(streamName, fileName);
        }

        @Override
        protected DatabusStreamFile getStreamFile(FileStatus file) {
            return DatabusStreamFile.create(streamName, file.getPath().getName());
        }

        @Override
        protected PathFilter createPathFilter() {
            return new PathFilter() {
                @Override
                public boolean accept(Path p) {
                    if (p.getName().startsWith(collector)) {
                        return true;
                    }
                    return false;
                }
            };
        }
    };
}

From source file:com.intel.hibench.datagen.streaming.util.SourceFileReader.java

License:Apache License

static private InputStream openMultipleParts(FileSystem fs, Path pt, long offset) throws IOException {

    System.out.println("opening all parts in path: " + pt + ", from offset: " + offset);
    // list all files in given path
    RemoteIterator<LocatedFileStatus> rit = fs.listFiles(pt, false);
    Vector<FSDataInputStream> fileHandleList = new Vector<FSDataInputStream>();
    while (rit.hasNext()) {
        Path path = rit.next().getPath();

        // Only read files whose names start with "part-"
        if (path.getName().startsWith("part-")) {
            long fileSize = fs.getFileStatus(path).getLen();
            if (offset < fileSize) {
                FSDataInputStream inputStream = fs.open(path);
                if (offset > 0) {
                    inputStream.seek(offset);
                }
                fileHandleList.add(inputStream);
            }
            offset -= fileSize;
        }
    }

    if (!fileHandleList.isEmpty()) {
        return new SequenceInputStream(fileHandleList.elements());
    } else {
        System.err.println("Error, no source file loaded. run genSeedDataset.sh first!");
        return null;
    }

}

From source file:com.jkoolcloud.tnt4j.streams.inputs.HdfsFileLineStream.java

License:Apache License

/**
 * Searches for files matching a name pattern. The name pattern may also contain the path of the directory in which
 * the search should be performed, e.g., C:/Tomcat/logs/localhost_access_log.*.txt. If no path is defined (just a
 * file name pattern) then files are searched in {@code System.getProperty("user.dir")}. The files array is ordered
 * by file modification timestamp in descending order.
 *
 * @param path
 *            path of file
 * @param fs
 *            file system
 *
 * @return array of found file paths.
 * @throws IOException
 *             if files can't be listed by file system.
 *
 * @see FileSystem#listStatus(Path, PathFilter)
 * @see FilenameUtils#wildcardMatch(String, String, IOCase)
 */
public static Path[] searchFiles(Path path, FileSystem fs) throws IOException {
    FileStatus[] dir = fs.listStatus(path.getParent(), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            String name = path.getName();
            return FilenameUtils.wildcardMatch(name, "*", IOCase.INSENSITIVE); // NON-NLS
        }
    });

    Path[] activityFiles = new Path[dir == null ? 0 : dir.length];
    if (dir != null) {
        Arrays.sort(dir, new Comparator<FileStatus>() {
            @Override
            public int compare(FileStatus o1, FileStatus o2) {
                return Long.valueOf(o1.getModificationTime()).compareTo(o2.getModificationTime()) * (-1);
            }
        });

        for (int i = 0; i < dir.length; i++) {
            activityFiles[i] = dir[i].getPath();
        }
    }

    return activityFiles;
}
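
A hedged usage sketch for searchFiles; the HDFS URI and log-file pattern below are hypothetical, and the result comes back sorted newest-first per the comparator above:

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import com.jkoolcloud.tnt4j.streams.inputs.HdfsFileLineStream;

public class SearchFilesDemo {
    public static void main(String[] args) throws Exception {
        // hypothetical cluster URI and log path; adjust to your environment
        FileSystem fs = FileSystem.get(URI.create("hdfs://namenode:8020"), new Configuration());
        Path pattern = new Path("/logs/localhost_access_log.*.txt");
        Path[] found = HdfsFileLineStream.searchFiles(pattern, fs);
        for (Path p : found) {
            System.out.println(p.getName()); // newest file first, per the sort above
        }
        fs.close();
    }
}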

From source file:com.knewton.mapreduce.SSTableRecordReader.java

License:Apache License

/**
 * Copies a remote path to the local filesystem, while updating hadoop that we're making
 * progress. Doesn't support directories.
 */
@VisibleForTesting
void copyToLocalFile(FileSystem remoteFS, FileSystem localFS, Path remote, Path local) throws IOException {
    // don't support transferring from remote directories
    FileStatus remoteStat = remoteFS.getFileStatus(remote);
    Preconditions.checkArgument(!remoteStat.isDirectory(), String.format("Path %s is directory!", remote));
    // if local is a dir, copy to inside that dir, like 'cp /path/file /tmp/' would do
    if (localFS.exists(local)) {
        FileStatus localStat = localFS.getFileStatus(local);
        if (localStat.isDirectory()) {
            local = new Path(local, remote.getName());
        }
    }
    long remoteFileSize = remoteStat.getLen();
    // do actual copy
    InputStream in = null;
    OutputStream out = null;
    try {
        long startTime = System.currentTimeMillis();
        long lastLogTime = 0;
        long bytesCopied = 0;
        in = remoteFS.open(remote);
        out = localFS.create(local, true);
        int buffSize = this.conf.getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY,
                CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_DEFAULT);
        byte[] buf = new byte[buffSize];
        int bytesRead = in.read(buf);
        while (bytesRead >= 0) {
            long now = System.currentTimeMillis();
            // log transfer rate once per min, starting 1 min after transfer began
            if (now - lastLogTime > 60000L && now - startTime > 60000L) {
                double elapsedSec = (now - startTime) / 1000D;
                double bytesPerSec = bytesCopied / elapsedSec;
                LOG.info("Transferred {} of {} bytes at {} bytes per second", bytesCopied, remoteFileSize,
                        bytesPerSec);
                lastLogTime = now;
            }
            this.ctx.progress();
            out.write(buf, 0, bytesRead);
            bytesCopied += bytesRead;
            bytesRead = in.read(buf);
        }
        // try to close these outside of finally so we receive exception on failure
        out.close();
        out = null;
        in.close();
        in = null;
    } finally {
        // make sure everything's closed
        IOUtils.closeStream(out);
        IOUtils.closeStream(in);
    }
}
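
For comparison, a minimal sketch using Hadoop's built-in FileSystem#copyToLocalFile with hypothetical paths; the built-in helper performs the same byte copy but never calls ctx.progress(), which is why the method above rolls its own read/write loop to keep long-running tasks alive:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SimpleCopyDemo {
    public static void main(String[] args) throws Exception {
        // hypothetical paths; no progress reporting happens during this copy
        FileSystem fs = FileSystem.get(new Configuration());
        fs.copyToLocalFile(new Path("/data/sstable/mc-1-big-Data.db"),
                new Path("/tmp/mc-1-big-Data.db"));
        fs.close();
    }
}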

From source file:com.lightboxtechnologies.spectrum.ExtractData.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 4) {
        System.err.println("Usage: ExtractData <imageID> <friendly_name> <extents_file> <evidence file>");
        return 2;
    }

    final String imageID = args[0];
    final String friendlyName = args[1];
    final String extentsPath = args[2];
    final String image = args[3];

    Configuration conf = getConf();

    final Job job = SKJobFactory.createJobFromConf(imageID, friendlyName, "ExtractData", conf);
    job.setJarByClass(ExtractData.class);
    job.setMapperClass(ExtractDataMapper.class);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setNumReduceTasks(1);

    // job ctor copies the Configuration we pass it, get the real one
    conf = job.getConfiguration();

    conf.setLong("timestamp", System.currentTimeMillis());

    job.setInputFormatClass(RawFileInputFormat.class);
    RawFileInputFormat.addInputPath(job, new Path(image));

    job.setOutputFormatClass(HFileOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);

    conf.setInt("mapreduce.job.jvm.numtasks", -1);

    final FileSystem fs = FileSystem.get(conf);
    Path hfileDir = new Path("/texaspete/ev/tmp", UUID.randomUUID().toString());
    hfileDir = hfileDir.makeQualified(fs);
    LOG.info("Hashes will be written temporarily to " + hfileDir);

    HFileOutputFormat.setOutputPath(job, hfileDir);

    final Path extp = new Path(extentsPath);
    final URI extents = extp.toUri();
    LOG.info("extents file is " + extents);

    DistributedCache.addCacheFile(extents, conf);
    conf.set("com.lbt.extentsname", extp.getName());
    // job.getConfiguration().setBoolean("mapred.task.profile", true);
    // job.getConfiguration().setBoolean("mapreduce.task.profile", true);

    HBaseTables.summon(conf, HBaseTables.HASH_TBL_B, HBaseTables.HASH_COLFAM_B);

    HBaseTables.summon(conf, HBaseTables.ENTRIES_TBL_B, HBaseTables.ENTRIES_COLFAM_B);

    final boolean result = job.waitForCompletion(true);
    if (result) {
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
        HBaseConfiguration.addHbaseResources(conf);
        loader.setConf(conf);
        LOG.info("Loading hashes into hbase");
        chmodR(fs, hfileDir);
        loader.doBulkLoad(hfileDir, new HTable(conf, HBaseTables.HASH_TBL_B));
        //      result = fs.delete(hfileDir, true);
    }
    return result ? 0 : 1;
}