Example usage for org.apache.hadoop.fs Path getName

List of usage examples for org.apache.hadoop.fs Path getName

Introduction

On this page you can find example usages of org.apache.hadoop.fs Path getName.

Prototype

public String getName() 

Document

Returns the final component of this path.
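
For orientation, here is a minimal, self-contained sketch (the paths are made up for illustration) of what getName() returns:

import org.apache.hadoop.fs.Path;

public class PathGetNameExample {
    public static void main(String[] args) {
        // getName() returns only the final path component, without any parent directories
        Path p = new Path("hdfs://namenode:8020/user/alice/data/part-00000");
        System.out.println(p.getName()); // prints "part-00000"

        // The scheme and authority are ignored; local-style paths behave the same way
        System.out.println(new Path("/tmp/archive.zip").getName()); // prints "archive.zip"
    }
}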

Usage

From source file:com.cloudera.sqoop.util.AppendUtils.java

License:Apache License

/**
 * Move files from source to target using a specified starting partition.
 */
private void moveFiles(FileSystem fs, Path sourceDir, Path targetDir, int partitionStart) throws IOException {

    NumberFormat numpart = NumberFormat.getInstance();
    numpart.setMinimumIntegerDigits(PARTITION_DIGITS);
    numpart.setGroupingUsed(false);
    Pattern patt = Pattern.compile("part.*-([0-9][0-9][0-9][0-9][0-9]).*");
    FileStatus[] tempFiles = fs.listStatus(sourceDir);

    if (null == tempFiles) {
        // If we've already checked that the dir exists, and now it can't be
        // listed, this is a genuine error (permissions, fs integrity, or other).
        throw new IOException("Could not list files from " + sourceDir);
    }

    // Move and rename files & directories from the temporary dir to the
    // target dir, assigning each file the next partition number
    for (FileStatus fileStat : tempFiles) {
        if (!fileStat.isDir()) {
            // Move imported data files
            String filename = fileStat.getPath().getName();
            Matcher mat = patt.matcher(filename);
            if (mat.matches()) {
                String name = getFilename(filename);
                String fileToMove = name.concat(numpart.format(partitionStart++));
                String extension = getFileExtension(filename);
                if (extension != null) {
                    fileToMove = fileToMove.concat(extension);
                }
                LOG.debug("Filename: " + filename + " repartitioned to: " + fileToMove);
                fs.rename(fileStat.getPath(), new Path(targetDir, fileToMove));
            }
        } else {
            // Move directories (_logs & any other)
            String dirName = fileStat.getPath().getName();
            Path path = new Path(targetDir, dirName);
            int dirNumber = 0;
            while (fs.exists(path)) {
                path = new Path(targetDir, dirName.concat("-").concat(numpart.format(dirNumber++)));
            }
            LOG.debug("Directory: " + dirName + " renamed to: " + path.getName());
            fs.rename(fileStat.getPath(), path);
        }
    }
}

From source file:com.cloudera.training.metrics.JobHistoryHelper.java

License:Apache License

public static JobHistory.JobInfo getJobInfoFromLocalFile(String outputFile, Configuration conf)
        throws IOException {
    FileSystem fs = FileSystem.getLocal(conf);

    Path outputFilePath = new Path(outputFile);

    String[] jobDetails = JobHistory.JobInfo.decodeJobHistoryFileName(outputFilePath.getName()).split("_");
    String jobId = jobDetails[2] + "_" + jobDetails[3] + "_" + jobDetails[4];
    JobHistory.JobInfo job = new JobHistory.JobInfo(jobId);
    DefaultJobHistoryParser.parseJobTasks(outputFile, job, fs);
    return job;
}

From source file:com.conversantmedia.mapreduce.tool.DistributedResourceManager.java

License:Apache License

/**
 * Register this resource. If the resource is a simple property (i.e. primitive or String),
 * it is placed in the configuration. Otherwise, it uses the distributed cache
 * mechanism as required.
 * 
 * @param key          the resource key. Usually a property/field name.
 * @param value       the resource.
 * @throws IOException   if resource cannot be serialized
 *
 */
public void registerResource(String key, Object value) throws IOException {
    if (value == null) {
        return;
    }

    String valueString = null;

    // First, determine our approach:
    if (value instanceof String) {
        valueString = (String) value;
    } else if (Primitives.isWrapperType(value.getClass())) {
        valueString = String.valueOf(value);
    }
    // If this is a Path or File object we'll place it
    // on the distributed cache
    else if (value instanceof Path) {
        Path path = (Path) value;
        valueString = path.getName();
        // Distribute the file the new way
        this.job.addCacheFile(path.toUri());
    } else if (value instanceof File) {
        File file = (File) value;
        valueString = file.getName();

        // Distribute the file
        distributeLocalFile(file);
    }
    // Check if it's serializable
    else if (value instanceof java.io.Serializable) {
        // Serialize the object and place it on the distributed cache
        ObjectOutputStream out = null;
        try {
            File beanSerFile = File.createTempFile(value.getClass().getName(), ".ser");
            FileOutputStream fileOut = new FileOutputStream(beanSerFile);
            out = new ObjectOutputStream(fileOut);
            out.writeObject(value);
            valueString = beanSerFile.getName();

            // Distribute the file
            distributeLocalFile(beanSerFile);
        } finally {
            IOUtils.closeQuietly(out);
        }
    } else {
        throw new IllegalArgumentException("Resource [" + key + "] is not serializable.");
    }

    // Setup the config key
    String configKey = CONFIGKEYBASE_RESOURCE + key;
    getConf().set(configKey, value.getClass().getName() + VALUE_SEP + valueString);
}

From source file:com.conversantmedia.mapreduce.tool.DistributedResourceManager.java

License:Apache License

private static Path distributedFilePath(String fileName, Path[] distFiles) throws IOException {
    for (Path path : distFiles) {
        if (StringUtils.equals(fileName, path.getName())) {
            return path;
        }
    }
    return null;
}

From source file:com.cotdp.hadoop.BrotliFileRecordReader.java

License:Apache License

/**
 * Initialize and open the input file from the FileSystem
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) inputSplit;
    Configuration conf = taskAttemptContext.getConfiguration();
    Path path = split.getPath();
    FileSystem fs = path.getFileSystem(conf);

    // Set the file name (the final path component) as the key
    currentKey.set(path.getName());
    // Open the stream
    fsin = fs.open(path);

    String cmd = "/bin/cat";
    ProcessBuilder pb = new ProcessBuilder();
    pb.redirectOutput();
    pb.command(cmd);

    try {
        decompressor = pb.start();

    } catch (IOException e) {
        // Fail fast rather than continuing with a null decompressor
        throw new IOException("Failed to start decompressor process", e);
    }
}

From source file:com.cqx.mr.MRSearchAuto.java

public void searchHBase(int numOfDays) throws IOException, InterruptedException, ClassNotFoundException {
    long startTime;
    long endTime;

    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.zookeeper.quorum", "node2,node3,node4");
    conf.set("fs.default.name", "hdfs://node1");
    conf.set("mapred.job.tracker", "node1:54311");
    // Set the search parameters passed to the map tasks
    conf.set("search.license", "C87310");
    conf.set("search.color", "10");
    conf.set("search.direction", "2");

    Job job = new Job(conf, "MRSearchHBase");
    System.out.println("search.license: " + conf.get("search.license"));
    job.setNumReduceTasks(0);
    job.setJarByClass(MRSearchAuto.class);
    Scan scan = new Scan();
    scan.addFamily(FAMILY_NAME);
    byte[] startRow = Bytes.toBytes("2011010100000");
    byte[] stopRow;
    switch (numOfDays) {
    case 1:
        stopRow = Bytes.toBytes("2011010200000");
        break;
    case 10:
        stopRow = Bytes.toBytes("2011011100000");
        break;
    case 30:
        stopRow = Bytes.toBytes("2011020100000");
        break;
    case 365:
        stopRow = Bytes.toBytes("2012010100000");
        break;
    default:
        stopRow = Bytes.toBytes("2011010101000");
    }
    // Set the scan's row key range
    scan.setStartRow(startRow);
    scan.setStopRow(stopRow);

    TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, SearchMapper.class, ImmutableBytesWritable.class,
            Text.class, job);
    Path outPath = new Path("searchresult");
    HDFS_File file = new HDFS_File();
    file.DelFile(conf, outPath.getName(), true); // delete any existing output
    FileOutputFormat.setOutputPath(job, outPath);

    startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    endTime = System.currentTimeMillis();
    System.out.println("Time used: " + (endTime - startTime));
    System.out.println("startRow:" + Text.decode(startRow));
    System.out.println("stopRow: " + Text.decode(stopRow));
}

From source file:com.dasasian.chok.operation.master.IndexDeployOperation.java

License:Apache License

protected static List<Shard> readShardsFromFs(final String indexName, final String indexPathString)
        throws IndexDeployException {
    // get shard folders from source
    URI uri;
    try {
        uri = new URI(indexPathString);
    } catch (final URISyntaxException e) {
        throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE, "unable to parse index path uri '"
                + indexPathString + "', make sure it starts with file:// or hdfs:// ", e);
    }
    FileSystem fileSystem;
    try {
        fileSystem = HadoopUtil.getFileSystem(new Path(uri.toString()));
    } catch (final IOException e) {
        throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE,
                "unable to retrive file system for index path '" + indexPathString
                        + "', make sure your path starts with hadoop support prefix like file:// or hdfs://",
                e);
    }

    List<Shard> shards = new ArrayList<>();
    try {
        final Path indexPath = new Path(indexPathString);
        if (!fileSystem.exists(indexPath)) {
            throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE,
                    "index path '" + uri + "' does not exists");
        }
        final FileStatus[] listStatus = fileSystem.listStatus(indexPath, new PathFilter() {
            public boolean accept(final Path aPath) {
                return !aPath.getName().startsWith(".");
            }
        });
        for (final FileStatus fileStatus : listStatus) {
            String shardPath = fileStatus.getPath().toString();
            if (fileStatus.isDir() || shardPath.endsWith(".zip")) {
                shards.add(new Shard(createShardName(indexName, shardPath), shardPath));
            }
        }
    } catch (final IOException e) {
        throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE,
                "could not access index path: " + indexPathString, e);
    }

    if (shards.size() == 0) {
        throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE, "index does not contain any shard");
    }
    return shards;
}

From source file:com.datasalt.pangool.solr.SolrRecordWriter.java

License:Apache License

private Path findSolrConfig(Configuration conf) throws IOException {
    Path solrHome = null;

    // we added these lines to make this patch work on Hadoop 0.20.2
    FileSystem localFs = FileSystem.getLocal(conf);
    if (FileSystem.get(conf).equals(localFs)) {
        return new Path(localSolrHome);
    }
    // end-of-addition
    Path[] localArchives = DistributedCache.getLocalCacheArchives(conf);

    if (localArchives.length == 0) {
        throw new IOException(String.format("No local cache archives, where is %s", zipName));
    }
    for (Path unpackedDir : localArchives) {
        // Only logged if debugging
        if (LOG.isDebugEnabled()) {
            LOG.debug(String.format("Examining unpack directory %s for %s", unpackedDir, zipName));

            ProcessBuilder lsCmd = new ProcessBuilder(
                    new String[] { "/bin/ls", "-lR", unpackedDir.toString() });
            lsCmd.redirectErrorStream();
            Process ls = lsCmd.start();
            try {
                byte[] buf = new byte[16 * 1024];
                InputStream all = ls.getInputStream();
                int count;
                while ((count = all.read(buf)) > 0) {
                    System.err.write(buf, 0, count);
                }
            } catch (IOException ignore) {
            }
            System.err.format("Exit value is %d%n", ls.exitValue());
        }
        if (unpackedDir.getName().equals(zipName)) {

            solrHome = unpackedDir;
            break;
        }
    }
    return solrHome;
}

From source file:com.datasalt.utils.commons.TestRepoTool.java

License:Apache License

@Test
public void test() throws IOException {
    FileSystem fs = FileSystem.getLocal(getConf());

    Path repo = new Path("repoTest87463829");
    HadoopUtils.deleteIfExists(fs, repo);

    RepoTool tool = new RepoTool(repo, "pkg", fs);

    assertNull(tool.getNewestPackageWithStatus(PackageStatus.NOT_DEFINED));

    Path pkg1 = tool.newPackage();
    assertEquals("pkg", pkg1.getName().substring(0, 3));

    assertEquals(pkg1.makeQualified(fs), tool.getNewestPackageWithStatus(PackageStatus.NOT_DEFINED));

    Path pkg2 = tool.newPackage();
    assertEquals(pkg2.makeQualified(fs), tool.getNewestPackageWithStatus(PackageStatus.NOT_DEFINED));

    assertEquals(2, tool.getPackages().length);

    RepoTool.setStatus(fs, pkg2, PackageStatus.FINISHED);
    assertEquals(pkg2.makeQualified(fs), tool.getNewestPackageWithStatus(PackageStatus.FINISHED));

    HadoopUtils.deleteIfExists(fs, repo);
}

From source file:com.datatorrent.lib.io.fs.AbstractFileSplitter.java

License:Apache License

/**
 * Creates file metadata and populates the number of blocks in the metadata.
 *
 * @param fileInfo file information
 * @return file-metadata
 * @throws IOException
 */
protected FileMetadata buildFileMetadata(FileInfo fileInfo) throws IOException {
    LOG.debug("file {}", fileInfo.getFilePath());
    FileMetadata fileMetadata = createFileMetadata(fileInfo);
    LOG.debug("fileMetadata {}", fileMetadata);
    Path path = new Path(fileInfo.getFilePath());

    fileMetadata.setFileName(path.getName());

    FileStatus status = getFileStatus(path);
    fileMetadata.setDirectory(status.isDirectory());
    fileMetadata.setFileLength(status.getLen());

    if (fileInfo.getDirectoryPath() == null) { // Direct filename is given as input.
        fileMetadata.setRelativePath(status.getPath().getName());
    } else {
        String relativePath = getRelativePathWithFolderName(fileInfo);
        fileMetadata.setRelativePath(relativePath);
    }

    if (!status.isDirectory()) {
        int noOfBlocks = (int) ((status.getLen() / blockSize) + (((status.getLen() % blockSize) == 0) ? 0 : 1));
        if (fileMetadata.getDataOffset() >= status.getLen()) {
            noOfBlocks = 0;
        }
        fileMetadata.setNumberOfBlocks(noOfBlocks);
        populateBlockIds(fileMetadata);
    }
    return fileMetadata;
}