Example usage for org.apache.hadoop.fs Path toString

Introduction

On this page you can find example usages of org.apache.hadoop.fs.Path.toString().

Prototype

@Override
public String toString()
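
As a quick orientation, here is a minimal standalone sketch (not taken from the sources below) of what Path.toString() returns for relative and fully qualified paths:

import org.apache.hadoop.fs.Path;

public class PathToStringDemo {
    public static void main(String[] args) {
        Path relative = new Path("data/input");
        Path qualified = new Path("hdfs://namenode:8020/user/alice/data");

        // toString() renders the path as-is; the scheme and authority are
        // included only when the Path was constructed with them.
        System.out.println(relative.toString());  // data/input
        System.out.println(qualified.toString()); // hdfs://namenode:8020/user/alice/data
    }
}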

Usage

From source file:com.datasalt.pangool.solr.SolrRecordWriter.java

License:Apache License

private Path findSolrConfig(Configuration conf) throws IOException {
    Path solrHome = null;

    // we added these lines to make this patch work on Hadoop 0.20.2
    FileSystem localFs = FileSystem.getLocal(conf);
    if (FileSystem.get(conf).equals(localFs)) {
        return new Path(localSolrHome);
    }
    // end-of-addition
    Path[] localArchives = DistributedCache.getLocalCacheArchives(conf);

    if (localArchives.length == 0) {
        throw new IOException(String.format("No local cache archives, where is %s", zipName));
    }
    for (Path unpackedDir : localArchives) {
        // Only logged if debugging
        if (LOG.isDebugEnabled()) {
            LOG.debug(String.format("Examining unpack directory %s for %s", unpackedDir, zipName));

            ProcessBuilder lsCmd = new ProcessBuilder(
                    new String[] { "/bin/ls", "-lR", unpackedDir.toString() });
            lsCmd.redirectErrorStream(true); // the no-arg redirectErrorStream() is only a getter and does nothing
            Process ls = lsCmd.start();
            try {
                byte[] buf = new byte[16 * 1024];
                InputStream all = ls.getInputStream();
                int count;
                while ((count = all.read(buf)) > 0) {
                    System.err.write(buf, 0, count);
                }
            } catch (IOException ignore) {
                // best-effort debug listing; swallow read errors
            }
            try {
                // waitFor() blocks until ls exits; exitValue() would throw
                // IllegalThreadStateException if the process were still running
                System.err.format("Exit value is %d%n", ls.waitFor());
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        }
        if (unpackedDir.getName().equals(zipName)) {

            solrHome = unpackedDir;
            break;
        }
    }
    return solrHome;
}
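
A note on the snippet above: the local-filesystem short-circuit exists because the DistributedCache does not localize archives when the job runs against the local filesystem (the local job runner case, per the Hadoop 0.20.2 comment in the code), so the configured localSolrHome is returned directly; otherwise unpackedDir.toString() hands a plain filesystem path to the external /bin/ls process.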

From source file:com.datasalt.pangool.solr.TupleSolrOutputFormat.java

License:Apache License

private void setupSolrHomeCache(File solrHome, Configuration conf) throws IOException {
    if (solrHome == null || !(solrHome.exists() && solrHome.isDirectory())) {
        throw new IOException("Invalid solr.home: " + solrHome);
    }
    localSolrHome = solrHome.getAbsolutePath();
    File tmpZip = File.createTempFile("solr", "zip");
    createZip(solrHome, tmpZip);
    // Make a reasonably unique name for the zip file in the distributed cache
    // to avoid collisions if multiple jobs are running.
    String hdfsZipName = UUID.randomUUID().toString() + '.' + ZIP_FILE_BASE_NAME;
    zipName = hdfsZipName;

    Path zipPath = new Path("/tmp", zipName);
    FileSystem fs = FileSystem.get(conf);
    fs.copyFromLocalFile(new Path(tmpZip.toString()), zipPath);
    final URI baseZipUrl = fs.getUri().resolve(zipPath.toString() + '#' + zipName);

    DistributedCache.addCacheArchive(baseZipUrl, conf);
    LOG.info("Set Solr cache: " + Arrays.asList(DistributedCache.getCacheArchives(conf)));
}
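
Note the '#' fragment appended to the zip URI: the DistributedCache uses it as the local link name of the unpacked archive, which is what allows findSolrConfig above to match unpackedDir.getName() against zipName.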

From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.PangoolMultipleInputs.java

License:Apache License

/**
 * Add a {@link Path} with a custom {@link InputFormat} and {@link Mapper} to the list of inputs for the map-reduce
 * job. Returns the instance files created.
 *
 * @param job
 *          The {@link Job}
 * @param path
 *          {@link Path} to be added to the list of inputs for the job
 * @param inputFormat
 *          {@link InputFormat} class to use for this path
 * @param mapperInstance
 *          {@link Mapper} instance to use
 * @throws IOException
 * @throws FileNotFoundException
 */
public static Set<String> addInputPath(Job job, Path path, InputFormat inputFormat, Mapper mapperInstance,
        Map<String, String> specificContext) throws FileNotFoundException, IOException {

    Set<String> instanceFiles = new HashSet<String>();
    // Serialize the Mapper instance
    String uniqueNameMapper = UUID.randomUUID().toString() + '.' + "mapper.dat";
    try {
        InstancesDistributor.distribute(mapperInstance, uniqueNameMapper, job.getConfiguration());
        instanceFiles.add(uniqueNameMapper);
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }
    // Serialize the Input Format
    String uniqueNameInputFormat = UUID.randomUUID().toString() + '.' + "inputFormat.dat";
    try {
        InstancesDistributor.distribute(inputFormat, uniqueNameInputFormat, job.getConfiguration());
        instanceFiles.add(uniqueNameInputFormat);
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }
    for (Map.Entry<String, String> contextKeyValue : specificContext.entrySet()) {
        PangoolMultipleInputs.addInputContext(job, uniqueNameInputFormat, contextKeyValue.getKey(),
                contextKeyValue.getValue());
    }
    addInputPath(job, path, uniqueNameInputFormat);
    Configuration conf = job.getConfiguration();
    String mapperMapping = path.toString() + ";" + uniqueNameMapper;
    String mappers = conf.get(PANGOOL_INPUT_DIR_MAPPERS_CONF);
    conf.set(PANGOOL_INPUT_DIR_MAPPERS_CONF, mappers == null ? mapperMapping : mappers + "," + mapperMapping);
    job.setMapperClass(DelegatingMapper.class);
    return instanceFiles;
}
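
A minimal usage sketch for addInputPath(...) above. The job name, input path, InputFormat/Mapper instances and context entry are illustrative assumptions, not part of the Pangool sources:

Job job = new Job(new Configuration(), "multi-input-job");

Map<String, String> specificContext = new HashMap<String, String>();
specificContext.put("my.format.option", "value"); // hypothetical per-input setting

Set<String> instanceFiles = PangoolMultipleInputs.addInputPath(job, new Path("/data/logs"),
        new MyInputFormat(), new MyMapper(), specificContext); // MyInputFormat/MyMapper are assumed user classes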

From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.PangoolMultipleInputs.java

License:Apache License

private static void addInputPath(Job job, Path path, String inputFormatInstance) {
    /*
     * Only internal -> not allowed to add inputs without associated InputProcessor files
     */
    String inputFormatMapping = path.toString() + ";" + inputFormatInstance;
    Configuration conf = job.getConfiguration();
    String inputFormats = conf.get(PANGOOL_INPUT_DIR_FORMATS_CONF);
    conf.set(PANGOOL_INPUT_DIR_FORMATS_CONF,
            inputFormats == null ? inputFormatMapping : inputFormats + "," + inputFormatMapping);

    job.setInputFormatClass(DelegatingInputFormat.class);
}

From source file:com.datasalt.pangool.utils.DCUtils.java

License:Apache License

/**
 * Given a file post-fix, locate a file in the DistributedCache. It iterates over all the local files and returns the
 * first one that meets this condition.
 * 
 * @param conf
 *          The Hadoop Configuration.
 * @param filePostFix
 *          The file post-fix.
 * @return The located file, or null if no file matches.
 * @throws IOException
 */
public static Path locateFileInDC(Configuration conf, String filePostFix) throws IOException {
    FileSystem fS = FileSystem.get(conf);
    Path locatedFile = null;

    if (fS.equals(FileSystem.getLocal(conf))) {
        // Use the java.io.File API on the local filesystem because the Hadoop Path/FileSystem
        // machinery is too slow for tests that need to call this method a lot
        File tmpFolder = new File(conf.get("hadoop.tmp.dir"));
        File[] localFiles = tmpFolder.listFiles();
        if (localFiles != null) { // listFiles() returns null if the folder does not exist
            for (File file : localFiles) {
                if (file.getName().endsWith(filePostFix)) {
                    locatedFile = new Path(file.toString());
                    break;
                }
            }
        }
    } else {
        Path tmpHdfsFolder = new Path(conf.get(HDFS_TMP_FOLDER_CONF, conf.get("hadoop.tmp.dir")));
        for (FileStatus fSt : fS.listStatus(tmpHdfsFolder)) {
            Path path = fSt.getPath();
            if (path.toString().endsWith(filePostFix)) {
                locatedFile = path;
                break;
            }
        }
    }

    return locatedFile;
}
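
A minimal usage sketch, assuming an instance file whose name ends with "mapper.dat" was previously placed in the temporary folder (as the UUID-prefixed serialization in PangoolMultipleInputs above does):

Configuration conf = new Configuration();
Path serialized = DCUtils.locateFileInDC(conf, "mapper.dat");
if (serialized == null) {
    throw new IOException("no distributed file ends with mapper.dat");
}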

From source file:com.datasalt.pangool.utils.test.AbstractHadoopTestLibrary.java

License:Apache License

public void cleanUp() throws IOException {
    for (Map.Entry<String, Object> entry : inputs.entrySet()) {
        trash(entry.getKey());
    }
    for (Map.Entry<String, List<Pair<Object, Object>>> entry : outputs.entrySet()) {
        Path p = new Path(entry.getKey());
        if (p.toString().contains("-0000")) { // a task output file such as part-00000: trash its parent folder
            p = p.getParent();
        }
        trash(p.toString());
    }
}

From source file:com.datasalt.utils.commons.HadoopUtils.java

License:Apache License

/**
 * Given a file post-fix, locate a file in the DistributedCache.
 * 
 * @param conf The Hadoop Configuration.
 * @param filePostFix The file post-fix.
 * @return The located file, or null if there are no local cache files or none matches.
 * @throws IOException
 */
public static Path locateFileInDC(Configuration conf, String filePostFix) throws IOException {
    Path locatedFile = null;
    Path[] paths = DistributedCache.getLocalCacheFiles(conf);
    if (paths == null) {
        return null;
    }
    for (Path p : paths) {
        if (p.toString().endsWith(filePostFix)) {
            locatedFile = p;
            break;
        }
    }
    return locatedFile;
}
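
A minimal sketch of the round trip, with an illustrative path and link name; the '#' fragment sets the local link name under which the file is localized:

// Client side: register the file in the DistributedCache
// (URISyntaxException handling elided).
DistributedCache.addCacheFile(new URI("/tmp/lookup.dat#lookup.dat"), conf);

// Task side: find it among the localized cache files.
Path lookup = HadoopUtils.locateFileInDC(conf, "lookup.dat");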

From source file:com.datasalt.utils.mapred.joiner.MultiJoiner.java

License:Apache License

/**
 * Adds a CHANNELED input specification. A channeled input specification is a channel associated with a Mapper and an
 * input file or glob. The user will implement a {@link MultiJoinChanneledMapper} which will be tied to a single
 * channel.
 * <p>
 * The user must be consistent with the channel numbers provided, for instance when two or more different files must
 * belong to the same channel.
 * 
 * @param channel
 * @param location
 * @param channelClass
 * @param inputFormat
 * @param mapper
 * 
 * @throws IOException
 */
public MultiJoiner addChanneledInput(Integer channel, Path location, Class<? extends Object> channelClass,
        Class<? extends InputFormat> inputFormat, Class<? extends MultiJoinChanneledMapper> mapper)
        throws IOException {
    /*
     * Configure the MultiJoiner
     */
    setChannelDatumClass(channel, channelClass);
    FileSystem fS = FileSystem.get(getJob().getConfiguration());
    if (location.toString().contains("*")) { // is a glob
        for (FileStatus fSt : fS.globStatus(location, hiddenFileFilter)) { // expands the glob
            addChanneledInputInner(channel, fSt.getPath(), channelClass, inputFormat, mapper);
        }
    } else if (fS.getFileStatus(location).isDir()) {
        for (FileStatus fSt : fS.listStatus(location, hiddenFileFilter)) { // lists the directory
            addChanneledInputInner(channel, fSt.getPath(), channelClass, inputFormat, mapper);
        }
    } else {
        addChanneledInputInner(channel, location, channelClass, inputFormat, mapper);
    }
    return this;
}
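
A minimal usage sketch; the datum classes, mapper classes and paths are illustrative assumptions, and the MultiJoiner construction is elided:

joiner.addChanneledInput(0, new Path("/data/users/part-*"), User.class, TextInputFormat.class,
        UserChanneledMapper.class); // a glob: expanded file by file
joiner.addChanneledInput(1, new Path("/data/clicks"), Click.class, TextInputFormat.class,
        ClickChanneledMapper.class); // a directory: every non-hidden file inside is added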

From source file:com.datasalt.utils.mapred.joiner.MultiJoiner.java

License:Apache License

private void addChanneledInputInner(Integer channel, Path location, Class<? extends Object> channelClass,
        Class<? extends InputFormat> inputFormat, Class<? extends MultiJoinChanneledMapper> mapper)
        throws IOException {

    FileSystem fS = location.getFileSystem(getJob().getConfiguration());
    if (!location.toString().startsWith("/")) {
        // relative path
        location = new Path(fS.getWorkingDirectory(), location);
    } else {
        // absolute path
        location = new Path(fS.getUri() + location.toString());
    }
    addInOrder(channel + "", MultiJoinChanneledMapper.MULTIJOINER_CHANNELED_CHANNELS,
            getJob().getConfiguration());
    addInOrder(location.toString(), MultiJoinChanneledMapper.MULTIJOINER_CHANNELED_FILES,
            getJob().getConfiguration());
    System.out.println("Adding file " + location + " with mapper " + mapper.getName());
    MultipleInputs.addInputPath(getJob(), location, inputFormat, mapper);
}

From source file:com.datatorrent.flume.source.HdfsTestSource.java

License:Open Source License

private List<String> findFiles() throws IOException {
    List<String> files = Lists.newArrayList();
    Path directoryPath = new Path(directory);
    FileSystem lfs = FileSystem.newInstance(directoryPath.toUri(), configuration);
    try {
        logger.debug("checking for new files in {}", directoryPath);
        RemoteIterator<LocatedFileStatus> statuses = lfs.listFiles(directoryPath, true);
        while (statuses.hasNext()) {
            FileStatus status = statuses.next();
            Path path = status.getPath();
            String filePathStr = path.toString();
            if (!filePathStr.endsWith(".gz")) {
                continue;
            }
            logger.debug("new file {}", filePathStr);
            files.add(filePathStr);
        }
    } catch (FileNotFoundException e) {
        logger.warn("Failed to list directory {}", directoryPath, e);
        throw new RuntimeException(e);
    } finally {
        lfs.close();
    }
    return files;
}
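
A note on the snippet above: FileSystem.newInstance(...) is used instead of FileSystem.get(...) precisely so the instance can be closed in the finally block; FileSystem.get(...) returns a JVM-wide cached instance, and closing that would break other code sharing the same filesystem object.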