Example usage for org.apache.hadoop.fs Path getParent

Introduction

This page collects example usages of the org.apache.hadoop.fs.Path method getParent(), drawn from open source projects.

Prototype

public Path getParent() 

Document

Returns the parent of a path or null if at root.
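As a quick orientation before the project examples, here is a minimal, self-contained sketch of that contract (the demo class and example paths are illustrative, not taken from the projects below):

import org.apache.hadoop.fs.Path;

public class GetParentDemo {
    public static void main(String[] args) {
        Path file = new Path("/user/alice/data/part-00000");
        System.out.println(file.getParent());            // prints "/user/alice/data"
        System.out.println(new Path("/").getParent());    // prints "null": the root has no parent
    }
}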

Usage

From source file:com.inmobi.messaging.consumer.util.HadoopUtil.java

License:Apache License

public static void setUpHadoopFiles(Path streamDirPrefix, Configuration conf, String[] files,
        String[] suffixDirs, Path[] finalFiles, boolean alternateEmptyFiles, Date minuteDirTimeStamp, int index,
        int startIndex) throws Exception {
    FileSystem fs = streamDirPrefix.getFileSystem(conf);
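    // The parent of the stream dir prefix serves as the working root; a "data" staging dir is created under it.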
    Path rootDir = streamDirPrefix.getParent();
    Path tmpDataDir = new Path(rootDir, "data");
    boolean emptyFile = false;
    // setup data dirs
    if (files != null) {
        int i = startIndex;
        int j = index;
        for (String file : files) {
            if (alternateEmptyFiles && emptyFile) {
                MessageUtil.createEmptySequenceFile(file, fs, tmpDataDir, conf);
                emptyFile = false;
            } else {
                MessageUtil.createMessageSequenceFile(file, fs, tmpDataDir, i, conf);
                emptyFile = true;
                i += 100;
            }
            Path srcPath = new Path(tmpDataDir, file);
            Date commitTime = getCommitDateForFile(file, minuteDirTimeStamp);
            TestUtil.publishMissingPaths(fs, streamDirPrefix, lastCommitTime, commitTime);
            lastCommitTime = commitTime;
            Path targetDateDir = getTargetDateDir(streamDirPrefix, commitTime);
            List<Path> targetDirs = new ArrayList<Path>();
            if (suffixDirs != null) {
                for (String suffixDir : suffixDirs) {
                    targetDirs.add(new Path(targetDateDir, suffixDir));
                }
            } else {
                targetDirs.add(targetDateDir);
            }
            for (Path targetDir : targetDirs) {
                fs.mkdirs(targetDir);
                Path targetPath = new Path(targetDir, file);
                fs.copyFromLocalFile(srcPath, targetPath);
                LOG.info("Copied " + srcPath + " to " + targetPath);
                if (finalFiles != null) {
                    finalFiles[j] = targetPath;
                    j++;
                }
                Thread.sleep(1000);
            }
            fs.delete(srcPath, true);
        }
        TestUtil.publishLastPath(fs, streamDirPrefix, lastCommitTime);
    }
}

From source file:com.inmobi.messaging.consumer.util.HadoopUtil.java

License:Apache License

public static void setupHadoopCluster(Configuration conf, String[] files, String[] suffixDirs,
        Path[] finalFiles, Path finalDir, boolean withEmptyFiles, boolean createFilesInNextHour)
        throws Exception {
    FileSystem fs = finalDir.getFileSystem(conf);

    Path rootDir = finalDir.getParent();
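    // Remove any previous run's root so the staging area starts clean.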
    fs.delete(rootDir, true);
    Path tmpDataDir = new Path(rootDir, "data");
    fs.mkdirs(tmpDataDir);

    if (!createFilesInNextHour) {
        setUpHadoopFiles(finalDir, conf, files, suffixDirs, finalFiles, withEmptyFiles, null, 0, 0);
    } else {
        // start from one hour back, as we need files in two different hours.
        Calendar cal = Calendar.getInstance();
        cal.setTime(startCommitTime);
        cal.add(Calendar.HOUR_OF_DAY, -1);

        setUpHadoopFiles(finalDir, conf, files, suffixDirs, finalFiles, withEmptyFiles, cal.getTime(), 0, 0);
        // go to next hour
        cal.add(Calendar.HOUR_OF_DAY, 1);
        int index = files.length;
        // find the number of non-empty (i.e., data) files in one hour
        int numberOfNonEmptyFiles = withEmptyFiles ? (int) Math.ceil(index / 2.0) : index;
        int startIndex = numberOfNonEmptyFiles * 100;
        setUpHadoopFiles(finalDir, conf, files, suffixDirs, finalFiles, withEmptyFiles, cal.getTime(), index,
                startIndex);
    }
}

From source file:com.intel.hibench.streambench.FileDataGenNew.java

License:Apache License

private InputStream OpenMultiplePartsWithOffset(FileSystem fs, Path pt, long offset) throws IOException {
    System.out.println("Opening files, path:" + pt + " offset:" + offset);
    RemoteIterator<LocatedFileStatus> rit = fs.listFiles(pt, false);
    Vector<FSDataInputStream> fileHandleList = new Vector<FSDataInputStream>();
    while (rit.hasNext()) {
        Path path = rit.next().getPath();
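        // Strip the parent-directory prefix, leaving just the child name (e.g. "/part-00000").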
        String filename = path.toString().substring(path.getParent().toString().length(),
                path.toString().length());

        if (filename.startsWith("/part-")) {
            long filesize = fs.getFileStatus(path).getLen();
            if (offset < filesize) {
                FSDataInputStream handle = fs.open(path);
                if (offset > 0) {
                    handle.seek(offset);
                }
                fileHandleList.add(handle);
            }
            offset -= filesize;
        }
    }
    if (fileHandleList.size() == 1)
        return fileHandleList.get(0);
    else if (fileHandleList.size() > 1) {
        Enumeration<FSDataInputStream> enu = fileHandleList.elements();
        return new SequenceInputStream(enu);
    } else {
        System.err.println("Error, no source file loaded. run genSeedDataset.sh first!");
        return null;
    }
}

From source file:com.intropro.prairie.unit.hdfs.HdfsUnit.java

License:Apache License

public <T> void saveAs(InputStream inputStream, String dstPath, Format<T> inputFormat, Format<T> outputFormat)
        throws IOException {
    Path outPath = new Path(dstPath);
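    // Ensure the destination's parent directories exist before creating the file.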
    getFileSystem().mkdirs(outPath.getParent());
    FSDataOutputStream fsDataOutputStream = getFileSystem().create(outPath);
    try {
        InputFormatReader<T> inputFormatReader = inputFormat.createReader(inputStream);
        OutputFormatWriter<T> outputFormatWriter = outputFormat.createWriter(fsDataOutputStream);
        T line;
        while ((line = inputFormatReader.next()) != null) {
            outputFormatWriter.write(line);
        }
        inputFormatReader.close();
        outputFormatWriter.close();
    } catch (FormatException e) {
        throw new IOException(e);
    }
}

From source file:com.jaeksoft.searchlib.crawler.cache.HadoopCrawlCache.java

License:Open Source License

private Path checkPath(Path path) throws IOException {
    if (!fileSystem.exists(path)) {
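        // Path is missing: make sure its parent chain exists so it can be created later.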
        Path parent = path.getParent();
        if (!fileSystem.exists(parent))
            fileSystem.mkdirs(parent);
    }
    return path;
}

From source file:com.jkoolcloud.tnt4j.streams.inputs.HdfsFileLineStream.java

License:Apache License

/**
 * Searches for files matching a name pattern. The pattern may also contain the path of the directory where the
 * search should be performed, e.g., C:/Tomcat/logs/localhost_access_log.*.txt. If no path is given (just a file name
 * pattern), files are searched in {@code System.getProperty("user.dir")}. The returned array is ordered by file
 * modification timestamp in descending order.
 *
 * @param path
 *            path of file
 * @param fs
 *            file system
 *
 * @return array of found files paths.
 * @throws IOException
 *             if files can't be listed by file system.
 *
 * @see FileSystem#listStatus(Path, PathFilter)
 * @see FilenameUtils#wildcardMatch(String, String, IOCase)
 */
public static Path[] searchFiles(Path path, FileSystem fs) throws IOException {
    FileStatus[] dir = fs.listStatus(path.getParent(), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            String name = path.getName();
            return FilenameUtils.wildcardMatch(name, "*", IOCase.INSENSITIVE); // NON-NLS
        }
    });

    Path[] activityFiles = new Path[dir == null ? 0 : dir.length];
    if (dir != null) {
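        // Newest first: sort by modification time in descending order.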
        Arrays.sort(dir, new Comparator<FileStatus>() {
            @Override
            public int compare(FileStatus o1, FileStatus o2) {
                return Long.valueOf(o1.getModificationTime()).compareTo(o2.getModificationTime()) * (-1);
            }
        });

        for (int i = 0; i < dir.length; i++) {
            activityFiles[i] = dir[i].getPath();
        }
    }

    return activityFiles;
}

From source file:com.kadwa.hadoop.DistExec.java

License:Open Source License

/**
 * Initialize ExecFilesMapper specific job-configuration.
 *
 * @param conf    : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args    Arguments
 * @return true if it is necessary to launch a job.
 */
private static boolean setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());
    jobConf.set(EXEC_CMD_LABEL, args.execCmd);

    //set boolean values
    jobConf.setBoolean(Options.REDIRECT_ERROR_TO_OUT.propertyname,
            args.flags.contains(Options.REDIRECT_ERROR_TO_OUT));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path stagingArea;
    try {
        stagingArea = JobSubmissionFiles.getStagingDir(jClient, conf);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }

    Path jobDirectory = new Path(stagingArea + NAME + "_" + randomId);
    FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
    FileSystem.mkdirs(FileSystem.get(jobDirectory.toUri(), conf), jobDirectory, mapredSysPerms);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    FileSystem dstfs = args.dst.getFileSystem(conf);

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), new Path[] { args.dst }, conf);

    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_" + NAME + "_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
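            // Destination is a plain file (or absent): put the logs beside it, under its parent.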
            Path parent = args.dst.getParent();
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_" + NAME + "_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_" + NAME + "_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class,
            Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_" + NAME + "_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists);
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
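            // With a single source dir and no existing destination, copy relative to the
            // source itself; otherwise relative to its parent, so paths keep the source name.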
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<FileStatus>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            if (LOG.isTraceEnabled()) {
                                LOG.trace("adding file " + child.getPath());
                            }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        LOG.info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create" + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_" + NAME + "_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_" + NAME + "_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());
    LOG.info("sourcePathsCount=" + srcCount);
    LOG.info("filesToExecCount=" + fileCount);
    LOG.info("bytesToExecCount=" + StringUtils.humanReadableInt(byteCount));
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(fileCount, jobConf);
    return fileCount > 0;
}

From source file:com.kylinolap.dict.DictionaryManager.java

License:Apache License

private String unpackDataSet(String tempHDFSDir, String dataSetName) throws IOException {

    InputStream in = this.getClass().getResourceAsStream("/com/kylinolap/dict/" + dataSetName + ".txt");
    if (in == null) // data set resource not found
        return null;

    ByteArrayOutputStream buf = new ByteArrayOutputStream();
    IOUtils.copy(in, buf);
    in.close();
    byte[] bytes = buf.toByteArray();

    Path tmpDataSetPath = new Path(
            tempHDFSDir + "/dict/temp_dataset/" + dataSetName + "_" + bytes.length + ".txt");

    FileSystem fs = HadoopUtil.getFileSystem(tempHDFSDir);
    boolean writtenNewFile = false;
    if (!fs.exists(tmpDataSetPath) || fs.getFileStatus(tmpDataSetPath).getLen() != bytes.length) {
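        // Create the parent directory before writing the temp dataset file.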
        fs.mkdirs(tmpDataSetPath.getParent());
        FSDataOutputStream out = fs.create(tmpDataSetPath);
        IOUtils.copy(new ByteArrayInputStream(bytes), out);
        out.close();
        writtenNewFile = true;
    }

    String qualifiedPath = tmpDataSetPath.makeQualified(fs.getUri(), new Path("/")).toString();
    if (writtenNewFile)
        logger.info("Dictionary temp data set file written to " + qualifiedPath);
    return qualifiedPath;
}

From source file:com.liferay.hadoop.action.HadoopJob.java

License:Open Source License

public String doExecute(HttpServletRequest request, HttpServletResponse response) throws Exception {

    response.setContentType(ContentTypes.TEXT_PLAIN_UTF8);

    PrintWriter writer = response.getWriter();

    FileSystem fileSystem = HadoopManager.getFileSystem();

    JobClient jobClient = HadoopManager.getJobClient();

    writer.println("-- Job Status --");

    Path inputPath = new Path("/index/*/*");
    Path outputPath = new Path("/wordcount/results");

    try {
        if (_runningJob == null) {
            writer.println("Creating job");

            if (fileSystem.exists(_jobPath)) {
                fileSystem.delete(_jobPath, false);
            }

            if (!fileSystem.exists(_jobPath)) {
                writer.println("Deploying the job code to cluster");

                FSDataOutputStream outputStream = null;

                try {
                    outputStream = fileSystem.create(_jobPath);

                    ServletContext servletContext = HadoopManager.getServletContext();

                    InputStream inputStream = servletContext.getResourceAsStream("/WEB-INF/lib/hadoop-job.jar");

                    StreamUtil.transfer(inputStream, outputStream, false);
                } finally {
                    StreamUtil.cleanUp(outputStream);
                }

                writer.println("Job code deployed to cluster");
            }

            if (fileSystem.exists(outputPath)) {
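                // Archive prior output as a hidden sibling: parent dir + "/.results-<timestamp>".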
                writer.println("A previous job output was found, backing it up");

                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            _jobConf = HadoopManager.createNewJobConf();

            _jobConf.setJobName("Word Count");

            writer.println("Job '" + _jobConf.getJobName() + "' is being configured");

            _jobConf.setJarByClass(Map.class);
            _jobConf.setOutputKeyClass(Text.class);
            _jobConf.setOutputValueClass(IntWritable.class);
            _jobConf.setMapperClass(Map.class);
            _jobConf.setCombinerClass(Reduce.class);
            _jobConf.setReducerClass(Reduce.class);
            _jobConf.setInputFormat(TextInputFormat.class);
            _jobConf.setOutputFormat(TextOutputFormat.class);

            writer.println("Job code deployed to distributed cache's classpath");

            DistributedCache.addArchiveToClassPath(_jobPath, _jobConf, fileSystem);

            FileInputFormat.setInputPaths(_jobConf, inputPath);
            FileOutputFormat.setOutputPath(_jobConf, outputPath);

            writer.println("Submitting job the first time");

            _runningJob = jobClient.submitJob(_jobConf);

            writer.println("Job submitted");
        }

        int jobState = _runningJob.getJobState();

        writer.println(
                "Job status: " + jobState + " (RUNNING = 1, SUCCEEDED = 2, FAILED = 3, PREP = 4, KILLED = 5)");

        if ((jobState != JobStatus.RUNNING) && (jobState != JobStatus.PREP)) {

            writer.println("Re-issuing the job");

            if (fileSystem.exists(outputPath)) {
                writer.println("A previous job output was found, backing it up");

                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            writer.println("Submitting job the first time");

            _runningJob = jobClient.submitJob(_jobConf);

            writer.println("Job submitted");
        }
    } catch (Exception ioe) {
        writer.println("Job error: ");

        ioe.printStackTrace(writer);
    }

    writer.flush();
    writer.close();

    return null;
}

From source file:com.liferay.hadoop.store.HDFSStore.java

License:Open Source License

@Override
public void deleteDirectory(long companyId, long repositoryId, String dirName)
        throws PortalException, SystemException {

    Path fullPath = HadoopManager.getFullDirPath(companyId, repositoryId, dirName);

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        fileSystem.delete(fullPath, true);

        Path parentPath = fullPath.getParent();
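        // Prune ancestor directories left empty by the delete.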

        deleteEmptyAncestors(parentPath);
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}