Example usage for org.apache.hadoop.fs Path makeQualified

List of usage examples for org.apache.hadoop.fs Path makeQualified

Introduction

On this page you can find example usages of org.apache.hadoop.fs Path makeQualified.

Prototype

@Deprecated
public Path makeQualified(FileSystem fs) 

Document

Returns a qualified path object for the FileSystem's working directory.
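
This one-argument overload is deprecated. Below is a minimal sketch of both the deprecated call and its non-deprecated replacement makeQualified(URI, Path); the printed authority and user are hypothetical.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public static void qualifyExample() throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    Path relative = new Path("data/input.txt");

    // Deprecated one-argument form, as used in the examples below.
    Path q1 = relative.makeQualified(fs);

    // Non-deprecated replacement: qualify explicitly against the
    // filesystem's URI and working directory.
    Path q2 = relative.makeQualified(fs.getUri(), fs.getWorkingDirectory());

    // Both print e.g. hdfs://namenode:8020/user/alice/data/input.txt
    System.out.println(q1 + " " + q2);
}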

Usage

From source file:com.datasalt.utils.commons.TestRepoTool.java

License:Apache License

@Test
public void test() throws IOException {
    FileSystem fs = FileSystem.getLocal(getConf());

    Path repo = new Path("repoTest87463829");
    HadoopUtils.deleteIfExists(fs, repo);

    RepoTool tool = new RepoTool(repo, "pkg", fs);

    assertNull(tool.getNewestPackageWithStatus(PackageStatus.NOT_DEFINED));

    Path pkg1 = tool.newPackage();
    assertEquals("pkg", pkg1.getName().substring(0, 3));

    assertEquals(pkg1.makeQualified(fs), tool.getNewestPackageWithStatus(PackageStatus.NOT_DEFINED));

    Path pkg2 = tool.newPackage();
    assertEquals(pkg2.makeQualified(fs), tool.getNewestPackageWithStatus(PackageStatus.NOT_DEFINED));

    assertEquals(2, tool.getPackages().length);

    RepoTool.setStatus(fs, pkg2, PackageStatus.FINISHED);
    assertEquals(pkg2.makeQualified(fs), tool.getNewestPackageWithStatus(PackageStatus.FINISHED));

    HadoopUtils.deleteIfExists(fs, repo);
}

From source file:com.ebay.erl.mobius.core.criterion.TupleRestrictions.java

License:Apache License

/**
 * Creates a tuple criterion that accepts a tuple only when the value
 * of the <code>column</code> is present in the given <code>file</code>.
 * <p>
 * The file is assumed to be a single-column text file with one or more
 * lines. Each line is read into a case-insensitive set, which is then
 * used to check whether the value of the <code>column</code> is
 * contained in the set.
 * 
 * @param column the name of the column whose value is tested for
 * membership in the given <code>file</code>
 * 
 * @param file a single-column, multi-line file containing strings/numbers;
 * each line is treated as a single unit.
 *
 * @return an instance of {@link TupleCriterion} that keeps only the records
 * whose <code>column</code> value is present in the given 
 * <code>file</code>.
 * 
 * @throws FileNotFoundException if the given file cannot be found.
 */
public static TupleCriterion within(final String column, File file) throws FileNotFoundException {
    final File f = TupleRestrictions.checkFileExist(file);

    return new TupleCriterion() {

        private static final long serialVersionUID = -1121221619118915652L;
        private Set<String> set;

        @Override
        public void setConf(Configuration conf) {
            try {
                if (conf.get("tmpfiles") == null || conf.get("tmpfiles").trim().length() == 0) {
                    conf.set("tmpfiles", validateFiles(f.getAbsolutePath(), conf));
                } else {
                    conf.set("tmpfiles", validateFiles(f.getAbsolutePath(), conf) + "," + conf.get("tmpfiles"));
                }

            } catch (IOException e) {
                throw new IllegalArgumentException(e);
            }
        }

        /**
         * COPIED FROM org.apache.hadoop.util.GenericOptionsParser
         */
        private String validateFiles(String files, Configuration conf) throws IOException {
            if (files == null)
                return null;
            String[] fileArr = files.split(",");
            String[] finalArr = new String[fileArr.length];
            for (int i = 0; i < fileArr.length; i++) {
                String tmp = fileArr[i];
                String finalPath;
                Path path = new Path(tmp);
                URI pathURI = path.toUri();
                FileSystem localFs = FileSystem.getLocal(conf);
                if (pathURI.getScheme() == null) {
                    // default to the local file system
                    // check if the file exists or not first
                    if (!localFs.exists(path)) {
                        throw new FileNotFoundException("File " + tmp + " does not exist.");
                    }
                    finalPath = path.makeQualified(localFs).toString();
                } else {
                    // check if the file exists in this file system
                    // we need to recreate this filesystem object to copy
                    // these files to the file system jobtracker is running
                    // on.
                    FileSystem fs = path.getFileSystem(conf);
                    if (!fs.exists(path)) {
                        throw new FileNotFoundException("File " + tmp + " does not exist.");
                    }
                    finalPath = path.makeQualified(fs).toString();
                    try {
                        fs.close();
                    } catch (IOException e) {
                        // ignore failures on close; the qualified path was already resolved
                    }
                }
                finalArr[i] = finalPath;
            }
            return StringUtils.arrayToString(finalArr);
        }

        @Override
        protected boolean evaluate(Tuple tuple, Configuration configuration) {
            if (set == null) {
                set = new CaseInsensitiveTreeSet();
                BufferedReader br = null;
                try {
                    // The file was shipped to the task via the "tmpfiles"
                    // setting (DistributedCache), so at runtime it is available
                    // by name in the task's working directory.
                    br = new BufferedReader(new FileReader(new File(f.getName())));
                    String newLine = null;
                    while ((newLine = br.readLine()) != null) {
                        this.set.add(newLine);
                    }
                } catch (IOException e) {
                    throw new RuntimeException(e);
                } finally {
                    if (br != null) {
                        try {
                            br.close();
                        } catch (IOException e) {
                            // ignore failures on close
                        }
                    }
                }
            }

            String value = tuple.getString(column);
            if (value != null) {
                return this.set.contains(value);
            } else {
                return false;
            }
        }

        @Override
        public String[] getInvolvedColumns() {
            return new String[] { column };
        }
    };
}
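
A hedged usage sketch for the criterion above; the column name and file path are hypothetical, and wiring the criterion into a Mobius job flow is not shown here.

public static void withinExample() throws FileNotFoundException {
    // Keep only tuples whose "user_id" value appears in the
    // single-column file (membership is case-insensitive).
    TupleCriterion criterion = TupleRestrictions.within("user_id", new File("/tmp/allowed_ids.txt"));

    // The criterion reports the columns it reads.
    String[] involved = criterion.getInvolvedColumns(); // ["user_id"]
}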

From source file:com.github.gaoyangthu.demo.mapred.terasort.TeraSort.java

License:Apache License

public int run(String[] args) throws Exception {
    LOG.info("starting");
    JobConf job = (JobConf) getConf();
    Path inputDir = new Path(args[0]);
    inputDir = inputDir.makeQualified(inputDir.getFileSystem(job));
    Path partitionFile = new Path(inputDir, TeraInputFormat.PARTITION_FILENAME);
    URI partitionUri = new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME);
    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraSort");
    job.setJarByClass(TeraSort.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormat(TeraInputFormat.class);
    job.setOutputFormat(TeraOutputFormat.class);
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TeraInputFormat.writePartitionFile(job, partitionFile);
    DistributedCache.addCacheFile(partitionUri, job);
    DistributedCache.createSymlink(job);
    job.setInt("dfs.replication", 1);
    TeraOutputFormat.setFinalSync(job, true);
    JobClient.runJob(job);
    LOG.info("done");
    return 0;
}

From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.java

License:Open Source License

/**
 * Renames src to dst. Src must not be equal to the filesystem root.
 *
 * @param src Source path.
 * @param dst Destination path.
 * @return true if rename succeeds.
 * @throws FileNotFoundException if src does not exist.
 * @throws IOException if an error occurs.
 */
@Override
public boolean rename(Path src, Path dst) throws IOException {
    // Even though the underlying GCSFS will also throw an IAE if src is root, since our filesystem
    // root happens to equal the global root, we want to explicitly check it here since derived
    // classes may not have filesystem roots equal to the global root.
    if (src.makeQualified(this).equals(getFileSystemRoot())) {
        LOG.debug("GHFS.rename: src is root: '{}'", src);
        return false;
    }

    long startTime = System.nanoTime();
    Preconditions.checkArgument(src != null, "src must not be null");
    Preconditions.checkArgument(dst != null, "dst must not be null");

    checkOpen();

    try {
        LOG.debug("GHFS.rename: {} -> {}", src, dst);

        URI srcPath = getGcsPath(src);
        URI dstPath = getGcsPath(dst);
        gcsfs.rename(srcPath, dstPath);
    } catch (IOException e) {
        LOG.debug("GHFS.rename", e);
        return false;
    }

    long duration = System.nanoTime() - startTime;
    increment(Counter.RENAME);
    increment(Counter.RENAME_TIME, duration);
    return true;
}
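
The root check above works because makeQualified resolves scheme, authority, and working directory before the comparison. Below is a minimal sketch of the same qualify-then-compare pattern against a generic FileSystem, using the non-deprecated overload; names are illustrative.

public static boolean isRoot(FileSystem fs, Path src) {
    // Qualify both paths so scheme, authority, and working directory
    // are resolved before the equality check.
    Path qualifiedSrc = src.makeQualified(fs.getUri(), fs.getWorkingDirectory());
    Path root = new Path("/").makeQualified(fs.getUri(), fs.getWorkingDirectory());
    return qualifiedSrc.equals(root);
}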

From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopGlobalRootedFileSystem.java

License:Open Source License

@Override
public URI getGcsPath(Path hadoopPath) {
    LOG.debug("GHFS.getGcsPath: {}", hadoopPath);

    // Convert to fully qualified absolute path; the Path object will callback to get our current
    // workingDirectory as part of fully resolving the path.
    Path resolvedPath = hadoopPath.makeQualified(this);

    // Handle root.
    if (resolvedPath.equals(getFileSystemRoot())) {
        return GoogleCloudStorageFileSystem.GCS_ROOT;
    }

    // Need to convert scheme to GCS scheme and possibly move bucket into authority
    String authorityString = null;
    if (!Strings.isNullOrEmpty(resolvedPath.toUri().getAuthority())) {
        authorityString = "/" + resolvedPath.toUri().getAuthority();
    } else {
        authorityString = "";
    }
    // Construct GCS path uri.
    String path = GoogleCloudStorageFileSystem.SCHEME + ":/" + authorityString + resolvedPath.toUri().getPath();
    URI gcsPath = null;
    try {
        gcsPath = new URI(path);
    } catch (URISyntaxException e) {
        String msg = String.format("Invalid path: %s", hadoopPath);
        throw new IllegalArgumentException(msg, e);
    }

    LOG.debug("GHFS.getGcsPath: {} -> {}", hadoopPath, gcsPath);
    return gcsPath;
}

From source file:com.ibm.bi.dml.runtime.matrix.CSVReblockMR.java

License:Open Source License

/**
 * Finds the lexicographically smallest (part)file among all (part)files
 * under <code>inputPath</code>.
 * 
 * @param job job configuration used to resolve the file system
 * @param inputPath path to a single file or to a directory of (part)files
 * @return the qualified path string of the smallest file, or an empty
 * string if the directory contains no (part)files
 * @throws IOException 
 * @throws FileNotFoundException 
 */
public static String findSmallestFile(JobConf job, String inputPath) throws FileNotFoundException, IOException {

    String smallestFile = null;

    Path p = new Path(inputPath);
    FileSystem fs = p.getFileSystem(job);
    if (!fs.isDirectory(p))
        smallestFile = p.makeQualified(fs).toString();
    else {
        FileStatus[] stats = fs.listStatus(p, hiddenFileFilter);
        if (stats.length == 0)
            smallestFile = "";
        else {
            smallestFile = stats[0].getPath().toString();
            for (int j = 1; j < stats.length; j++) {
                String f = stats[j].getPath().toString();
                if (f.compareTo(smallestFile) < 0)
                    smallestFile = f;
            }
        }
    }
    return smallestFile;
}
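
A minimal call sketch for the method above; the HDFS URI below is hypothetical.

public static void smallestFileExample() throws IOException {
    JobConf job = new JobConf();
    // Returns the qualified path of the file itself, or the path of the
    // lexicographically smallest (part)file if the input is a directory.
    String smallest = CSVReblockMR.findSmallestFile(job, "hdfs://namenode:8020/data/A.csv");
    System.out.println(smallest);
}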

From source file:com.ibm.bi.dml.runtime.matrix.CSVReblockMR.java

License:Open Source License

public static JobReturn runJob(MRJobInstruction inst, String[] inputs, InputInfo[] inputInfos, long[] rlens,
        long[] clens, int[] brlens, int[] bclens, String reblockInstructions, String otherInstructionsInReducer,
        int numReducers, int replication, byte[] resultIndexes, String[] outputs, OutputInfo[] outputInfos)
        throws Exception {
    String[] smallestFiles = new String[inputs.length];
    JobConf job = new JobConf();
    for (int i = 0; i < inputs.length; i++) {
        smallestFiles[i] = findSmallestFile(job, inputs[i]);
    }

    for (int i = 0; i < inputs.length; i++) {
        Path p = new Path(inputs[i]);
        FileSystem fs = p.getFileSystem(job);
        if (!fs.isDirectory(p))
            smallestFiles[i] = p.makeQualified(fs).toString();
        else {
            FileStatus[] stats = fs.listStatus(p, hiddenFileFilter);
            if (stats.length == 0)
                smallestFiles[i] = "";
            else {
                smallestFiles[i] = stats[0].getPath().toString();
                for (int j = 1; j < stats.length; j++) {
                    String f = stats[j].getPath().toString();
                    if (f.compareTo(smallestFiles[i]) < 0)
                        smallestFiles[i] = f;
                }
            }
        }
    }

    AssignRowIDMRReturn ret1 = CSVReblockMR.runAssignRowIDMRJob(inputs, inputInfos, brlens, bclens,
            reblockInstructions, replication, smallestFiles);
    for (int i = 0; i < rlens.length; i++)
        if ((rlens[i] > 0 && rlens[i] != ret1.rlens[i]) || (clens[i] > 0 && clens[i] != ret1.clens[i]))
            throw new RuntimeException("Dimension doesn't mach for input matrix " + i + ", expected ("
                    + rlens[i] + ", " + clens[i] + ") but real (" + ret1.rlens[i] + ", " + ret1.clens[i] + ")");
    JobReturn ret = CSVReblockMR.runCSVReblockJob(null, inputs, inputInfos, ret1.rlens, ret1.clens, brlens,
            bclens, reblockInstructions, otherInstructionsInReducer, numReducers, replication, resultIndexes,
            outputs, outputInfos, ret1.counterFile, smallestFiles);
    return ret;
}

From source file:com.ibm.bi.dml.runtime.matrix.SortMR.java

License:Open Source License

@SuppressWarnings({ "unchecked", "rawtypes" })
public static JobReturn runJob(MRJobInstruction inst, String input, InputInfo inputInfo, long rlen, long clen,
        int brlen, int bclen, String combineInst, String sortInst, int numReducers, int replication,
        String output, OutputInfo outputInfo, boolean valueIsWeight) throws Exception {
    boolean sortIndexes = getSortInstructionType(sortInst) == SortKeys.OperationTypes.Indexes;
    String tmpOutput = sortIndexes ? MRJobConfiguration.constructTempOutputFilename() : output;

    JobConf job = new JobConf(SortMR.class);
    job.setJobName("SortMR");

    //setup partition file
    String pfname = MRJobConfiguration.setUpSortPartitionFilename(job);
    Path partitionFile = new Path(pfname);
    URI partitionUri = new URI(partitionFile.toString());

    //setup input/output paths
    Path inputDir = new Path(input);
    inputDir = inputDir.makeQualified(inputDir.getFileSystem(job));
    SamplingSortMRInputFormat.setInputPaths(job, inputDir);
    Path outpath = new Path(tmpOutput);
    FileOutputFormat.setOutputPath(job, outpath);
    MapReduceTool.deleteFileIfExistOnHDFS(outpath, job);

    //set number of reducers (1 if local mode)
    if (InfrastructureAnalyzer.isLocalMode(job))
        job.setNumReduceTasks(1);
    else
        MRJobConfiguration.setNumReducers(job, numReducers, numReducers);

    //setup input/output format
    job.setInputFormat(SamplingSortMRInputFormat.class);
    SamplingSortMRInputFormat.setTargetKeyValueClasses(job,
            (Class<? extends WritableComparable>) outputInfo.outputKeyClass, outputInfo.outputValueClass);

    //setup instructions and meta information
    if (combineInst != null && !combineInst.trim().isEmpty())
        job.set(COMBINE_INSTRUCTION, combineInst);
    job.set(SORT_INSTRUCTION, sortInst);
    job.setBoolean(VALUE_IS_WEIGHT, valueIsWeight);
    boolean desc = getSortInstructionDescending(sortInst);
    job.setBoolean(SORT_DECREASING, desc);
    MRJobConfiguration.setBlockSize(job, (byte) 0, brlen, bclen);
    MRJobConfiguration.setInputInfo(job, (byte) 0, inputInfo, brlen, bclen, ConvertTarget.CELL);
    int partitionWith0 = SamplingSortMRInputFormat.writePartitionFile(job, partitionFile);

    //setup mapper/reducer/partitioner/output classes
    if (getSortInstructionType(sortInst) == SortKeys.OperationTypes.Indexes) {
        MRJobConfiguration.setInputInfo(job, (byte) 0, inputInfo, brlen, bclen, ConvertTarget.CELL);
        job.setOutputFormat(OutputInfo.BinaryBlockOutputInfo.outputFormatClass);
        job.setMapperClass(IndexSortMapper.class);
        job.setReducerClass(IndexSortReducer.class);
        job.setMapOutputKeyClass(!desc ? IndexSortComparable.class : IndexSortComparableDesc.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setOutputKeyClass(MatrixIndexes.class);
        job.setOutputValueClass(MatrixBlock.class);
    } else { //default case: SORT w/wo weights
        MRJobConfiguration.setInputInfo(job, (byte) 0, inputInfo, brlen, bclen, ConvertTarget.CELL);
        job.setOutputFormat(CompactOutputFormat.class);
        job.setMapperClass(ValueSortMapper.class);
        job.setReducerClass(ValueSortReducer.class);
        job.setOutputKeyClass(outputInfo.outputKeyClass); //double
        job.setOutputValueClass(outputInfo.outputValueClass); //int
    }
    job.setPartitionerClass(TotalOrderPartitioner.class);

    //setup distributed cache
    DistributedCache.addCacheFile(partitionUri, job);
    DistributedCache.createSymlink(job);

    //setup replication factor
    job.setInt("dfs.replication", replication);

    MatrixCharacteristics[] s = new MatrixCharacteristics[1];
    s[0] = new MatrixCharacteristics(rlen, clen, brlen, bclen);

    // Print the complete instruction
    if (LOG.isTraceEnabled())
        inst.printCompleteMRJobInstruction(s);

    //set unique working dir
    MRJobConfiguration.setUniqueWorkingDir(job);

    //run mr job
    RunningJob runjob = JobClient.runJob(job);
    Group group = runjob.getCounters().getGroup(NUM_VALUES_PREFIX);
    numReducers = job.getNumReduceTasks();

    //process final meta data
    long[] counts = new long[numReducers];
    long total = 0;
    for (int i = 0; i < numReducers; i++) {
        counts[i] = group.getCounter(Integer.toString(i));
        total += counts[i];
    }

    //add missing 0s back to the results
    long missing0s = 0;
    if (total < rlen * clen) {
        if (partitionWith0 < 0)
            throw new RuntimeException("no partition contains 0, which is wrong!");
        missing0s = rlen * clen - total;
        counts[partitionWith0] += missing0s;
    } else
        partitionWith0 = -1;

    if (sortIndexes) {
        //run builtin job for shifting partially sorted blocks according to global offsets
        //we do this in this custom form since it would not fit into the current structure
        //of systemml to output two intermediates (partially sorted data, offsets) out of a 
        //single SortKeys lop
        boolean success = runjob.isSuccessful();
        if (success) {
            success = runStitchupJob(tmpOutput, rlen, clen, brlen, bclen, counts, numReducers, replication,
                    output);
        }
        MapReduceTool.deleteFileIfExistOnHDFS(tmpOutput);
        MapReduceTool.deleteFileIfExistOnHDFS(pfname);
        return new JobReturn(s[0], OutputInfo.BinaryBlockOutputInfo, success);
    } else {
        MapReduceTool.deleteFileIfExistOnHDFS(pfname);
        return new JobReturn(s[0], counts, partitionWith0, missing0s, runjob.isSuccessful());
    }
}

From source file:com.ibm.bi.dml.yarn.DMLYarnClient.java

License:Open Source License

/**
 * Copies all required resources (the serialized DML config, the DML script,
 * and the application jar) into the given HDFS working directory.
 * 
 * @param yconf yarn configuration
 * @param hdfsWD HDFS working directory
 * @throws ParseException
 * @throws IOException
 * @throws DMLRuntimeException
 * @throws InterruptedException 
 */
@SuppressWarnings("deprecation")
private void copyResourcesToHdfsWorkingDir(YarnConfiguration yconf, String hdfsWD)
        throws ParseException, IOException, DMLRuntimeException, InterruptedException {
    FileSystem fs = FileSystem.get(yconf);

    //create working directory
    MapReduceTool.createDirIfNotExistOnHDFS(hdfsWD, DMLConfig.DEFAULT_SHARED_DIR_PERMISSION);

    //serialize the dml config to HDFS file 
    //NOTE: we do not modify and ship the absolute scratch space path of the current user
    //because this might result in permission issues if the app master is run with a different user
    //(runtime plan migration during resource reoptimizations now needs to use qualified names
    //for shipping/reading intermediates) TODO modify resource reoptimizer on prototype integration.
    Path confPath = new Path(hdfsWD, DML_CONFIG_NAME);
    FSDataOutputStream fout = fs.create(confPath, true);
    //_dmlConfig.makeQualifiedScratchSpacePath(); 
    fout.writeBytes(_dmlConfig.serializeDMLConfig() + "\n");
    fout.close();
    _hdfsDMLConfig = confPath.makeQualified(fs).toString();
    LOG.debug("DML config written to HDFS file: " + _hdfsDMLConfig + "");

    //serialize the dml script to HDFS file
    Path scriptPath = new Path(hdfsWD, DML_SCRIPT_NAME);
    FSDataOutputStream fout2 = fs.create(scriptPath, true);
    fout2.writeBytes(_dmlScript);
    fout2.close();
    _hdfsDMLScript = scriptPath.makeQualified(fs).toString();
    LOG.debug("DML script written to HDFS file: " + _hdfsDMLScript + "");

    // copy local jar file to HDFS (try to get the original jar filename)
    String fname = getLocalJarFileNameFromEnvConst();
    if (fname == null) {
        //get location of unpacked jar classes and repackage (if required)
        String lclassFile = DMLYarnClient.class.getProtectionDomain().getCodeSource().getLocation().getPath()
                .toString();
        File flclassFile = new File(lclassFile);
        if (!flclassFile.isDirectory()) //called w/ jar 
            fname = lclassFile;
        else //called w/ unpacked jar (need to be repackaged)   
            fname = createJar(lclassFile);
    }
    Path srcPath = new Path(fname);
    Path dstPath = new Path(hdfsWD, srcPath.getName());
    FileUtil.copy(FileSystem.getLocal(yconf), srcPath, fs, dstPath, false, true, yconf);
    _hdfsJarFile = dstPath.makeQualified(fs).toString();
    LOG.debug(
            "Jar file copied from local file: " + srcPath.toString() + " to HDFS file: " + dstPath.toString());
}
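
The method above repeatedly uses a write-then-qualify pattern: create a file under the working directory, then store its fully qualified name for later reference. A minimal sketch of that pattern, with illustrative paths and contents:

public static String writeAndQualify(FileSystem fs, Path workingDir) throws IOException {
    Path p = new Path(workingDir, "example.txt");
    FSDataOutputStream out = fs.create(p, true);
    out.writeBytes("hello\n");
    out.close();
    // e.g. hdfs://namenode:8020/user/alice/example.txt
    return p.makeQualified(fs).toString();
}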

From source file:com.ibm.jaql.io.hadoop.FileOutputConfigurator.java

License:Apache License

public void setSequential(JobConf conf) throws Exception {
    registerSerializers(conf);

    // For an expression, the location is the final file name
    Path outPath = new Path(location);
    FileSystem fs = outPath.getFileSystem(conf);
    outPath = outPath.makeQualified(fs);
    if (fs.exists(outPath)) {
        // TODO: Jaql currently has overwrite semantics; add flag to control this
        if (fs.isFile(outPath)) {
            fs.delete(outPath, false);
        } else {
            // Look for a map-reduce output directory
            FileStatus[] nonMR = fs.listStatus(outPath, new PathFilter() {
                boolean onlyOne = true;

                public boolean accept(Path path) {
                    String name = path.getName();
                    if (name.matches("([.][.]?)|([.]part-[0-9]+.crc)|(part-[0-9]+)")) {
                        return false;
                    }
                    if (onlyOne) {
                        onlyOne = false;
                        return true;
                    }
                    return false;
                }
            });
            if (nonMR.length > 0) {
                throw new IOException(
                        "directory exists and is not a map-reduce output directory: " + nonMR[0].getPath());
            }
            fs.delete(outPath, true);
        }
    }

    // In sequential mode, we will write directly to the output file
    // and bypass the _temporary directory and rename of the standard 
    // FileOutputCommitter by using our own DirectFileOutputCommitter.
    FileOutputFormat.setOutputPath(conf, outPath.getParent());
    conf.setClass("mapred.output.committer.class", DirectFileOutputCommiter.class, OutputCommitter.class);
}