List of usage examples for org.apache.hadoop.fs FileSystem makeQualified
public Path makeQualified(Path path)
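makeQualified resolves a possibly relative, scheme-less Path against the file system's URI and working directory and returns a fully qualified path (scheme, authority, and absolute path filled in). A minimal sketch of the call, assuming whatever fs.defaultFS is on the classpath; the relative path "example/data" is only illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // FileSystem.get picks up fs.defaultFS from the configuration (core-site.xml if present)
        FileSystem fs = FileSystem.get(conf);

        // A relative, scheme-less path...
        Path relative = new Path("example/data");

        // ...comes back absolute with scheme and authority, e.g.
        // hdfs://namenode:8020/user/<user>/example/data, or file:/... for the local file system
        Path qualified = fs.makeQualified(relative);
        System.out.println(qualified);
    }
}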
From source file:PiEstimator.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    // setup job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);
    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file,
                    LongWritable.class, LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}
From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java
License:Apache License
@Override
public void setConf(Configuration conf) {
    this.conf = conf;
    try {
        FileSystem fs = FileSystem.get(inputPath.toUri(), conf);
        rowPath = fs.makeQualified(inputPath);
        outputTmpBasePath = fs.makeQualified(outputTmpPath);
        keepTempFiles = conf.getBoolean(KEEP_TEMP_FILES, false);
    } catch (IOException ioe) {
        throw new IllegalStateException(ioe);
    }
}
From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.util.DistributedRowMatrix.java
License:Apache License
@Override
public void setConf(Configuration conf) {
    this.conf = conf;
    try {
        FileSystem fs = FileSystem.get(inputPath.toUri(), conf);
        rowPath = fs.makeQualified(inputPath);
        outputTmpBasePath = fs.makeQualified(outputTmpPath);
    } catch (IOException ioe) {
        throw new IllegalStateException(ioe);
    }
}
From source file:cascading.flow.hadoop.util.HadoopUtil.java
License:Open Source License
public static void resolvePaths(Configuration config, Collection<String> classpath, String remoteRoot,
        String resourceSubPath, Map<String, Path> localPaths, Map<String, Path> remotePaths) {
    FileSystem defaultFS = getDefaultFS(config);
    FileSystem localFS = getLocalFS(config);

    Path remoteRootPath = new Path(remoteRoot == null ? "./.staging" : remoteRoot);

    if (resourceSubPath != null)
        remoteRootPath = new Path(remoteRootPath, resourceSubPath);

    remoteRootPath = defaultFS.makeQualified(remoteRootPath);

    boolean defaultIsLocal = defaultFS.equals(localFS);

    for (String stringPath : classpath) {
        Path path = new Path(stringPath);
        URI uri = path.toUri();

        if (uri.getScheme() == null && !defaultIsLocal) // we want to sync
        {
            Path localPath = localFS.makeQualified(path);

            if (!exists(localFS, localPath))
                throw new FlowException("path not found: " + localPath);

            String name = localPath.getName();

            if (resourceSubPath != null)
                name = resourceSubPath + "/" + name;

            localPaths.put(name, localPath);
            remotePaths.put(name, defaultFS.makeQualified(new Path(remoteRootPath, path.getName())));
        } else if (localFS.equals(getFileSystem(config, path))) {
            if (!exists(localFS, path))
                throw new FlowException("path not found: " + path);

            Path localPath = localFS.makeQualified(path);

            String name = localPath.getName();

            if (resourceSubPath != null)
                name = resourceSubPath + "/" + name;

            localPaths.put(name, localPath);
        } else {
            if (!exists(defaultFS, path))
                throw new FlowException("path not found: " + path);

            Path defaultPath = defaultFS.makeQualified(path);

            String name = defaultPath.getName();

            if (resourceSubPath != null)
                name = resourceSubPath + "/" + name;

            remotePaths.put(name, defaultPath);
        }
    }
}
From source file:cascading.flow.tez.planner.Hadoop2TezFlowStepJob.java
License:Open Source License
private Path prepareEnsureStagingDir(TezConfiguration workingConf) throws IOException {
    String stepStagingPath = createStepStagingPath();

    workingConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stepStagingPath);

    Path stagingDir = new Path(stepStagingPath);
    FileSystem fileSystem = FileSystem.get(workingConf);

    stagingDir = fileSystem.makeQualified(stagingDir);

    TokenCache.obtainTokensForNamenodes(new Credentials(), new Path[] { stagingDir }, workingConf);

    TezClientUtils.ensureStagingDirExists(workingConf, stagingDir);

    if (fileSystem.getScheme().startsWith("file:/"))
        new File(stagingDir.toUri()).mkdirs();

    return stagingDir;
}
From source file:cascading.tap.hive.HiveTap.java
License:Open Source License
@Override
public boolean resourceExists(JobConf conf) throws IOException {
    IMetaStoreClient metaStoreClient = null;
    try {
        metaStoreClient = createMetaStoreClient();
        Table table = metaStoreClient.getTable(tableDescriptor.getDatabaseName(),
                tableDescriptor.getTableName());
        modifiedTime = table.getLastAccessTime();
        // check if the schema matches the table descriptor. If not, throw an exception.
        if (strict) {
            LOG.info("strict mode: comparing existing hive table with table descriptor");
            if (!table.getTableType().equals(tableDescriptor.toHiveTable().getTableType()))
                throw new HiveTableValidationException(
                        String.format("expected a table of type '%s' but found '%s'",
                                tableDescriptor.toHiveTable().getTableType(), table.getTableType()));
            // Check that the paths are the same
            FileSystem fs = FileSystem.get(conf);
            StorageDescriptor sd = table.getSd();
            Path expectedPath = fs.makeQualified(
                    new Path(tableDescriptor.getLocation(hiveConf.getVar(ConfVars.METASTOREWAREHOUSE))));
            Path actualPath = fs.makeQualified(new Path(sd.getLocation()));
            if (!expectedPath.equals(actualPath))
                throw new HiveTableValidationException(
                        String.format("table in MetaStore does not have the same path. Expected %s got %s",
                                expectedPath, actualPath));
            List<FieldSchema> schemaList = sd.getCols();
            if (schemaList.size() != tableDescriptor.getColumnNames().length
                    - tableDescriptor.getPartitionKeys().length)
                throw new HiveTableValidationException(String.format(
                        "table in MetaStore does not have same number of columns. expected %d got %d",
                        tableDescriptor.getColumnNames().length - tableDescriptor.getPartitionKeys().length,
                        schemaList.size()));
            for (int index = 0; index < schemaList.size(); index++) {
                FieldSchema schema = schemaList.get(index);
                String expectedColumnName = tableDescriptor.getColumnNames()[index];
                String expectedColumnType = tableDescriptor.getColumnTypes()[index];
                // this could be extended to the StorageDescriptor if necessary.
                if (!schema.getName().equalsIgnoreCase(expectedColumnName))
                    throw new HiveTableValidationException(
                            String.format("hive schema mismatch: expected column name '%s', but found '%s'",
                                    expectedColumnName, schema.getName()));
                if (!schema.getType().equalsIgnoreCase(expectedColumnType))
                    throw new HiveTableValidationException(
                            String.format("hive schema mismatch: expected column type '%s', but found '%s'",
                                    expectedColumnType, schema.getType()));
            }
            List<FieldSchema> schemaPartitions = table.getPartitionKeys();
            if (schemaPartitions.size() != tableDescriptor.getPartitionKeys().length)
                throw new HiveTableValidationException(String.format(
                        "table in MetaStore does not have same number of partition columns. expected %d got %d",
                        tableDescriptor.getPartitionKeys().length, schemaPartitions.size()));
            int offset = tableDescriptor.getColumnNames().length - tableDescriptor.getPartitionKeys().length;
            for (int index = 0; index < schemaPartitions.size(); index++) {
                FieldSchema schema = schemaPartitions.get(index);
                String expectedColumnName = tableDescriptor.getColumnNames()[index + offset];
                String expectedColumnType = tableDescriptor.getColumnTypes()[index + offset];
                // this could be extended to the StorageDescriptor if necessary.
                if (!schema.getName().equalsIgnoreCase(expectedColumnName))
                    throw new HiveTableValidationException(String.format(
                            "hive partition schema mismatch: expected column name '%s', but found '%s'",
                            expectedColumnName, schema.getName()));
                if (!schema.getType().equalsIgnoreCase(expectedColumnType))
                    throw new HiveTableValidationException(String.format(
                            "hive partition schema mismatch: expected column type '%s', but found '%s'",
                            expectedColumnType, schema.getType()));
            }
        }
        return true;
    } catch (MetaException exception) {
        throw new IOException(exception);
    } catch (NoSuchObjectException exception) {
        return false;
    } catch (TException exception) {
        throw new IOException(exception);
    } finally {
        if (metaStoreClient != null)
            metaStoreClient.close();
    }
}
From source file:cn.jpush.hdfs.mr.example.BaileyBorweinPlouffe.java
License:Apache License
/** Run a map/reduce job to compute Pi. */
private static void compute(int startDigit, int nDigits, int nMaps, String workingDir, Configuration conf,
        PrintStream out) throws IOException {
    final String name = startDigit + "_" + nDigits;

    // setup working directory
    out.println("Working Directory = " + workingDir);
    out.println();

    // final FileSystem fs = FileSystem.get(conf);
    final FileSystem fs = new Path(workingDir, "part-r-00000").getFileSystem(conf);
    final Path dir = fs.makeQualified(new Path(workingDir));
    if (fs.exists(dir)) {
        throw new IOException("Working directory " + dir + " already exists. Please remove it first.");
    } else if (!fs.mkdirs(dir)) {
        throw new IOException("Cannot create working directory " + dir);
    }

    out.println("Start Digit = " + startDigit);
    out.println("Number of Digits = " + nDigits);
    out.println("Number of Maps = " + nMaps);

    // setup a job
    final Job job = createJob(name, conf);
    final Path hexfile = new Path(dir, "pi_" + name + ".hex");
    FileOutputFormat.setOutputPath(job, new Path(dir, "out"));

    // setup custom properties
    job.getConfiguration().set(WORKING_DIR_PROPERTY, dir.toString());
    job.getConfiguration().set(HEX_FILE_PROPERTY, hexfile.toString());

    job.getConfiguration().setInt(DIGIT_START_PROPERTY, startDigit);
    job.getConfiguration().setInt(DIGIT_SIZE_PROPERTY, nDigits);
    job.getConfiguration().setInt(DIGIT_PARTS_PROPERTY, nMaps);

    // start a map/reduce job
    out.println("\nStarting Job ...");
    final long startTime = System.currentTimeMillis();
    try {
        if (!job.waitForCompletion(true)) {
            out.println("Job failed.");
            System.exit(1);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        out.println("Duration is " + duration + " seconds.");
    }
    out.println("Output file: " + hexfile);
}
From source file:co.cask.cdap.internal.app.runtime.batch.distributed.MapReduceContainerHelper.java
License:Apache License
/**
 * Gets the MapReduce framework URI based on the {@code mapreduce.application.framework.path} setting.
 *
 * @param hConf the job configuration
 * @return the framework URI or {@code null} if not present or if the URI in the config is invalid.
 */
@Nullable
public static URI getFrameworkURI(Configuration hConf) {
    String framework = hConf.get(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH);
    if (framework == null) {
        return null;
    }

    try {
        // Parse the path. It can contain '#' to represent the localized file name
        URI uri = new URI(framework);
        String linkName = uri.getFragment();

        // The following resolution logic is copied from JobSubmitter in MR.
        FileSystem fs = FileSystem.get(hConf);
        Path frameworkPath = fs.makeQualified(new Path(uri.getScheme(), uri.getAuthority(), uri.getPath()));
        FileContext fc = FileContext.getFileContext(frameworkPath.toUri(), hConf);
        frameworkPath = fc.resolvePath(frameworkPath);
        uri = frameworkPath.toUri();

        // If it doesn't have a localized name (in the URI fragment), then use the last part of the URI path as the name
        if (linkName == null) {
            linkName = uri.getPath();
            int idx = linkName.lastIndexOf('/');
            if (idx >= 0) {
                linkName = linkName.substring(idx + 1);
            }
        }
        return new URI(uri.getScheme(), uri.getAuthority(), uri.getPath(), null, linkName);
    } catch (URISyntaxException e) {
        LOG.warn("Failed to parse {} as a URI. MapReduce framework path is not used. Check the setting for {}.",
                framework, MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, e);
    } catch (IOException e) {
        LOG.warn("Failed to resolve {} URI. MapReduce framework path is not used. Check the setting for {}.",
                framework, MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, e);
    }
    return null;
}
From source file:co.cask.hydrator.action.ftp.FTPCopyAction.java
License:Apache License
@Override
public void run(ActionContext context) throws Exception {
    Path destination = new Path(config.getDestDirectory());
    FileSystem fileSystem = FileSystem.get(new Configuration());
    destination = fileSystem.makeQualified(destination);
    if (!fileSystem.exists(destination)) {
        fileSystem.mkdirs(destination);
    }

    FTPClient ftp;
    if ("ftp".equals(config.getProtocol().toLowerCase())) {
        ftp = new FTPClient();
    } else {
        ftp = new FTPSClient();
    }
    ftp.setControlKeepAliveTimeout(5);

    // UNIX type server
    FTPClientConfig ftpConfig = new FTPClientConfig();
    // Set additional parameters required for the ftp,
    // for example config.setServerTimeZoneId("Pacific/Pitcairn")
    ftp.configure(ftpConfig);

    try {
        ftp.connect(config.getHost(), config.getPort());
        ftp.enterLocalPassiveMode();
        String replyString = ftp.getReplyString();
        LOG.info("Connected to server {} and port {} with reply from connect as {}.", config.getHost(),
                config.getPort(), replyString);

        // Check the reply code for actual success
        int replyCode = ftp.getReplyCode();

        if (!FTPReply.isPositiveCompletion(replyCode)) {
            ftp.disconnect();
            throw new RuntimeException(String.format("FTP server refused connection with code %s and reply %s.",
                    replyCode, replyString));
        }

        if (!ftp.login(config.getUserName(), config.getPassword())) {
            LOG.error("login command reply code {}, {}", ftp.getReplyCode(), ftp.getReplyString());
            ftp.logout();
            throw new RuntimeException(String.format(
                    "Login to the FTP server %s and port %s failed. Please check user name and password.",
                    config.getHost(), config.getPort()));
        }

        FTPFile[] ftpFiles = ftp.listFiles(config.getSrcDirectory());
        LOG.info("listFiles command reply code: {}, {}.", ftp.getReplyCode(), ftp.getReplyString());
        // Check the reply code for the listFiles call.
        // If it is "522 Data connections must be encrypted", the data channel also needs to be encrypted.
        if (ftp.getReplyCode() == 522 && "sftp".equalsIgnoreCase(config.getProtocol())) {
            // encrypt data channel and listFiles again
            ((FTPSClient) ftp).execPROT("P");
            LOG.info("Attempting command listFiles on encrypted data channel.");
            ftpFiles = ftp.listFiles(config.getSrcDirectory());
        }

        for (FTPFile file : ftpFiles) {
            String source = config.getSrcDirectory() + "/" + file.getName();
            LOG.info("Current file {}, source {}", file.getName(), source);
            if (config.getExtractZipFiles() && file.getName().endsWith(".zip")) {
                copyZip(ftp, source, fileSystem, destination);
            } else {
                Path destinationPath = fileSystem.makeQualified(new Path(destination, file.getName()));
                LOG.debug("Downloading {} to {}", file.getName(), destinationPath.toString());
                try (OutputStream output = fileSystem.create(destinationPath)) {
                    InputStream is = ftp.retrieveFileStream(source);
                    ByteStreams.copy(is, output);
                }
            }
            if (!ftp.completePendingCommand()) {
                LOG.error("Error completing command.");
            }
        }
        ftp.logout();
    } finally {
        if (ftp.isConnected()) {
            try {
                ftp.disconnect();
            } catch (Throwable e) {
                LOG.error("Failure to disconnect the ftp connection.", e);
            }
        }
    }
}
From source file:co.cask.hydrator.action.ftp.FTPCopyAction.java
License:Apache License
private void copyZip(FTPClient ftp, String source, FileSystem fs, Path destination) throws IOException {
    InputStream is = ftp.retrieveFileStream(source);
    try (ZipInputStream zis = new ZipInputStream(new BufferedInputStream(is))) {
        ZipEntry entry;
        while ((entry = zis.getNextEntry()) != null) {
            LOG.debug("Extracting {}", entry);
            Path destinationPath = fs.makeQualified(new Path(destination, entry.getName()));
            try (OutputStream os = fs.create(destinationPath)) {
                LOG.debug("Downloading {} to {}", entry.getName(), destinationPath.toString());
                ByteStreams.copy(zis, os);
            }
        }
    }
}