Example usage for org.apache.hadoop.fs FileSystem makeQualified

List of usage examples for org.apache.hadoop.fs FileSystem makeQualified

Introduction

On this page you can find example usage of org.apache.hadoop.fs FileSystem makeQualified.

Prototype

public Path makeQualified(Path path) 

Document

Qualify a path to one which uses this FileSystem and, if relative, make it absolute.
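
To make the description concrete, here is a minimal, self-contained sketch (not drawn from the projects below) that qualifies a relative path. The fs.defaultFS value and the input path are illustrative assumptions; the qualified result depends on your configuration and working directory.

// A hedged sketch of FileSystem#makeQualified.
// "hdfs://namenode:8020" and "data/input" are hypothetical values.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://namenode:8020"); // hypothetical cluster address

        FileSystem fs = FileSystem.get(conf);
        Path relative = new Path("data/input");

        // Fills in this FileSystem's scheme and authority and, because the path
        // is relative, resolves it against the working directory, producing
        // something like hdfs://namenode:8020/user/<user>/data/input.
        System.out.println(fs.makeQualified(relative));
    }
}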

Usage

From source file:PiEstimator.java

License:Apache License

/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    // setup job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());

    jobConf.setInputFormat(SequenceFileInputFormat.class);

    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);

    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file, LongWritable.class,
                    LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}

From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java

License:Apache License

@Override
public void setConf(Configuration conf) {
    this.conf = conf;
    try {
        FileSystem fs = FileSystem.get(inputPath.toUri(), conf);
        rowPath = fs.makeQualified(inputPath);
        outputTmpBasePath = fs.makeQualified(outputTmpPath);
        keepTempFiles = conf.getBoolean(KEEP_TEMP_FILES, false);
    } catch (IOException ioe) {
        throw new IllegalStateException(ioe);
    }
}

From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.util.DistributedRowMatrix.java

License:Apache License

@Override
public void setConf(Configuration conf) {
    this.conf = conf;
    try {
        FileSystem fs = FileSystem.get(inputPath.toUri(), conf);
        rowPath = fs.makeQualified(inputPath);
        outputTmpBasePath = fs.makeQualified(outputTmpPath);
    } catch (IOException ioe) {
        throw new IllegalStateException(ioe);
    }
}

From source file:cascading.flow.hadoop.util.HadoopUtil.java

License:Open Source License

public static void resolvePaths(Configuration config, Collection<String> classpath, String remoteRoot,
        String resourceSubPath, Map<String, Path> localPaths, Map<String, Path> remotePaths) {
    FileSystem defaultFS = getDefaultFS(config);
    FileSystem localFS = getLocalFS(config);

    Path remoteRootPath = new Path(remoteRoot == null ? "./.staging" : remoteRoot);

    if (resourceSubPath != null)
        remoteRootPath = new Path(remoteRootPath, resourceSubPath);

    remoteRootPath = defaultFS.makeQualified(remoteRootPath);

    boolean defaultIsLocal = defaultFS.equals(localFS);

    for (String stringPath : classpath) {
        Path path = new Path(stringPath);

        URI uri = path.toUri();

        if (uri.getScheme() == null && !defaultIsLocal) // we want to sync
        {
            Path localPath = localFS.makeQualified(path);

            if (!exists(localFS, localPath))
                throw new FlowException("path not found: " + localPath);

            String name = localPath.getName();

            if (resourceSubPath != null)
                name = resourceSubPath + "/" + name;

            localPaths.put(name, localPath);
            remotePaths.put(name, defaultFS.makeQualified(new Path(remoteRootPath, path.getName())));
        } else if (localFS.equals(getFileSystem(config, path))) {
            if (!exists(localFS, path))
                throw new FlowException("path not found: " + path);

            Path localPath = localFS.makeQualified(path);

            String name = localPath.getName();

            if (resourceSubPath != null)
                name = resourceSubPath + "/" + name;

            localPaths.put(name, localPath);
        } else {
            if (!exists(defaultFS, path))
                throw new FlowException("path not found: " + path);

            Path defaultPath = defaultFS.makeQualified(path);

            String name = defaultPath.getName();

            if (resourceSubPath != null)
                name = resourceSubPath + "/" + name;

            remotePaths.put(name, defaultPath);
        }
    }
}

From source file:cascading.flow.tez.planner.Hadoop2TezFlowStepJob.java

License:Open Source License

private Path prepareEnsureStagingDir(TezConfiguration workingConf) throws IOException {
    String stepStagingPath = createStepStagingPath();

    workingConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stepStagingPath);

    Path stagingDir = new Path(stepStagingPath);
    FileSystem fileSystem = FileSystem.get(workingConf);

    stagingDir = fileSystem.makeQualified(stagingDir);

    TokenCache.obtainTokensForNamenodes(new Credentials(), new Path[] { stagingDir }, workingConf);

    TezClientUtils.ensureStagingDirExists(workingConf, stagingDir);

    if (fileSystem.getScheme().startsWith("file"))
        new File(stagingDir.toUri()).mkdirs();

    return stagingDir;
}

From source file:cascading.tap.hive.HiveTap.java

License:Open Source License

@Override
public boolean resourceExists(JobConf conf) throws IOException {
    IMetaStoreClient metaStoreClient = null;
    try {
        metaStoreClient = createMetaStoreClient();
        Table table = metaStoreClient.getTable(tableDescriptor.getDatabaseName(),
                tableDescriptor.getTableName());
        modifiedTime = table.getLastAccessTime();
        // check if the schema matches the table descriptor. If not, throw an exception.
        if (strict) {
            LOG.info("strict mode: comparing existing hive table with table descriptor");
            if (!table.getTableType().equals(tableDescriptor.toHiveTable().getTableType()))
                throw new HiveTableValidationException(
                        String.format("expected a table of type '%s' but found '%s'",
                                tableDescriptor.toHiveTable().getTableType(), table.getTableType()));

            // Check that the paths are the same
            FileSystem fs = FileSystem.get(conf);
            StorageDescriptor sd = table.getSd();
            Path expectedPath = fs.makeQualified(
                    new Path(tableDescriptor.getLocation(hiveConf.getVar(ConfVars.METASTOREWAREHOUSE))));
            Path actualPath = fs.makeQualified(new Path(sd.getLocation()));

            if (!expectedPath.equals(actualPath))
                throw new HiveTableValidationException(
                        String.format("table in MetaStore does not have the sampe path. Expected %s got %s",
                                expectedPath, actualPath));

            List<FieldSchema> schemaList = sd.getCols();
            if (schemaList.size() != tableDescriptor.getColumnNames().length
                    - tableDescriptor.getPartitionKeys().length)
                throw new HiveTableValidationException(String.format(
                        "table in MetaStore does not have same number of columns. expected %d got %d",
                        tableDescriptor.getColumnNames().length - tableDescriptor.getPartitionKeys().length,
                        schemaList.size()));
            for (int index = 0; index < schemaList.size(); index++) {
                FieldSchema schema = schemaList.get(index);
                String expectedColumnName = tableDescriptor.getColumnNames()[index];
                String expectedColumnType = tableDescriptor.getColumnTypes()[index];
                // this could be extended to the StorageDescriptor if necessary.
                if (!schema.getName().equalsIgnoreCase(expectedColumnName))
                    throw new HiveTableValidationException(
                            String.format("hive schema mismatch: expected column name '%s', but found '%s'",
                                    expectedColumnName, schema.getName()));
                if (!schema.getType().equalsIgnoreCase(expectedColumnType))
                    throw new HiveTableValidationException(
                            String.format("hive schema mismatch: expected column type '%s', but found '%s'",
                                    expectedColumnType, schema.getType()));
            }
            List<FieldSchema> schemaPartitions = table.getPartitionKeys();
            if (schemaPartitions.size() != tableDescriptor.getPartitionKeys().length)
                throw new HiveTableValidationException(String.format(
                        "table in MetaStore does not have same number of partition columns. expected %d got %d",
                        tableDescriptor.getPartitionKeys().length, schemaPartitions.size()));
            int offset = tableDescriptor.getColumnNames().length - tableDescriptor.getPartitionKeys().length;
            for (int index = 0; index < schemaPartitions.size(); index++) {
                FieldSchema schema = schemaPartitions.get(index);
                String expectedColumnName = tableDescriptor.getColumnNames()[index + offset];
                String expectedColumnType = tableDescriptor.getColumnTypes()[index + offset];
                // this could be extended to the StorageDescriptor if necessary.
                if (!schema.getName().equalsIgnoreCase(expectedColumnName))
                    throw new HiveTableValidationException(String.format(
                            "hive partition schema mismatch: expected column name '%s', but found '%s'",
                            expectedColumnName, schema.getName()));
                if (!schema.getType().equalsIgnoreCase(expectedColumnType))
                    throw new HiveTableValidationException(String.format(
                            "hive partition schema mismatch: expected column type '%s', but found '%s'",
                            expectedColumnType, schema.getType()));
            }
        }
        return true;
    } catch (MetaException exception) {
        throw new IOException(exception);
    } catch (NoSuchObjectException exception) {
        return false;
    } catch (TException exception) {
        throw new IOException(exception);
    } finally {
        if (metaStoreClient != null)
            metaStoreClient.close();
    }
}

From source file:cn.jpush.hdfs.mr.example.BaileyBorweinPlouffe.java

License:Apache License

/** Run a map/reduce job to compute Pi. */
private static void compute(int startDigit, int nDigits, int nMaps, String workingDir, Configuration conf,
        PrintStream out) throws IOException {
    final String name = startDigit + "_" + nDigits;

    // setup working directory
    out.println("Working Directory = " + workingDir);
    out.println();
    final FileSystem fs = new Path(workingDir, "part-r-00000").getFileSystem(conf);
    final Path dir = fs.makeQualified(new Path(workingDir));
    if (fs.exists(dir)) {
        throw new IOException("Working directory " + dir + " already exists.  Please remove it first.");
    } else if (!fs.mkdirs(dir)) {
        throw new IOException("Cannot create working directory " + dir);
    }

    out.println("Start Digit      = " + startDigit);
    out.println("Number of Digits = " + nDigits);
    out.println("Number of Maps   = " + nMaps);

    // setup a job
    final Job job = createJob(name, conf);
    final Path hexfile = new Path(dir, "pi_" + name + ".hex");
    FileOutputFormat.setOutputPath(job, new Path(dir, "out"));

    // setup custom properties
    job.getConfiguration().set(WORKING_DIR_PROPERTY, dir.toString());
    job.getConfiguration().set(HEX_FILE_PROPERTY, hexfile.toString());

    job.getConfiguration().setInt(DIGIT_START_PROPERTY, startDigit);
    job.getConfiguration().setInt(DIGIT_SIZE_PROPERTY, nDigits);
    job.getConfiguration().setInt(DIGIT_PARTS_PROPERTY, nMaps);

    // start a map/reduce job
    out.println("\nStarting Job ...");
    final long startTime = System.currentTimeMillis();
    try {
        if (!job.waitForCompletion(true)) {
            out.println("Job failed.");
            System.exit(1);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        out.println("Duration is " + duration + " seconds.");
    }
    out.println("Output file: " + hexfile);
}

From source file:co.cask.cdap.internal.app.runtime.batch.distributed.MapReduceContainerHelper.java

License:Apache License

/**
 * Gets the MapReduce framework URI based on the {@code mapreduce.application.framework.path} setting.
 *
 * @param hConf the job configuration
 * @return the framework URI or {@code null} if not present or if the URI in the config is invalid.
 */
@Nullable
public static URI getFrameworkURI(Configuration hConf) {
    String framework = hConf.get(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH);
    if (framework == null) {
        return null;
    }

    try {
        // Parse the path. It can contain '#' to represent the localized file name
        URI uri = new URI(framework);
        String linkName = uri.getFragment();

        // The following resolution logic is copied from JobSubmitter in MR.
        FileSystem fs = FileSystem.get(hConf);
        Path frameworkPath = fs.makeQualified(new Path(uri.getScheme(), uri.getAuthority(), uri.getPath()));
        FileContext fc = FileContext.getFileContext(frameworkPath.toUri(), hConf);
        frameworkPath = fc.resolvePath(frameworkPath);
        uri = frameworkPath.toUri();

        // If there is no localized name (in the URI fragment), use the last part of the URI path as the name
        if (linkName == null) {
            linkName = uri.getPath();
            int idx = linkName.lastIndexOf('/');
            if (idx >= 0) {
                linkName = linkName.substring(idx + 1);
            }
        }
        return new URI(uri.getScheme(), uri.getAuthority(), uri.getPath(), null, linkName);
    } catch (URISyntaxException e) {
        LOG.warn("Failed to parse {} as a URI. MapReduce framework path is not used. Check the setting for {}.",
                framework, MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, e);
    } catch (IOException e) {
        LOG.warn("Failed to resolve {} URI. MapReduce framework path is not used. Check the setting for {}.",
                framework, MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, e);
    }
    return null;
}

From source file:co.cask.hydrator.action.ftp.FTPCopyAction.java

License:Apache License

@Override
public void run(ActionContext context) throws Exception {
    Path destination = new Path(config.getDestDirectory());
    FileSystem fileSystem = FileSystem.get(new Configuration());
    destination = fileSystem.makeQualified(destination);
    if (!fileSystem.exists(destination)) {
        fileSystem.mkdirs(destination);
    }

    FTPClient ftp;
    if ("ftp".equals(config.getProtocol().toLowerCase())) {
        ftp = new FTPClient();
    } else {
        ftp = new FTPSClient();
    }
    ftp.setControlKeepAliveTimeout(5);
    // UNIX type server
    FTPClientConfig ftpConfig = new FTPClientConfig();
    // Set additional parameters required for the FTP session here,
    // for example ftpConfig.setServerTimeZoneId("Pacific/Pitcairn")
    ftp.configure(ftpConfig);
    try {
        ftp.connect(config.getHost(), config.getPort());
        ftp.enterLocalPassiveMode();
        String replyString = ftp.getReplyString();
        LOG.info("Connected to server {} and port {} with reply from connect as {}.", config.getHost(),
                config.getPort(), replyString);

        // Check the reply code for actual success
        int replyCode = ftp.getReplyCode();

        if (!FTPReply.isPositiveCompletion(replyCode)) {
            ftp.disconnect();
            throw new RuntimeException(String.format("FTP server refused connection with code %s and reply %s.",
                    replyCode, replyString));
        }

        if (!ftp.login(config.getUserName(), config.getPassword())) {
            LOG.error("login command reply code {}, {}", ftp.getReplyCode(), ftp.getReplyString());
            ftp.logout();
            throw new RuntimeException(String.format(
                    "Login to the FTP server %s and port %s failed. " + "Please check user name and password.",
                    config.getHost(), config.getPort()));
        }

        FTPFile[] ftpFiles = ftp.listFiles(config.getSrcDirectory());
        LOG.info("listFiles command reply code: {}, {}.", ftp.getReplyCode(), ftp.getReplyString());
        // Check the reply code for listFiles call.
        // If its "522 Data connections must be encrypted" then it means data channel also need to be encrypted
        if (ftp.getReplyCode() == 522 && "sftp".equalsIgnoreCase(config.getProtocol())) {
            // encrypt data channel and listFiles again
            ((FTPSClient) ftp).execPROT("P");
            LOG.info("Attempting command listFiles on encrypted data channel.");
            ftpFiles = ftp.listFiles(config.getSrcDirectory());
        }
        for (FTPFile file : ftpFiles) {
            String source = config.getSrcDirectory() + "/" + file.getName();

            LOG.info("Current file {}, source {}", file.getName(), source);
            if (config.getExtractZipFiles() && file.getName().endsWith(".zip")) {
                copyZip(ftp, source, fileSystem, destination);
            } else {
                Path destinationPath = fileSystem.makeQualified(new Path(destination, file.getName()));
                LOG.debug("Downloading {} to {}", file.getName(), destinationPath.toString());
                try (OutputStream output = fileSystem.create(destinationPath)) {
                    InputStream is = ftp.retrieveFileStream(source);
                    ByteStreams.copy(is, output);
                }
            }
            if (!ftp.completePendingCommand()) {
                LOG.error("Error completing command.");
            }
        }
        ftp.logout();
    } finally {
        if (ftp.isConnected()) {
            try {
                ftp.disconnect();
            } catch (Throwable e) {
                LOG.error("Failure to disconnect the ftp connection.", e);
            }
        }
    }
}

From source file:co.cask.hydrator.action.ftp.FTPCopyAction.java

License:Apache License

private void copyZip(FTPClient ftp, String source, FileSystem fs, Path destination) throws IOException {
    InputStream is = ftp.retrieveFileStream(source);
    try (ZipInputStream zis = new ZipInputStream(new BufferedInputStream(is))) {
        ZipEntry entry;
        while ((entry = zis.getNextEntry()) != null) {
            LOG.debug("Extracting {}", entry);
            Path destinationPath = fs.makeQualified(new Path(destination, entry.getName()));
            try (OutputStream os = fs.create(destinationPath)) {
                LOG.debug("Downloading {} to {}", entry.getName(), destinationPath.toString());
                ByteStreams.copy(zis, os);
            }
        }
    }
}