Example usage for org.apache.hadoop.fs FileSystem makeQualified

Introduction

On this page you can find usage examples for org.apache.hadoop.fs FileSystem makeQualified.

Prototype

public Path makeQualified(Path path) 

Document

Qualify a path to one which uses this FileSystem and, if relative, make it absolute.
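
For instance, qualifying a relative path against the local file system attaches the file scheme and resolves the path against the current working directory. A minimal, self-contained sketch (the printed URI depends on where it is run):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedDemo {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.getLocal(new Configuration());
        // A relative path gains this FileSystem's scheme and is made absolute,
        // e.g. file:/home/user/data/input.txt
        System.out.println(fs.makeQualified(new Path("data/input.txt")));
    }
}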

Usage

From source file: gobblin.data.management.trash.TrashFactoryTest.java

License: Apache License

@Test
public void test() throws IOException {
    FileSystem fs = mock(FileSystem.class);

    Path homeDirectory = new Path("/home/directory");
    Path trashDirectory = new Path(homeDirectory, Trash.DEFAULT_TRASH_DIRECTORY);
    Path trashIdentifierFile = new Path(trashDirectory, Trash.TRASH_IDENTIFIER_FILE);

    when(fs.getHomeDirectory()).thenReturn(homeDirectory);
    when(fs.exists(trashDirectory)).thenReturn(true);
    when(fs.exists(trashIdentifierFile)).thenReturn(true);
    when(fs.listStatus(trashDirectory)).thenReturn(new FileStatus[] {});
    when(fs.isDirectory(trashDirectory)).thenReturn(true);

    when(fs.mkdirs(any(Path.class))).thenReturn(true);
    when(fs.mkdirs(any(Path.class), any(FsPermission.class))).thenReturn(true);
    when(fs.createNewFile(any(Path.class))).thenReturn(true);
    when(fs.makeQualified(any(Path.class))).thenAnswer(new Answer<Path>() {
        @Override
        public Path answer(InvocationOnMock invocation) throws Throwable {
            return (Path) invocation.getArguments()[0];
        }
    });

    Properties properties;

    properties = getBaseProperties(trashDirectory);
    Assert.assertTrue(TrashFactory.createTrash(fs, properties) instanceof Trash);
    Assert.assertTrue(TrashFactory.createProxiedTrash(fs, properties) instanceof ProxiedTrash);

    properties = getBaseProperties(trashDirectory);
    properties.setProperty(TrashFactory.SIMULATE, Boolean.toString(true));
    Assert.assertTrue(TrashFactory.createTrash(fs, properties) instanceof MockTrash);
    Assert.assertTrue(TrashFactory.createProxiedTrash(fs, properties) instanceof MockTrash);

    properties = getBaseProperties(trashDirectory);
    properties.setProperty(TrashFactory.TRASH_TEST, Boolean.toString(true));
    Assert.assertTrue(TrashFactory.createTrash(fs, properties) instanceof TestTrash);
    Assert.assertTrue(TrashFactory.createProxiedTrash(fs, properties) instanceof TestTrash);

    properties = getBaseProperties(trashDirectory);
    properties.setProperty(TrashFactory.SKIP_TRASH, Boolean.toString(true));
    Assert.assertTrue(TrashFactory.createTrash(fs, properties) instanceof ImmediateDeletionTrash);
    Assert.assertTrue(TrashFactory.createProxiedTrash(fs, properties) instanceof ImmediateDeletionTrash);

}
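
This test (and the InputFormat tests below) stubs makeQualified as an identity function, so qualification is a no-op under the mock. On Mockito 2 or later the anonymous Answer can be collapsed into a lambda; a sketch equivalent to the stub above:

    when(fs.makeQualified(any(Path.class))).thenAnswer(invocation -> invocation.getArgument(0));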

From source file: gobblin.runtime.mapreduce.GobblinWorkUnitsInputFormatTest.java

License: Apache License

@Test
public void testGetSplits() throws Exception {

    URI baseUri = new URI(GobblinWorkUnitsInputFormatTest.class.getSimpleName() + "://testGetSplits");
    Configuration configuration = new Configuration();

    Path workUnitsDir = new Path(new Path(baseUri), "/workUnits");

    FileSystem fs = Mockito.mock(FileSystem.class);
    FileStatus[] statuses = createFileStatuses(20, workUnitsDir);
    Mockito.when(fs.listStatus(workUnitsDir)).thenReturn(statuses);
    Mockito.when(fs.makeQualified(Mockito.any(Path.class))).thenAnswer(new Answer<Path>() {
        @Override
        public Path answer(InvocationOnMock invocation) throws Throwable {
            return (Path) invocation.getArguments()[0];
        }
    });

    FileSystemTestUtils.addFileSystemForTest(baseUri, configuration, fs);

    GobblinWorkUnitsInputFormat inputFormat = new GobblinWorkUnitsInputFormat();
    Job job = Job.getInstance(configuration);
    FileInputFormat.addInputPath(job, workUnitsDir);

    List<InputSplit> splits = inputFormat.getSplits(job);

    Assert.assertEquals(splits.size(), 20);
    verifyPaths(splits, statuses);
}

From source file: gobblin.runtime.mapreduce.GobblinWorkUnitsInputFormatTest.java

License: Apache License

@Test
public void testGetSplitsMaxSize() throws Exception {

    URI baseUri = new URI(GobblinWorkUnitsInputFormatTest.class.getSimpleName() + "://testGetSplitsMaxSize");
    Configuration configuration = new Configuration();

    Path workUnitsDir = new Path(new Path(baseUri), "/workUnits");

    FileSystem fs = Mockito.mock(FileSystem.class);
    FileStatus[] statuses = createFileStatuses(20, workUnitsDir);
    Mockito.when(fs.listStatus(workUnitsDir)).thenReturn(statuses);
    Mockito.when(fs.makeQualified(Mockito.any(Path.class))).thenAnswer(new Answer<Path>() {
        @Override
        public Path answer(InvocationOnMock invocation) throws Throwable {
            return (Path) invocation.getArguments()[0];
        }
    });

    FileSystemTestUtils.addFileSystemForTest(baseUri, configuration, fs);

    GobblinWorkUnitsInputFormat inputFormat = new GobblinWorkUnitsInputFormat();
    Job job = Job.getInstance(configuration);
    FileInputFormat.addInputPath(job, workUnitsDir);
    GobblinWorkUnitsInputFormat.setMaxMappers(job, 6);

    List<InputSplit> splits = inputFormat.getSplits(job);

    Assert.assertTrue(splits.size() < 6);
    verifyPaths(splits, statuses);
}

From source file: gobblin.test.TestExtractor.java

License: Apache License

public TestExtractor(WorkUnitState workUnitState) {
    //super(workUnitState);
    Schema schema = new Schema.Parser().parse(AVRO_SCHEMA);
    Path sourceFile = new Path(workUnitState.getWorkunit().getProp(TestSource.SOURCE_FILE_KEY));
    LOG.info("Reading from source file " + sourceFile);
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
    try {
        FileSystem fs = FileSystem.get(
                URI.create(workUnitState.getProp(ConfigurationKeys.FS_URI_KEY, ConfigurationKeys.LOCAL_FS_URI)),
                new Configuration());
        sourceFile = fs.makeQualified(sourceFile); // assign the result; makeQualified has no side effects
        this.dataFileReader = new DataFileReader<GenericRecord>(new FsInput(sourceFile, new Configuration()),
                datumReader);
    } catch (IOException ioe) {
        LOG.error("Failed to read the source file " + sourceFile, ioe);
    }
}

From source file: inflater.runner.RunInflater.java

License: MIT License

@Override
public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException, ParseException {
    if (args.length < 2) {
        return -1;
    }
    if (conf == null) {
        conf = new Configuration();
    }

    GiraphConfiguration giraphConf = new GiraphConfiguration(getConf());
    giraphConf.addResource(new Path("giraph-site.xml"));

    GiraphJob job = new GiraphJob(giraphConf, giraphConf.getComputationName());

    Path inputLocal = new Path(args[0]);
    Path outputLocal = new Path(args[1]);

    // Copy the input file from the local file system to HDFS.
    FileSystem fs = FileSystem.get(giraphConf);
    inputHDFS = new Path(fs.getHomeDirectory(),
            "Giraph Source" + File.separator + inputLocal.getName() + File.separator + inputLocal.getName());
    inputHDFS = fs.makeQualified(inputHDFS);

    outputHDFS = new Path(fs.getHomeDirectory(),
            "Giraph Source" + File.separator + inputLocal.getName() + File.separator + "output");
    outputHDFS = fs.makeQualified(outputHDFS);

    fs.copyFromLocalFile(false, true, inputLocal, inputHDFS);

    // Delete the output path because Hadoop cannot overwrite it.
    if (fs.exists(outputHDFS))
        fs.delete(outputHDFS, true);

    FileOutputFormat.setOutputPath(job.getInternalJob(), outputHDFS);
    GiraphFileInputFormat.addVertexInputPath(giraphConf, inputHDFS);
    new GiraphConfigurationValidator<>(giraphConf).validateConfiguration();
    boolean result = job.run(true);
    if (result) {
        fs.copyToLocalFile(false, new Path(outputHDFS, "part-m-00000"), outputLocal);
    }
    return result ? 0 : -1;
}

From source file: io.druid.indexer.JobHelper.java

License: Apache License

public static Path prependFSIfNullScheme(FileSystem fs, Path path) {
    if (path.toUri().getScheme() == null) {
        path = fs.makeQualified(path);
    }
    return path;
}
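
A hypothetical caller, to illustrate the pass-through behavior (class and path names here are invented; only prependFSIfNullScheme comes from the source above):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import io.druid.indexer.JobHelper;

public class PrependFSDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // No scheme: the path is qualified against fs,
        // e.g. hdfs://namenode:8020/druid/segments
        Path qualified = JobHelper.prependFSIfNullScheme(fs, new Path("/druid/segments"));
        // Scheme already present: the path is returned unchanged.
        Path unchanged = JobHelper.prependFSIfNullScheme(fs, new Path("s3a://bucket/segments"));
        System.out.println(qualified + " " + unchanged);
    }
}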

From source file: it.polito.dbdmg.searum.ARM.java

License: Apache License

/**
 * Serializes the header table (frequency list) to a SequenceFile under the
 * output directory and registers it with the DistributedCache.
 */
public static void saveFList(Iterable<Pair<String, Long>> flist, Parameters params, Configuration conf)
        throws IOException {
    Path flistPath = new Path(params.get(OUTPUT), HEADER_TABLE);
    FileSystem fs = FileSystem.get(flistPath.toUri(), conf);
    flistPath = fs.makeQualified(flistPath);
    HadoopUtil.delete(conf, flistPath);
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, flistPath, Text.class, LongWritable.class);
    try {
        for (Pair<String, Long> pair : flist) {
            writer.append(new Text(pair.getFirst()), new LongWritable(pair.getSecond()));
        }
    } finally {
        writer.close();
    }
    DistributedCache.addCacheFile(flistPath.toUri(), conf);
}

From source file: it.polito.dbdmg.searum.ARM.java

License: Apache License

/**
 * Deserializes the header table from the SequenceFile registered in the
 * DistributedCache.
 * 
 * @return Deserialized header table
 */
public static List<Pair<String, Long>> readFList(Configuration conf) throws IOException {
    List<Pair<String, Long>> list = new ArrayList<Pair<String, Long>>();
    Path[] files = DistributedCache.getLocalCacheFiles(conf);
    if (files == null) {
        throw new IOException("Cannot read Frequency list from Distributed Cache");
    }
    if (files.length != 1) {
        throw new IOException("Cannot read Frequency list from Distributed Cache (" + files.length + ')');
    }
    FileSystem fs = FileSystem.getLocal(conf);
    Path fListLocalPath = fs.makeQualified(files[0]);
    // Fallback if we are running locally.
    if (!fs.exists(fListLocalPath)) {
        URI[] filesURIs = DistributedCache.getCacheFiles(conf);
        if (filesURIs == null) {
            throw new IOException("Cannot read header table from Distributed Cache");
        }
        if (filesURIs.length != 1) {
            throw new IOException("Cannot read header table from Distributed Cache (" + files.length + ')');
        }
        fListLocalPath = new Path(filesURIs[0].getPath());
    }
    for (Pair<Text, LongWritable> record : new SequenceFileIterable<Text, LongWritable>(fListLocalPath, true,
            conf)) {
        list.add(new Pair<String, Long>(record.getFirst().toString(), record.getSecond().get()));
    }
    return list;
}

From source file: ml.shifu.guagua.hadoop.io.GuaguaOptionsParser.java

License: Apache License

/**
 * Modify configuration according to user-specified generic options
 * 
 * @param conf
 *            Configuration to be modified
 * @param line
 *            User-specified generic options
 */
private void processGeneralOptions(Configuration conf, CommandLine line) throws IOException {
    if (line.hasOption("fs")) {
        FileSystem.setDefaultUri(conf, line.getOptionValue("fs"));
    }

    if (line.hasOption("jt")) {
        conf.set("mapred.job.tracker", line.getOptionValue("jt"));
    }
    if (line.hasOption("conf")) {
        String[] values = line.getOptionValues("conf");
        for (String value : values) {
            conf.addResource(new Path(value));
        }
    }
    if (line.hasOption("libjars")) {
        conf.set("tmpjars", validateFiles(line.getOptionValue("libjars"), conf));
        // setting libjars in client classpath
        URL[] libjars = getLibJars(conf);
        if (libjars != null && libjars.length > 0) {
            conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader()));
            Thread.currentThread().setContextClassLoader(
                    new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader()));
        }
    }
    if (line.hasOption("files")) {
        conf.set("tmpfiles", validateFiles(line.getOptionValue("files"), conf));
    }
    if (line.hasOption("archives")) {
        conf.set("tmparchives", validateFiles(line.getOptionValue("archives"), conf));
    }
    if (line.hasOption('D')) {
        String[] property = line.getOptionValues('D');
        for (String prop : property) {
            String[] keyval = prop.split("=", 2);
            if (keyval.length == 2) {
                conf.set(keyval[0], keyval[1]);
            }
        }
    }
    conf.setBoolean("mapred.used.genericoptionsparser", true);

    // tokensFile
    if (line.hasOption("tokenCacheFile")) {
        String fileName = line.getOptionValue("tokenCacheFile");
        // check if the local file exists
        try {
            FileSystem localFs = FileSystem.getLocal(conf);
            Path p = new Path(fileName);
            if (!localFs.exists(p)) {
                throw new FileNotFoundException("File " + fileName + " does not exist.");
            }

            LOG.debug("setting conf tokensFile: {}", fileName);
            conf.set("mapreduce.job.credentials.json", localFs.makeQualified(p).toString());
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
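
The flags handled above mirror Hadoop's standard GenericOptionsParser. A hypothetical invocation (the jar, class, and file names are illustrative, not from the source):

hadoop jar guagua-example.jar com.example.MyMain \
    -fs hdfs://namenode:8020 \
    -D mapred.job.queue.name=default \
    -libjars mylib.jar \
    -files lookup.txt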

From source file: ml.shifu.guagua.hadoop.ZooKeeperWorkerInterceptor.java

License: Apache License

@Override
public void preApplication(WorkerContext<MASTER_RESULT, WORKER_RESULT> context) {
    String zkServers = context.getProps().getProperty(GuaguaConstants.GUAGUA_ZK_SERVERS);
    if (zkServers == null || zkServers.length() == 0 || !ZooKeeperUtils.checkServers(zkServers)) {
        this.sleepTime = NumberFormatUtils.getLong(
                context.getProps().getProperty(GuaguaConstants.GUAGUA_COORDINATOR_SLEEP_UNIT), WAIT_SLOT_MILLS);
        this.isFixedTime = Boolean.TRUE.toString().equalsIgnoreCase(
                context.getProps().getProperty(GuaguaConstants.GUAGUA_COORDINATOR_FIXED_SLEEP_ENABLE,
                        GuaguaConstants.GUAGUA_COORDINATOR_FIXED_SLEEP));

        String hdfsZookeeperServerFolder = getZookeeperServerFolder(context);
        long start = System.nanoTime();
        while (true) {
            if (TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start) > 10 * 60 * 1000L) {
                throw new GuaguaRuntimeException("Cannot get zookeeper server address in 10 minutes.");
            }
            BufferedReader br = null;
            try {
                final FileSystem fileSystem = FileSystem.get(new Configuration());
                final Path zookeeperServerPath = fileSystem.makeQualified(new Path(hdfsZookeeperServerFolder,
                        GuaguaConstants.GUAGUA_CLUSTER_ZOOKEEPER_SERVER_FILE));
                LOG.info("Embeded zookeeper server address is {}", zookeeperServerPath);

                new RetryCoordinatorCommand(this.isFixedTime, this.sleepTime) {
                    @Override
                    public boolean retryExecution() throws Exception, InterruptedException {
                        return fileSystem.exists(zookeeperServerPath);
                    }
                }.execute();

                FSDataInputStream fis = fileSystem.open(zookeeperServerPath);
                br = new BufferedReader(new InputStreamReader(fis));
                String zookeeperServer = br.readLine();
                if (zookeeperServer == null || zookeeperServer.length() == 0) {
                    LOG.warn("Cannot get zookeeper server in {} ", zookeeperServerPath.toString());
                    // retry
                    continue;
                }
                // set server info to context for next interceptors.
                LOG.info("Embedded zookeeper instance is {}", zookeeperServer);
                context.getProps().setProperty(GuaguaConstants.GUAGUA_ZK_SERVERS, zookeeperServer);
                break;
            } catch (Throwable t) {
                LOG.warn(String.format("Error getting zookeeper address: %s", t.getMessage()));
                continue;
            } finally {
                IOUtils.closeQuietly(br);
            }
        }
    }
}