Example usage for org.apache.hadoop.fs FileSystem copyFromLocalFile

List of usage examples for org.apache.hadoop.fs FileSystem copyFromLocalFile

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileSystem copyFromLocalFile.

Prototype

public void copyFromLocalFile(Path src, Path dst) throws IOException 

Source Link

Document

The src file is on the local disk.

Usage

From source file:org.apache.nutch.api.resources.SeedResource.java

License:Apache License

/**
 * Creates a seed list file from the given seed list and returns the path of
 * the directory that contains it.
 *
 * <p>If the configured {@code fs.default.name} contains "hdfs", the seed file is
 * also copied to the same absolute path on that file system. A failure of that
 * copy is logged and does not prevent the directory path from being returned.
 *
 * @param seedList the seed list whose URLs are written to the seed file
 * @return the parent directory of the created seed file
 * @throws WebApplicationException with status 400 (Bad Request) if {@code seedList} is null
 */
@POST
@Path("/create")
@Consumes(MediaType.APPLICATION_JSON)
public String createSeedFile(SeedList seedList) {
    if (seedList == null) {
        throw new WebApplicationException(
                Response.status(Status.BAD_REQUEST).entity("Seed list cannot be empty!").build());
    }

    File seedFile = createSeedFile();
    BufferedWriter writer = getWriter(seedFile);
    try {
        Collection<SeedUrl> seedUrls = seedList.getSeedUrls();
        if (CollectionUtils.isNotEmpty(seedUrls)) {
            for (SeedUrl seedUrl : seedUrls) {
                writeUrl(writer, seedUrl);
            }
        }
    } finally {
        // Close the writer so buffered content reaches the disk before the file
        // is copied, and so the file handle is not leaked.
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException e) {
                LOG.error("Failed to close seed file writer for " + seedFile, e);
            }
        }
    }

    try {
        Configuration conf = configManager.get(ConfManager.DEFAULT);
        String fsName = conf.get("fs.default.name");
        LOG.info("fs.default.name : " + fsName);

        // fs.default.name may be unset; treat that like a local file system.
        if (fsName != null && fsName.contains("hdfs")) {
            LOG.info("Running under hadoop distributed file system");

            FileSystem fs = FileSystem.get(conf);
            // Source and destination intentionally share the same path: the local
            // file is mirrored at the identical location on the target file system.
            org.apache.hadoop.fs.Path path = new org.apache.hadoop.fs.Path(seedFile.getAbsolutePath());
            fs.copyFromLocalFile(path, path);
        } else {
            LOG.info("Running under local file system");
        }
    } catch (IOException e) {
        // Pass the exception itself so the stack trace is not lost.
        LOG.error("Failed to copy seed file " + seedFile + " to the configured file system", e);
    }

    return seedFile.getParent();
}

From source file:org.apache.oozie.action.hadoop.TestLauncher.java

License:Apache License

/**
 * Builds a launcher jar, uploads it to the test-case directory on the file
 * system, submits a job whose main class is {@code LauncherMainTester} with
 * the given arguments, and waits for that job to complete.
 *
 * @param arg the main arguments handed to the launcher
 * @return the submitted job, after the wait has finished
 * @throws Exception if any step of the setup or submission fails
 */
private RunningJob _test(String... arg) throws Exception {
    final Path actionDir = getFsTestCaseDir();
    final Path launcherJar = new Path(actionDir, "launcher.jar");

    // Package the launcher classes into a local jar and push it next to the action dir.
    final File localJar = IOUtils.createJar(new File(getTestCaseDir()), "launcher.jar",
            LauncherMapper.class, LauncherMainException.class, LauncherSecurityManager.class,
            LauncherException.class, LauncherMainTester.class);
    final FileSystem fileSystem = getFileSystem();
    fileSystem.copyFromLocalFile(new Path(localJar.toString()), launcherJar);

    final HadoopAccessorService accessor = Services.get().get(HadoopAccessorService.class);
    final JobConf launcherConf = accessor.createJobConf(new URI(getNameNodeUri()).getAuthority());

    // Single map task, no retries.
    launcherConf.set("user.name", getTestUser());
    launcherConf.setInt("mapred.map.tasks", 1);
    launcherConf.setInt("mapred.map.max.attempts", 1);
    launcherConf.setInt("mapred.reduce.max.attempts", 1);

    // Cluster endpoints.
    launcherConf.set("mapreduce.framework.name", "yarn");
    launcherConf.set("mapred.job.tracker", getJobTrackerUri());
    launcherConf.set("fs.default.name", getNameNodeUri());

    LauncherMapperHelper.setupMainClass(launcherConf, LauncherMainTester.class.getName());
    LauncherMapperHelper.setupMainArguments(launcherConf, arg);

    final Configuration actionConf = new XConfiguration();
    LauncherMapperHelper.setupLauncherInfo(launcherConf, "1", "1@a", actionDir, "1@a-0", actionConf, "");
    LauncherMapperHelper.setupYarnRestartHandling(launcherConf, launcherConf, "1@a",
            System.currentTimeMillis());

    assertEquals("1", actionConf.get("oozie.job.id"));
    assertEquals("1@a", actionConf.get("oozie.action.id"));

    DistributedCache.addFileToClassPath(new Path(launcherJar.toUri().getPath()), launcherConf);

    final JobClient client = createJobClient();
    final RunningJob submitted = client.submitJob(launcherConf);

    System.out.println("Action Dir: " + actionDir);
    System.out.println("LauncherMapper ID: " + submitted.getJobID().toString());

    // Poll for completion for up to three minutes.
    waitFor(180 * 1000, new Predicate() {
        public boolean evaluate() throws Exception {
            return submitted.isComplete();
        }
    });

    final String prepareXml = launcherConf.get("oozie.action.prepare.xml");
    assertTrue(prepareXml.equals(""));
    return submitted;
}

From source file:org.apache.oozie.service.ShareLibService.java

License:Apache License

/**
 * Copy jar containing classes./*from ww w  .  j a va2 s .c  om*/
 *
 * @param classes the classes
 * @param fs the FileSystem
 * @param executorDir is Path
 * @param type is sharelib key
 * @throws IOException Signals that an I/O exception has occurred.
 */
private void copyJarContainingClasses(List<Class> classes, FileSystem fs, Path executorDir, String type)
        throws IOException {
    fs.mkdirs(executorDir);
    Set<String> localJarSet = new HashSet<String>();
    for (Class c : classes) {
        String localJar = findContainingJar(c);
        if (localJar != null) {
            localJarSet.add(localJar);
        } else {
            throw new IOException("No jar containing " + c + " found");
        }
    }
    List<Path> listOfPaths = new ArrayList<Path>();
    for (String localJarStr : localJarSet) {
        File localJar = new File(localJarStr);
        fs.copyFromLocalFile(new Path(localJar.getPath()), executorDir);
        Path path = new Path(executorDir, localJar.getName());
        listOfPaths.add(path);
        LOG.info(localJar.getName() + " uploaded to " + executorDir.toString());
    }
    launcherLibMap.put(type, listOfPaths);

}

From source file:org.apache.oozie.tools.OozieSharelibCLI.java

License:Apache License

/**
 * Recursively schedules the upload of every regular file below {@code srcFile}
 * to the corresponding location below {@code dstPath}.
 *
 * <p>Directories are descended into immediately; each regular file is copied by
 * a task submitted to {@code threadPool}. The method returns once all tasks are
 * submitted, without waiting for them to finish.
 *
 * @param fs the file system that receives the files
 * @param threadPool the executor that runs the individual copy tasks
 * @param srcFile the local directory to copy
 * @param dstPath the destination directory on {@code fs}
 * @return the futures of all submitted copy tasks
 * @throws IOException if the contents of {@code srcFile} or one of its
 *         sub-directories cannot be listed
 */
private List<Future<Void>> copyFolderRecursively(final FileSystem fs, final ExecutorService threadPool,
        File srcFile, final Path dstPath) throws IOException {
    // listFiles() returns null when srcFile is not a directory or an I/O error
    // occurs; fail with a clear message instead of a NullPointerException.
    final File[] children = srcFile.listFiles();
    if (children == null) {
        throw new IOException(
                "Unable to list files in " + srcFile + ": not a directory or an I/O error occurred");
    }

    List<Future<Void>> taskList = new ArrayList<Future<Void>>();
    for (final File file : children) {
        final Path trgName = new Path(dstPath, file.getName());
        if (file.isDirectory()) {
            taskList.addAll(copyFolderRecursively(fs, threadPool, file, trgName));
        } else {
            taskList.add(threadPool.submit(new Callable<Void>() {
                @Override
                public Void call() throws Exception {
                    fs.copyFromLocalFile(new Path(file.toURI()), trgName);
                    return null;
                }
            }));
        }
    }
    return taskList;
}

From source file:org.apache.pig.backend.hadoop.executionengine.mapreduceExec.MapReduceLauncher.java

License:Apache License

/**
 * Registers the comma-separated files listed under {@code key} in
 * {@code properties} with the DistributedCache of {@code conf}.
 *
 * <p>Blank entries are skipped. When {@code shipToCluster} is true each file is
 * first copied to a temporary path obtained from {@code FileLocalizer}, and that
 * path is registered with the source file name as URI fragment; otherwise the
 * source URI is registered directly. Does nothing beyond enabling symlinks when
 * the property is absent.
 *
 * @param pigContext the context used to obtain temporary paths
 * @param conf the configuration that receives the cache entries
 * @param properties the properties holding the file list
 * @param key the property name of the comma-separated file list
 * @param shipToCluster whether the files must be copied before being registered
 * @throws IOException if a path is not a valid URI or a copy fails
 */
private static void setupDistributedCache(PigContext pigContext, Configuration conf, Properties properties,
        String key, boolean shipToCluster) throws IOException {
    // Turn on the symlink feature
    DistributedCache.createSymlink(conf);

    // Set up the DistributedCache for this job
    String fileNames = properties.getProperty(key);
    if (fileNames == null) {
        return;
    }

    for (String path : fileNames.split(",")) {
        path = path.trim();
        if (path.length() == 0) {
            continue;
        }
        Path src = new Path(path);

        // Ensure that 'src' is a valid URI
        URI srcURI;
        try {
            srcURI = new URI(src.toString());
        } catch (URISyntaxException ue) {
            // Chain the cause so the actual syntax problem is not lost.
            throw new IOException("Invalid cache specification, file doesn't exist: " + src, ue);
        }

        if (!shipToCluster) {
            DistributedCache.addCacheFile(srcURI, conf);
            continue;
        }

        // Ship it to the cluster and add the shipped copy to the DistributedCache
        Path dst = new Path(FileLocalizer.getTemporaryPath(null, pigContext).toString());
        FileSystem fs = dst.getFileSystem(conf);
        fs.copyFromLocalFile(src, dst);

        // Construct the dst#srcName uri for DistributedCache
        URI dstURI;
        try {
            dstURI = new URI(dst.toString() + "#" + src.getName());
        } catch (URISyntaxException ue) {
            throw new IOException("Invalid ship specification, file doesn't exist: " + dst, ue);
        }
        DistributedCache.addCacheFile(dstURI, conf);
    }
}

From source file:org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler.java

License:Apache License

/**
 * Adds each non-blank entry of {@code paths} to the distributed cache of
 * {@code conf}.
 *
 * <p>With {@code shipToCluster} set, an entry is first copied to a temporary
 * path obtained from {@code FileLocalizer}, its replication is set from the
 * configuration (default 3), and the copy is registered with the source file
 * name as URI fragment. Otherwise the source URI is registered as is.
 *
 * @param pigContext the context used for temporary paths and the error source
 * @param conf the configuration that receives the cache entries
 * @param paths the files to register; entries are trimmed, blank ones skipped
 * @param shipToCluster whether the files must be copied before being registered
 * @throws IOException if a file system operation fails or the destination URI is invalid
 */
private static void setupDistributedCache(PigContext pigContext, Configuration conf, String[] paths,
        boolean shipToCluster) throws IOException {
    // Turn on the symlink feature
    DistributedCache.createSymlink(conf);

    for (String rawPath : paths) {
        final String trimmed = rawPath.trim();
        if (trimmed.length() == 0) {
            continue;
        }

        final Path src = new Path(trimmed);

        // Ensure that 'src' is a valid URI
        final URI srcURI = toURI(src);

        if (!shipToCluster) {
            addToDistributedCache(srcURI, conf);
            continue;
        }

        // Ship the file to the cluster before registering it
        final Path dst = new Path(FileLocalizer.getTemporaryPath(pigContext).toString());
        final FileSystem dstFs = dst.getFileSystem(conf);
        dstFs.copyFromLocalFile(src, dst);
        dstFs.setReplication(dst, (short) conf.getInt(MRConfiguration.SUMIT_REPLICATION, 3));

        // Construct the dst#srcName uri for DistributedCache
        final URI dstURI;
        try {
            dstURI = new URI(dst.toString() + "#" + src.getName());
        } catch (URISyntaxException ue) {
            // The error code depends on where the error originated.
            final byte errSrc = pigContext.getErrorSource();
            final int errCode;
            if (errSrc == PigException.REMOTE_ENVIRONMENT) {
                errCode = 6004;
            } else if (errSrc == PigException.USER_ENVIRONMENT) {
                errCode = 4004;
            } else {
                errCode = 2037;
            }
            throw new ExecException("Invalid ship specification. File doesn't exist: " + dst, errCode,
                    errSrc);
        }
        addToDistributedCache(dstURI, conf);
    }
}

From source file:org.apache.pig.piggybank.test.evaluation.string.TestLookupInFiles.java

License:Apache License

/**
 * Verifies that {@code LookupInFiles} yields 1 for a key that appears in the
 * lookup files and 0 for a key that appears in none of them.
 *
 * <p>The input file holds the keys "one", "notexist" and "three"; the two lookup
 * files are uploaded as "lookup1" and "lookup2". Only the results of the first
 * two input rows are asserted.
 */
@Test
public void testLookupInFiles() throws Exception {
    File tmpFile = File.createTempFile("test", ".txt");
    tmpFile.deleteOnExit();
    PrintStream ps1 = new PrintStream(new FileOutputStream(tmpFile));

    ps1.println("one");
    ps1.println("notexist");
    ps1.println("three");
    ps1.close();

    File lookupFile1 = File.createTempFile("lookup", ".txt");
    lookupFile1.deleteOnExit();
    PrintStream lps1 = new PrintStream(new FileOutputStream(lookupFile1));

    lps1.println("one");
    lps1.println("two");
    lps1.println("three");
    lps1.close();

    File lookupFile2 = File.createTempFile("lookup", ".txt");
    lookupFile2.deleteOnExit();
    PrintStream lps2 = new PrintStream(new FileOutputStream(lookupFile2));

    lps2.println("one");
    lps2.println("ten");
    lps2.println("eleven");
    lps2.close();

    FileSystem fs = FileSystem
            .get(ConfigurationUtil.toConfiguration(pigServer.getPigContext().getProperties()));
    fs.copyFromLocalFile(new Path(lookupFile1.toString()), new Path("lookup1"));
    // Upload the second lookup file; previously lookupFile1 was uploaded twice
    // and lookupFile2 was never used.
    fs.copyFromLocalFile(new Path(lookupFile2.toString()), new Path("lookup2"));
    pigServer.registerQuery("A = LOAD '" + Util.generateURI(tmpFile.toString(), pigServer.getPigContext())
            + "' AS (key:chararray);");
    pigServer.registerQuery(
            "B = FOREACH A GENERATE org.apache.pig.piggybank.evaluation.string.LookupInFiles(key, 'lookup1', 'lookup2');");
    Iterator<Tuple> iter = pigServer.openIterator("B");

    // "one" is present in the lookup files
    int r = (Integer) iter.next().get(0);
    assertTrue(r == 1);
    // "notexist" is present in neither lookup file
    r = (Integer) iter.next().get(0);
    assertTrue(r == 0);
}

From source file:org.apache.pig.test.TestPigServer.java

License:Apache License

/**
 * Uploads a small jython script to the cluster file system, registers it by its
 * absolute remote path, and checks that the UDF it defines returns
 * "Hello, World" for each of the two input rows.
 */
@Test
public void testRegisterRemoteScript() throws Throwable {
    final String remoteName = "script.py";

    // Write the script to a local temp file.
    final File localScript = File.createTempFile("tmp", "");
    final PrintWriter out = new PrintWriter(new FileWriter(localScript));
    out.println("@outputSchema(\"word:chararray\")\ndef helloworld():\n    return 'Hello, World'");
    out.close();

    final FileSystem remoteFs = cluster.getFileSystem();
    final Path remoteScript = new Path(remoteName);
    remoteFs.copyFromLocalFile(new Path(localScript.getAbsolutePath()), remoteScript);

    // Resolve the absolute remote path so that the test does not depend on
    // the configured working directory.
    final String absPath = remoteFs.getFileStatus(new Path(remoteName)).getPath().toString();

    Util.createInputFile(cluster, "testRegisterRemoteScript_input", new String[] { "1", "2" });
    final PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerCode(absPath, "jython", "pig");
    server.registerQuery("a = load 'testRegisterRemoteScript_input';");
    server.registerQuery("b = foreach a generate pig.helloworld($0);");
    final Iterator<Tuple> results = server.openIterator("b");

    // One result tuple is expected per input row.
    for (int row = 0; row < 2; row++) {
        assertTrue(results.hasNext());
        final Tuple tuple = results.next();
        assertTrue(tuple.size() > 0);
        assertEquals("Hello, World", tuple.get(0));
    }

    assertFalse(results.hasNext());
}

From source file:org.apache.pig.test.TestPigServerWithMacros.java

License:Apache License

/**
 * Uploads a macro file to the cluster file system, imports it by its absolute
 * remote path, and checks that the {@code row_count} macro it defines counts
 * the two input rows.
 */
@Test
public void testRegisterRemoteMacro() throws Throwable {
    PigServer pig = new PigServer(cluster.getExecType(), cluster.getProperties());
    try {
        String macroName = "util.pig";
        File macroFile = File.createTempFile("tmp", "");
        PrintWriter pw = new PrintWriter(new FileWriter(macroFile));
        pw.println("DEFINE row_count(X) RETURNS Z { Y = group $X all; $Z = foreach Y generate COUNT($X); };");
        pw.close();

        FileSystem fs = cluster.getFileSystem();
        fs.copyFromLocalFile(new Path(macroFile.getAbsolutePath()), new Path(macroName));

        // find the absolute path for the directory so that it does not
        // depend on configuration
        String absPath = fs.getFileStatus(new Path(macroName)).getPath().toString();

        Util.createInputFile(cluster, "testRegisterRemoteMacro_input", new String[] { "1", "2" });

        pig.registerQuery("import '" + absPath + "';");
        pig.registerQuery("a = load 'testRegisterRemoteMacro_input';");
        pig.registerQuery("b = row_count(a);");
        Iterator<Tuple> iter = pig.openIterator("b");

        assertEquals(2L, ((Long) iter.next().get(0)).longValue());
    } finally {
        // Shut the server down even when a step or assertion above fails.
        pig.shutdown();
    }
}

From source file:org.apache.reef.runtime.mesos.driver.REEFScheduler.java

License:Apache License

/**
 * Packs the entries of the global folder into the gzip-compressed tar archive
 * {@code REEF_TAR}, uploads that archive to the default Hadoop file system
 * under {@code /<jobIdentifier>/}, and returns the URI of the uploaded archive.
 *
 * @param jobIdentifier the job identifier used as the remote parent directory
 * @return the URI of the uploaded archive
 * @throws RuntimeException wrapping the {@link IOException} if archiving or uploading fails
 */
private String getReefTarUri(final String jobIdentifier) {
    try {
        // Create REEF_TAR. try-with-resources closes every stream even when
        // archiving fails part-way, and finishes the archive before the upload.
        final File globalFolder = new File(this.fileNames.getGlobalFolderPath());
        try (final FileOutputStream fileOutputStream = new FileOutputStream(REEF_TAR);
                final TarArchiveOutputStream tarArchiveOutputStream = new TarArchiveOutputStream(
                        new GZIPOutputStream(fileOutputStream));
                final DirectoryStream<Path> directoryStream = Files
                        .newDirectoryStream(globalFolder.toPath())) {

            for (final Path path : directoryStream) {
                tarArchiveOutputStream.putArchiveEntry(
                        new TarArchiveEntry(path.toFile(), globalFolder + "/" + path.getFileName()));

                try (final BufferedInputStream bufferedInputStream = new BufferedInputStream(
                        new FileInputStream(path.toFile()))) {
                    IOUtils.copy(bufferedInputStream, tarArchiveOutputStream);
                }

                tarArchiveOutputStream.closeArchiveEntry();
            }
        }

        // Upload REEF_TAR to HDFS
        final FileSystem fileSystem = FileSystem.get(new Configuration());
        final org.apache.hadoop.fs.Path src = new org.apache.hadoop.fs.Path(REEF_TAR);
        final String reefTarUri = fileSystem.getUri().toString() + "/" + jobIdentifier + "/" + REEF_TAR;
        final org.apache.hadoop.fs.Path dst = new org.apache.hadoop.fs.Path(reefTarUri);
        fileSystem.copyFromLocalFile(src, dst);

        return reefTarUri;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}