Example usage for org.apache.hadoop.fs FileSystem copyFromLocalFile

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.FileSystem.copyFromLocalFile.

Prototype

public void copyFromLocalFile(Path src, Path dst) throws IOException 

Document

The src file is on the local disk; it is copied into the target file system at the given dst path, and the source is kept intact afterwards.
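
A minimal, self-contained sketch of the call; the paths and configuration here are illustrative, not taken from the sources below:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Resolves the file system named by fs.defaultFS (e.g. an HDFS cluster).
        FileSystem fs = FileSystem.get(conf);
        // Hypothetical paths: a local source file and an HDFS destination.
        fs.copyFromLocalFile(new Path("/tmp/data.csv"), new Path("/user/me/data.csv"));
        fs.close();
    }
}

This two-argument overload leaves the local source in place; the overload copyFromLocalFile(boolean delSrc, Path src, Path dst) can remove the source after a successful copy.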

Usage

From source file: edu.uci.ics.pregelix.example.util.TestCluster.java

License: Apache License

private void startHDFS() throws IOException {
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
    FileSystem lfs = FileSystem.getLocal(new Configuration());
    lfs.delete(new Path("build"), true);
    System.setProperty("hadoop.log.dir", "logs");
    dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
    FileSystem dfs = FileSystem.get(conf);
    Path src = new Path(DATA_PATH);
    Path dest = new Path(HDFS_PATH);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    src = new Path(DATA_PATH2);
    dest = new Path(HDFS_PATH2);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    src = new Path(DATA_PATH3);
    dest = new Path(HDFS_PATH3);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    src = new Path(DATA_PATH4);
    dest = new Path(HDFS_PATH4);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    src = new Path(DATA_PATH5);
    dest = new Path(HDFS_PATH5);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
    conf.writeXml(confOutput);
    confOutput.flush();
    confOutput.close();
}
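
The five staging stanzas above differ only in their path constants; a loop over the pairs expresses the same setup more compactly (a sketch, assuming the constants can be gathered into an array):

String[][] pairs = { { DATA_PATH, HDFS_PATH }, { DATA_PATH2, HDFS_PATH2 },
        { DATA_PATH3, HDFS_PATH3 }, { DATA_PATH4, HDFS_PATH4 }, { DATA_PATH5, HDFS_PATH5 } };
for (String[] p : pairs) {
    Path d = new Path(p[1]);
    dfs.mkdirs(d);
    dfs.copyFromLocalFile(new Path(p[0]), d);
}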

From source file: edu.umn.cs.spatialHadoop.core.SpatialSite.java

License: Open Source License

/**
 * Ensures that the given class is in the class path of running jobs.
 * If the jar is not already in the class path, it is added to the
 * DistributedCache of the given job to ensure the associated job will work
 * fine.
 * @param conf
 * @param klass
 */
public static void addClassToPath(Configuration conf, Class<?> klass) {
    // Check if we need to add the containing jar to class path
    String klassJar = findContainingJar(klass);
    String shadoopJar = findContainingJar(SpatialSite.class);
    if (klassJar == null || (shadoopJar != null && klassJar.equals(shadoopJar)))
        return;
    Path containingJar = new Path(findContainingJar(klass));
    Path[] existingClassPaths = DistributedCache.getArchiveClassPaths(conf);
    if (existingClassPaths != null) {
        for (Path existingClassPath : existingClassPaths) {
            if (containingJar.getName().equals(existingClassPath.getName()))
                return;
        }
    }
    // The containing jar is a new one and needs to be copied to class path
    try {
        LOG.info("Adding JAR '" + containingJar.getName() + "' to job class path");
        FileSystem defaultFS = FileSystem.get(conf);
        Path libFolder;
        if (existingClassPaths != null && existingClassPaths.length > 0) {
            libFolder = existingClassPaths[0].getParent();
        } else {
            // First jar to be added like this. Create a new lib folder
            do {
                libFolder = new Path("lib_" + (int) (Math.random() * 100000));
            } while (defaultFS.exists(libFolder));
            defaultFS.mkdirs(libFolder);
            defaultFS.deleteOnExit(libFolder);
        }
        defaultFS.copyFromLocalFile(containingJar, libFolder);
        // makeQualified once is enough; the original called it twice.
        Path jarFullPath = new Path(libFolder, containingJar.getName()).makeQualified(defaultFS);
        DistributedCache.addArchiveToClassPath(jarFullPath, conf);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
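
A hedged call-site sketch (job and MyMapper are placeholders, not part of the source above):

// Ensure the jar that contains MyMapper travels with the job.
SpatialSite.addClassToPath(job.getConfiguration(), MyMapper.class);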

From source file: eu.edisonproject.classification.tfidf.mapreduce.CompetencesDistanceDriver.java

License: Apache License

@Override
public int run(String[] args) {
    try {
        Configuration conf = HBaseConfiguration.create();
        //additional output using TextOutputFormat.
        conf.set("file.names", args[3]);

        Job job = Job.getInstance(conf);
        //TableMapReduceUtil.addDependencyJars(job); 
        job.setJarByClass(CompetencesDistanceDriver.class);
        //This row must be changed
        job.setJobName("Words Group By Title Driver");

        Path inPath = new Path(args[0]);
        Path outPath = new Path(args[1]);

        Path competencesPath = new Path(args[2]);
        Path competencesPathHDFS = competencesPath;
        FileSystem fs = FileSystem.get(conf);

        if (!conf.get(FileSystem.FS_DEFAULT_NAME_KEY).startsWith("file")) {
            competencesPathHDFS = new Path(competencesPath.getName());
            if (!fs.exists(competencesPathHDFS)) {
                fs.mkdirs(competencesPathHDFS);
                File[] stats = new File(competencesPath.toString()).listFiles();
                for (File stat : stats) {
                    Path filePath = new Path(stat.getAbsolutePath());
                    if (FilenameUtils.getExtension(filePath.getName()).endsWith("csv")) {
                        Path dest = new Path(competencesPathHDFS.toUri() + "/" + filePath.getName());
                        fs.copyFromLocalFile(filePath, dest);
                    }
                }
            }
        }
        job.addCacheFile(competencesPathHDFS.toUri());

        FileInputFormat.setInputPaths(job, inPath);

        FileOutputFormat.setOutputPath(job, outPath);
        fs.delete(outPath, true);

        job.setMapperClass(CompetencesDistanceMapper.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(CompetencesDistanceReducer.class);
        //            job.setOutputFormatClass(TableOutputFormat.class);
        //            job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, "jobpostcompetence");
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        String[] fileNames = args[3].split(",");
        for (String n : fileNames) {
            MultipleOutputs.addNamedOutput(job, n, TextOutputFormat.class, Text.class, Text.class);
        }

        return (job.waitForCompletion(true) ? 0 : 1);
    } catch (IOException | IllegalStateException | IllegalArgumentException | InterruptedException
            | ClassNotFoundException ex) {
        Logger.getLogger(CompetencesDistanceDriver.class.getName()).log(Level.SEVERE, null, ex);
    }
    // An exception was caught and logged above, so report failure rather than success.
    return 1;
}
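
Drivers with this run(String[]) shape are normally launched through ToolRunner (a sketch, assuming the class implements org.apache.hadoop.util.Tool; the same applies to the drivers below):

int exitCode = ToolRunner.run(new Configuration(), new CompetencesDistanceDriver(), args);
System.exit(exitCode);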

From source file: eu.edisonproject.classification.tfidf.mapreduce.TermWordFrequency.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration jobconf = getConf();
    Job job = Job.getInstance(jobconf);
    FileSystem fs = FileSystem.get(jobconf);
    fs.delete(new Path(args[1]), true);
    Path dictionary = new Path(args[0]);
    Path dictionaryHdfs = dictionary;

    Path localDocs = new Path(args[2]);
    Path hdfsDocs = localDocs;

    Path stopwordsLocal = new Path(args[3]);
    Path stopwordsHDFS = stopwordsLocal;
    if (!jobconf.get(FileSystem.FS_DEFAULT_NAME_KEY).startsWith("file")) {
        dictionaryHdfs = new Path(dictionary.getName());
        if (!fs.exists(dictionaryHdfs)) {
            fs.copyFromLocalFile(dictionary, dictionaryHdfs);
        }
        hdfsDocs = new Path(localDocs.getName());
        fs.mkdirs(hdfsDocs);
        fs.deleteOnExit(hdfsDocs);

        File[] stats = new File(localDocs.toString()).listFiles();

        for (File stat : stats) {
            Path filePath = new Path(stat.getAbsolutePath());
            if (FilenameUtils.getExtension(filePath.getName()).endsWith("txt")) {
                Path dest = new Path(hdfsDocs.toUri() + "/" + filePath.getName());
                fs.copyFromLocalFile(filePath, dest);
            }
        }
        stopwordsHDFS = new Path(stopwordsLocal.getName());
        if (!fs.exists(stopwordsHDFS)) {
            fs.copyFromLocalFile(stopwordsLocal, stopwordsHDFS);
        }
    }

    FileStatus stopwordsStatus = fs.getFileStatus(stopwordsHDFS);
    stopwordsHDFS = stopwordsStatus.getPath();
    job.addCacheFile(stopwordsHDFS.toUri());

    job.addCacheFile(hdfsDocs.toUri());

    job.setJarByClass(TermWordFrequency.class);
    job.setJobName("Word Frequency Term Driver");

    FileInputFormat.setInputPaths(job, dictionaryHdfs);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    //        job.setInputFormatClass(TextInputFormat.class);
    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, dictionaryHdfs);
    NLineInputFormat.setNumLinesPerSplit(job, Integer.valueOf(args[4]));
    NLineInputFormat.setMaxInputSplitSize(job, 500);

    job.setMapperClass(TermWordFrequencyMapper.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Integer.class);
    job.setReducerClass(TermWordFrequencyReducer.class);

    return (job.waitForCompletion(true) ? 0 : 1);

}

From source file: eu.edisonproject.classification.tfidf.mapreduce.WordFrequencyInDocDriver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    //        itemset = new LinkedList<String>();
    //        BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(args[2])));
    //        String line;
    //        while ((line = br.readLine()) != null) {
    //            String[] components = line.split("/");
    //            itemset.add(components[0]);
    //        }
    Configuration conf = getConf();

    Job job = Job.getInstance(conf);
    job.setJarByClass(WordFrequencyInDocDriver.class);
    job.setJobName("Word Frequency In Doc Driver");

    FileSystem fs = FileSystem.get(conf);
    fs.delete(new Path(args[1]), true);
    Path in = new Path(args[0]);
    Path inHdfs = in;

    Path dictionaryLocal = new Path(args[2]);
    Path dictionaryHDFS = dictionaryLocal;

    Path stopwordsLocal = new Path(args[3]);
    Path stopwordsHDFS = stopwordsLocal;

    if (!conf.get(FileSystem.FS_DEFAULT_NAME_KEY).startsWith("file")) {
        inHdfs = new Path(in.getName());
        fs.delete(inHdfs, true);
        fs.copyFromLocalFile(in, inHdfs);
        fs.deleteOnExit(inHdfs);

        dictionaryHDFS = new Path(dictionaryLocal.getName());
        if (!fs.exists(dictionaryHDFS)) {
            fs.copyFromLocalFile(dictionaryLocal, dictionaryHDFS);
        }
        stopwordsHDFS = new Path(stopwordsLocal.getName());
        if (!fs.exists(stopwordsHDFS)) {
            fs.copyFromLocalFile(stopwordsLocal, stopwordsHDFS);
        }
    }

    FileStatus dictionaryStatus = fs.getFileStatus(dictionaryHDFS);
    dictionaryHDFS = dictionaryStatus.getPath();
    job.addCacheFile(dictionaryHDFS.toUri());

    FileStatus stopwordsStatus = fs.getFileStatus(stopwordsHDFS);
    stopwordsHDFS = stopwordsStatus.getPath();
    job.addCacheFile(stopwordsHDFS.toUri());

    FileInputFormat.setInputPaths(job, inHdfs);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapperClass(WordFrequencyInDocMapper.class);
    AvroJob.setInputKeySchema(job, Document.getClassSchema());
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Integer.class);
    job.setReducerClass(WordFrequencyInDocReducer.class);
    return (job.waitForCompletion(true) ? 0 : 1);
}

From source file: eu.edisonproject.training.tfidf.mapreduce.TermWordFrequency.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration jobconf = getConf();

    FileSystem fs = FileSystem.get(jobconf);
    fs.delete(new Path(args[1]), true);
    Path in = new Path(args[0]);
    Path inHdfs = in;
    if (!jobconf.get(FileSystem.FS_DEFAULT_NAME_KEY).startsWith("file")) {
        inHdfs = new Path(in.getName());
        fs.delete(inHdfs, true);
        fs.copyFromLocalFile(in, inHdfs);
        fs.deleteOnExit(inHdfs);
        FileStatus inHdfsStatus = fs.getFileStatus(inHdfs);
        //            Logger.getLogger(TermWordFrequency.class.getName()).log(Level.INFO, "Copied: {0} to: {1}", new Object[]{in.toUri(), inHdfsStatus.getPath().toUri()});
    }

    Job job = Job.getInstance(jobconf);
    Path stopwordsLocal = new Path(args[3]);
    stopwords = new Path(stopwordsLocal.getName());
    fs.delete(stopwords, true);
    fs.copyFromLocalFile(stopwordsLocal, stopwords);
    fs.deleteOnExit(stopwords);

    FileStatus stopwordsStatus = fs.getFileStatus(stopwords);
    stopwords = stopwordsStatus.getPath();
    job.addCacheFile(stopwords.toUri());

    Path localDocs = new Path(args[2]);
    Path hdfsDocs = new Path(localDocs.getName());
    fs.mkdirs(hdfsDocs);
    hdfsDocs = fs.getFileStatus(hdfsDocs).getPath();
    fs.delete(hdfsDocs, true);
    //        FileStatus[] stats = fs.listStatus(localDocs);
    File[] stats = new File(localDocs.toString()).listFiles();

    for (File stat : stats) {
        //        for (FileStatus stat : stats) {
        Path filePath = new Path(stat.getAbsolutePath());
        if (FilenameUtils.getExtension(filePath.getName()).endsWith("txt")) {
            Path dest = new Path(hdfsDocs.toUri() + "/" + filePath.getName());
            fs.copyFromLocalFile(filePath, dest);
        }
    }

    job.addCacheFile(hdfsDocs.toUri());

    job.setJarByClass(TermWordFrequency.class);
    job.setJobName("Word Frequency Term Driver");

    FileInputFormat.setInputPaths(job, inHdfs);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    //         job.setInputFormatClass(TextInputFormat.class);
    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, inHdfs);
    NLineInputFormat.setNumLinesPerSplit(job, Integer.valueOf(args[4]));
    NLineInputFormat.setMaxInputSplitSize(job, 500);
    Logger.getLogger(TermWordFrequency.class.getName()).log(Level.INFO, "Num. of lines: {0}",
            NLineInputFormat.getNumLinesPerSplit(job));

    job.setMapperClass(TermWordFrequencyMapper.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Integer.class);
    job.setReducerClass(TermWordFrequencyReducer.class);

    return (job.waitForCompletion(true) ? 0 : 1);

}

From source file: eu.larkc.iris.imports.Importer.java

License: Apache License

public void processNTriple(String inPath, String importName) throws IOException {
    //FIXME
    //configuration.jobConf.set("mapreduce.input.fileinputformat.split.minsize", "268435456");
    FileSystem fs = FileSystem.get(configuration.hadoopConfiguration);

    String fileName = configuration.project + "/" + DistributedFileSystemManager.TMP_FOLDER + "/"
            + inPath.substring(inPath.lastIndexOf("/") + 1);
    logger.info("copy start: " + inPath + " to hdfs : " + fileName);
    fs.copyFromLocalFile(new Path(inPath), new Path(fileName));
    logger.info("copy done : " + inPath + " to hdfs : " + fileName);

    //Tap source = new Lfs(new TextLine(), inPath);
    Tap source = new Hfs(new TextLine(), fileName);

    SequenceFile sinkScheme = new SequenceFile(new Fields(0, 1, 2));
    //sinkScheme.setNumSinkParts(1);
    String importPath = distributedFileSystemManager.getImportPath(importName);
    Tap sink = new Hfs(sinkScheme, importPath, true);

    int[] groups = { 2, 1, 3 };
    RegexParser parser = new RegexParser(Fields.UNKNOWN,
            "^(<[^\\s]+>|_:node\\w+)\\s*(<[^\\s]+>|_:node\\w+)\\s*([<\"].*[^\\s]|_:node\\w+)\\s*.\\s*$",
            groups); //_ is for generated nodes like _:node15n67q1f2x14
    Pipe sourcePipe = new Each("sourcePipe", new Fields("line"), parser);
    sourcePipe = new Each(sourcePipe, Fields.ALL, new TextImporterFunction());

    Flow aFlow = new FlowConnector(configuration.flowProperties).connect(source, sink, sourcePipe);
    aFlow.complete();

    fs.delete(new Path(fileName), false);

    if (configuration.doPredicateIndexing) {
        processIndexing(importName);

        //FileSystem fs = FileSystem.get(configuration.hadoopConfiguration);
        fs.delete(new Path(importPath), true);
    }
}

From source file: eu.qualimaster.dataManagement.storage.hdfs.HdfsUtils.java

License: Apache License

/**
 * Stores the data file to HDFS (alternative) using the DFS path as prefix.
 *
 * @return the target path if successful, <b>null</b> else
 * @throws IOException in case that I/O fails
 */
public static String storeToHdfs(File dataFile) throws IOException {
    String dataPath = null;
    if (!DataManagementConfiguration.isEmpty(DataManagementConfiguration.getHdfsUrl())) {
        String basePath = DataManagementConfiguration.getDfsPath() + "/";
        FileSystem fs = HdfsUtils.getFilesystem();
        Path target = new Path(basePath, dataFile.getName());
        fs.copyFromLocalFile(new Path(dataFile.getAbsolutePath()), target);
        dataPath = target.toString();
    }
    return dataPath;
}
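
A usage sketch, assuming DataManagementConfiguration points at a reachable HDFS (the file name is illustrative):

File dataFile = new File("/tmp/measurements.dat"); // hypothetical local file
String hdfsPath = HdfsUtils.storeToHdfs(dataFile);
if (hdfsPath == null) {
    // No HDFS URL is configured, so nothing was copied.
}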

From source file: eu.qualimaster.dataManagement.storage.hdfs.HdfsUtils.java

License: Apache License

/**
 * Copies <code>source</code> to <code>fs</code> and <code>basePath</code>.
 *
 * @param fs the file system
 * @param basePath the actual base path
 * @param source the source file/directory
 * @throws IOException in case that copying fails
 */
private static void copy(FileSystem fs, String basePath, File source) throws IOException {
    if (source.isDirectory()) {
        String bp = basePath + "/" + source.getName();
        fs.mkdirs(new Path(bp)); // mkdirs, not create: create() would make a file at bp
        File[] files = source.listFiles();
        if (null != files) {
            for (File f : files) {
                copy(fs, bp, f);
            }
        }
    } else {
        Path target = new Path(basePath, source.getName());
        fs.copyFromLocalFile(new Path(source.getAbsolutePath()), target);
    }
}

From source file: eu.stratosphere.yarn.Utils.java

License: Apache License

/**
 * Copies the given local resource to the file system and registers it as a
 * YARN local resource for the application master.
 *
 * @return path to the remote file (usually on HDFS)
 * @throws IOException if the copy fails
 */
public static Path setupLocalResource(Configuration conf, FileSystem fs, String appId, Path localRsrcPath,
        LocalResource appMasterJar, Path homedir) throws IOException {
    // copy to HDFS
    String suffix = ".stratosphere/" + appId + "/" + localRsrcPath.getName();

    Path dst = new Path(homedir, suffix);

    LOG.info("Copying from " + localRsrcPath + " to " + dst);
    fs.copyFromLocalFile(localRsrcPath, dst);
    registerLocalResource(fs, dst, appMasterJar);
    return dst;
}
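
A call-site sketch, assuming a YARN client that has already obtained a FileSystem and an application id (all names here are illustrative):

// An empty LocalResource record, to be filled in by registerLocalResource.
LocalResource appMasterJar = Records.newRecord(LocalResource.class);
Path remoteJar = Utils.setupLocalResource(conf, fs, appId,
        new Path("file:///tmp/app.jar"), appMasterJar, fs.getHomeDirectory());
// remoteJar now points at .stratosphere/<appId>/app.jar under the home directory.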