Example usage for org.apache.hadoop.fs FileUtil createLocalTempFile

Introduction

This page collects example usages of org.apache.hadoop.fs.FileUtil.createLocalTempFile.

Prototype

public static final File createLocalTempFile(final File basefile, final String prefix,
        final boolean isDeleteOnExit) throws IOException

Source Link

Document

Creates a temporary file for a base file.

Usage

From source file:com.kylinolap.common.util.SSHClientTest.java

License:Apache License

/**
 * Creates a local temp file based on {@code /tmp/test_scp} and hands its absolute path,
 * together with the target {@code /tmp}, to {@code SSHClient.scpFileToRemote}.
 * Returns immediately when {@code isRemote} is false.
 */
@Test
public void testScp() throws Exception {
    if (!isRemote) {
        return;
    }

    final SSHClient client = new SSHClient(this.hostname, this.username, this.password, null);
    final File baseFile = new File("/tmp/test_scp");
    final File localTemp = FileUtil.createLocalTempFile(baseFile, "temp_", false);
    final String localPath = localTemp.getAbsolutePath();
    client.scpFileToRemote(localPath, "/tmp");
}

From source file:org.apache.kylin.common.util.SSHClientTest.java

License:Apache License

/**
 * Creates a local temp file based on {@code /tmp/test_scp} and hands its absolute path,
 * together with the target {@code /tmp}, to {@code SSHClient.scpFileToRemote}.
 * Returns immediately when {@code isRemote} is false.
 */
@Test
public void testScp() throws Exception {
    if (!isRemote) {
        return;
    }

    final SSHClient client = new SSHClient(this.hostname, this.username, this.password);
    final File baseFile = new File("/tmp/test_scp");
    final File localTemp = FileUtil.createLocalTempFile(baseFile, "temp_", false);
    final String localPath = localTemp.getAbsolutePath();
    client.scpFileToRemote(localPath, "/tmp");
}

From source file:org.apache.mahout.classifier.df.tools.UDistrib.java

License:Apache License

/**
 * Spreads the non-empty lines of the data file over {@code numPartitions} part files and then
 * merges those part files into {@code output} with {@code FileUtil.copyMerge}.
 *
 * <p>Each label keeps its own "next partition" index: it starts at a random partition and
 * advances by one (wrapping to 0) every time a tuple with that label is written.
 *
 * @param dataStr       path of the data file, read line by line as UTF-8
 * @param datasetStr    path of the dataset descriptor passed to {@code Dataset.load}
 * @param output        path of the merged output; must not exist yet
 * @param numPartitions number of part files to create; must be positive
 * @throws IllegalArgumentException if {@code numPartitions <= 0} or the output path exists
 * @throws IOException              if reading, writing, closing or merging fails
 */
private static void runTool(String dataStr, String datasetStr, String output, int numPartitions)
        throws IOException {

    Preconditions.checkArgument(numPartitions > 0, "numPartitions <= 0");

    // make sure the output file does not exist
    Path outputPath = new Path(output);
    Configuration conf = new Configuration();
    FileSystem fs = outputPath.getFileSystem(conf);

    Preconditions.checkArgument(!fs.exists(outputPath), "Output path already exists");

    // the part files are created under a path named after a freshly created local temp file
    File tempFile = FileUtil.createLocalTempFile(new File(""), "df.tools.UDistrib", true);
    Path partsPath = new Path(tempFile.toString());
    FileSystem pfs = partsPath.getFileSystem(conf);

    FSDataOutputStream[] files = new FSDataOutputStream[numPartitions];
    Scanner scanner = null;
    boolean succeeded = false;
    try {
        // create a new file corresponding to each partition
        for (int p = 0; p < numPartitions; p++) {
            Path partPath = new Path(partsPath, String.format(Locale.ENGLISH, "part.%03d", p));
            files[p] = pfs.create(partPath);
        }

        Path datasetPath = new Path(datasetStr);
        Dataset dataset = Dataset.load(conf, datasetPath);

        // currents[label] = next partition file where to place the tuple
        int[] currents = new int[dataset.nblabels()];

        // currents is initialized randomly in the range [0, numPartitions)
        Random random = RandomUtils.getRandom();
        for (int c = 0; c < currents.length; c++) {
            currents[c] = random.nextInt(numPartitions);
        }

        // foreach tuple of the data
        Path dataPath = new Path(dataStr);
        FileSystem ifs = dataPath.getFileSystem(conf);
        FSDataInputStream input = ifs.open(dataPath);
        scanner = new Scanner(input, "UTF-8");
        DataConverter converter = new DataConverter(dataset);

        int id = 0; // number of tuples written so far
        while (scanner.hasNextLine()) {
            String line = scanner.nextLine();
            if (line.isEmpty()) {
                continue; // skip empty lines
            }

            if (id % 1000 == 0) {
                log.info("progress : {}", id);
            }

            // write the tuple in files[tuple.label]
            Instance instance = converter.convert(line);
            int label = (int) dataset.getLabel(instance);
            // Encode with the same charset the Scanner decodes with. writeBytes() would drop
            // the high byte of every char and writeChar('\n') would emit two bytes (0x00 0x0A).
            files[currents[label]].write((line + '\n').getBytes("UTF-8"));

            // update currents
            currents[label]++;
            if (currents[label] == numPartitions) {
                currents[label] = 0;
            }

            id++;
        }
        succeeded = true;
    } finally {
        // close all the files, even when an earlier step failed
        if (scanner != null) {
            scanner.close();
        }
        IOException closeFailure = null;
        for (FSDataOutputStream file : files) {
            if (file == null) {
                continue; // never opened because an earlier create() failed
            }
            try {
                file.close();
            } catch (IOException e) {
                if (closeFailure == null) {
                    closeFailure = e;
                }
            }
        }
        // report a close failure only when it would not mask the original exception
        if (succeeded && closeFailure != null) {
            throw closeFailure;
        }
    }

    // merge all output files; the 'true' flag asks copyMerge to delete the part files afterwards
    FileUtil.copyMerge(pfs, partsPath, fs, outputPath, true, conf, null);
}

From source file:org.apache.mahout.df.tools.UDistrib.java

License:Apache License

/**
 * Spreads the non-empty lines of the data file over {@code numPartitions} part files and then
 * merges those part files into {@code output} with {@code FileUtil.copyMerge}.
 *
 * <p>Each label keeps its own "next partition" index: it starts at a random partition and
 * advances by one (wrapping to 0) every time a tuple with that label is written.
 *
 * @param dataStr       path of the data file, read line by line
 * @param datasetStr    path of the dataset descriptor passed to {@code Dataset.load}
 * @param output        path of the merged output; must not exist yet
 * @param numPartitions number of part files to create; must be positive
 * @throws IllegalArgumentException if {@code numPartitions <= 0} or the output path exists
 * @throws IOException              if reading, writing, closing or merging fails
 */
private static void runTool(String dataStr, String datasetStr, String output, int numPartitions)
        throws IOException {

    Configuration conf = new Configuration();

    if (numPartitions <= 0) {
        throw new IllegalArgumentException("numPartitions <= 0");
    }

    // make sure the output file does not exist
    Path outputPath = new Path(output);
    FileSystem fs = outputPath.getFileSystem(conf);

    if (fs.exists(outputPath)) {
        throw new IllegalArgumentException("Output path already exists");
    }

    // the part files are created under a path named after a freshly created local temp file
    File tempFile = FileUtil.createLocalTempFile(new File(""), "df.tools.UDistrib", true);
    Path partsPath = new Path(tempFile.toString());
    FileSystem pfs = partsPath.getFileSystem(conf);

    FSDataOutputStream[] files = new FSDataOutputStream[numPartitions];
    Scanner scanner = null;
    boolean succeeded = false;
    try {
        // create a new file corresponding to each partition
        for (int p = 0; p < numPartitions; p++) {
            Path partPath = new Path(partsPath, String.format(Locale.ENGLISH, "part.%03d", p));
            files[p] = pfs.create(partPath);
        }

        Path datasetPath = new Path(datasetStr);
        Dataset dataset = Dataset.load(conf, datasetPath);

        // currents[label] = next partition file where to place the tuple
        int[] currents = new int[dataset.nblabels()];

        // currents is initialized randomly in the range [0, numPartitions)
        Random random = RandomUtils.getRandom();
        for (int c = 0; c < currents.length; c++) {
            currents[c] = random.nextInt(numPartitions);
        }

        // foreach tuple of the data
        Path dataPath = new Path(dataStr);
        FileSystem ifs = dataPath.getFileSystem(conf);
        FSDataInputStream input = ifs.open(dataPath);
        // NOTE(review): the Scanner decodes with the platform default charset and writeBytes()
        // below keeps only the low byte of each char, so non-ASCII data round-trips only under
        // a single-byte default such as ISO-8859-1 - confirm the expected data encoding.
        scanner = new Scanner(input);
        DataConverter converter = new DataConverter(dataset);
        int nbInstances = dataset.nbInstances();

        int id = 0;
        while (scanner.hasNextLine()) {
            if (id % 1000 == 0) {
                log.info("progress : {} / {}", id, nbInstances);
            }

            String line = scanner.nextLine();
            if (line.isEmpty()) {
                continue; // skip empty lines
            }

            // write the tuple in files[tuple.label]
            Instance instance = converter.convert(id++, line);
            int label = instance.getLabel();
            files[currents[label]].writeBytes(line);
            // write(int) emits exactly one byte; writeChar('\n') would emit two (0x00 0x0A)
            files[currents[label]].write('\n');

            // update currents
            currents[label]++;
            if (currents[label] == numPartitions) {
                currents[label] = 0;
            }
        }
        succeeded = true;
    } finally {
        // close all the files, even when an earlier step failed
        if (scanner != null) {
            scanner.close();
        }
        IOException closeFailure = null;
        for (FSDataOutputStream file : files) {
            if (file == null) {
                continue; // never opened because an earlier create() failed
            }
            try {
                file.close();
            } catch (IOException e) {
                if (closeFailure == null) {
                    closeFailure = e;
                }
            }
        }
        // report a close failure only when it would not mask the original exception
        if (succeeded && closeFailure != null) {
            throw closeFailure;
        }
    }

    // merge all output files; the 'true' flag asks copyMerge to delete the part files afterwards
    FileUtil.copyMerge(pfs, partsPath, fs, outputPath, true, conf, null);
}

From source file:org.godhuli.rhipe.FileUtils.java

License:Apache License

/**
 * Recursively copies {@code src} on {@code srcFS} to the local file or directory {@code dst}.
 *
 * <p>A file is first copied into a temp file created from {@code dst}'s absolute path and then
 * renamed to {@code dst}. A directory is created locally and each of its children is copied
 * by a recursive call.
 *
 * @param srcFS file system that holds {@code src}
 * @param src   source file or directory
 * @param dst   local destination
 * @throws IOException if the copy, the rename, or the creation of a local directory fails
 */
public void copyToLocal(FileSystem srcFS, Path src, File dst) throws IOException {
    if (!srcFS.getFileStatus(src).isDir()) {
        File tmp = FileUtil.createLocalTempFile(dst.getAbsoluteFile(), COPYTOLOCAL_PREFIX, true);
        if (!FileUtil.copy(srcFS, src, tmp, false, srcFS.getConf())) {
            throw new IOException("Failed to copy " + src + " to " + dst);
        }

        if (!tmp.renameTo(dst)) {
            throw new IOException(
                    "Failed to rename tmp file " + tmp + " to local destination \"" + dst + "\".");
        }
    } else {
        // mkdirs() also returns false when the directory already exists, so only fail when
        // dst is still not a directory afterwards
        if (!dst.mkdirs() && !dst.isDirectory()) {
            throw new IOException("Failed to create local directory \"" + dst + "\".");
        }
        for (FileStatus path : srcFS.listStatus(src)) {
            copyToLocal(srcFS, path.getPath(), new File(dst, path.getPath().getName()));
        }
    }
}

From source file:org.springframework.data.hadoop.fs.FsShell.java

License:Apache License

/**
 * Recursively copies {@code src} on {@code srcFS} to the local file or directory {@code dst},
 * optionally copying the matching checksum file as well.
 *
 * @param srcFS   file system that holds {@code src}
 * @param src     source file or directory
 * @param dst     local destination; for a file source it must not exist yet
 * @param copyCrc whether to also copy the checksum file; requires {@code srcFS} to be a
 *                {@code ChecksumFileSystem}
 * @throws IOException if the target file already exists, the copy or rename fails, a local
 *                     directory cannot be created, or {@code copyCrc} is set for a file system
 *                     without checksum files
 */
private void copyToLocal(final FileSystem srcFS, final Path src, final File dst, final boolean copyCrc)
        throws IOException {

    final String COPYTOLOCAL_PREFIX = "_copyToLocal_";

    /* Keep the structure similar to ChecksumFileSystem.copyToLocal().
     * Ideally these two should just invoke FileUtil.copy() and not repeat
     * recursion here. Of course, copy() should support two more options:
     * copyCrc and useTmpFile (maybe useTmpFile need not be an option).
     */
    if (!srcFS.getFileStatus(src).isDir()) {
        if (dst.exists()) {
            // match the error message in FileUtil.checkDest():
            throw new IOException("Target " + dst + " already exists");
        }

        // use absolute name so that tmp file is always created under dest dir
        File tmp = FileUtil.createLocalTempFile(dst.getAbsoluteFile(), COPYTOLOCAL_PREFIX, true);
        if (!FileUtil.copy(srcFS, src, tmp, false, srcFS.getConf())) {
            throw new IOException("Failed to copy " + src + " to " + dst);
        }

        if (!tmp.renameTo(dst)) {
            throw new IOException(
                    "Failed to rename tmp file " + tmp + " to local destination \"" + dst + "\".");
        }

        if (copyCrc) {
            if (!(srcFS instanceof ChecksumFileSystem)) {
                throw new IOException("Source file system does not have crc files");
            }

            ChecksumFileSystem csfs = (ChecksumFileSystem) srcFS;
            File dstcs = FileSystem.getLocal(srcFS.getConf())
                    .pathToFile(csfs.getChecksumFile(new Path(dst.getCanonicalPath())));
            // the checksum file itself is copied from the raw file system, without a crc
            copyToLocal(csfs.getRawFileSystem(), csfs.getChecksumFile(src), dstcs, false);
        }
    } else {
        // once FileUtil.copy() supports tmp file, we don't need to mkdirs().
        // mkdirs() also returns false when the directory already exists, so only fail when
        // dst is still not a directory afterwards
        if (!dst.mkdirs() && !dst.isDirectory()) {
            throw new IOException("Failed to create local directory \"" + dst + "\".");
        }
        for (FileStatus path : srcFS.listStatus(src)) {
            copyToLocal(srcFS, path.getPath(), new File(dst, path.getPath().getName()), copyCrc);
        }
    }
}