List of usage examples for org.apache.hadoop.fs.FileUtil.createLocalTempFile
public static final File createLocalTempFile(final File basefile, final String prefix, final boolean isDeleteOnExit) throws IOException
From source file:com.kylinolap.common.util.SSHClientTest.java
License:Apache License
@Test public void testScp() throws Exception { if (isRemote == false) return;// ww w . j av a2 s . c om SSHClient ssh = new SSHClient(this.hostname, this.username, this.password, null); File tmpFile = FileUtil.createLocalTempFile(new File("/tmp/test_scp"), "temp_", false); ssh.scpFileToRemote(tmpFile.getAbsolutePath(), "/tmp"); }
From source file:org.apache.kylin.common.util.SSHClientTest.java
License:Apache License
@Test public void testScp() throws Exception { if (isRemote == false) return;//ww w . ja va 2 s . co m SSHClient ssh = new SSHClient(this.hostname, this.username, this.password); File tmpFile = FileUtil.createLocalTempFile(new File("/tmp/test_scp"), "temp_", false); ssh.scpFileToRemote(tmpFile.getAbsolutePath(), "/tmp"); }
From source file:org.apache.mahout.classifier.df.tools.UDistrib.java
License:Apache License
private static void runTool(String dataStr, String datasetStr, String output, int numPartitions) throws IOException { Preconditions.checkArgument(numPartitions > 0, "numPartitions <= 0"); // make sure the output file does not exist Path outputPath = new Path(output); Configuration conf = new Configuration(); FileSystem fs = outputPath.getFileSystem(conf); Preconditions.checkArgument(!fs.exists(outputPath), "Output path already exists"); // create a new file corresponding to each partition // Path workingDir = fs.getWorkingDirectory(); // FileSystem wfs = workingDir.getFileSystem(conf); // File parentFile = new File(workingDir.toString()); // File tempFile = FileUtil.createLocalTempFile(parentFile, "Parts", true); // File tempFile = File.createTempFile("df.tools.UDistrib",""); // tempFile.deleteOnExit(); File tempFile = FileUtil.createLocalTempFile(new File(""), "df.tools.UDistrib", true); Path partsPath = new Path(tempFile.toString()); FileSystem pfs = partsPath.getFileSystem(conf); Path[] partPaths = new Path[numPartitions]; FSDataOutputStream[] files = new FSDataOutputStream[numPartitions]; for (int p = 0; p < numPartitions; p++) { partPaths[p] = new Path(partsPath, String.format(Locale.ENGLISH, "part.%03d", p)); files[p] = pfs.create(partPaths[p]); }/*from w ww .j ava 2s . 
c o m*/ Path datasetPath = new Path(datasetStr); Dataset dataset = Dataset.load(conf, datasetPath); // currents[label] = next partition file where to place the tuple int[] currents = new int[dataset.nblabels()]; // currents is initialized randomly in the range [0, numpartitions[ Random random = RandomUtils.getRandom(); for (int c = 0; c < currents.length; c++) { currents[c] = random.nextInt(numPartitions); } // foreach tuple of the data Path dataPath = new Path(dataStr); FileSystem ifs = dataPath.getFileSystem(conf); FSDataInputStream input = ifs.open(dataPath); Scanner scanner = new Scanner(input, "UTF-8"); DataConverter converter = new DataConverter(dataset); int id = 0; while (scanner.hasNextLine()) { if (id % 1000 == 0) { log.info("progress : {}", id); } String line = scanner.nextLine(); if (line.isEmpty()) { continue; // skip empty lines } // write the tuple in files[tuple.label] Instance instance = converter.convert(line); int label = (int) dataset.getLabel(instance); files[currents[label]].writeBytes(line); files[currents[label]].writeChar('\n'); // update currents currents[label]++; if (currents[label] == numPartitions) { currents[label] = 0; } } // close all the files. scanner.close(); for (FSDataOutputStream file : files) { Closeables.close(file, false); } // merge all output files FileUtil.copyMerge(pfs, partsPath, fs, outputPath, true, conf, null); /* * FSDataOutputStream joined = fs.create(new Path(outputPath, "uniform.data")); for (int p = 0; p < * numPartitions; p++) {log.info("Joining part : {}", p); FSDataInputStream partStream = * fs.open(partPaths[p]); * * IOUtils.copyBytes(partStream, joined, conf, false); * * partStream.close(); } * * joined.close(); * * fs.delete(partsPath, true); */ }
From source file:org.apache.mahout.df.tools.UDistrib.java
License:Apache License
private static void runTool(String dataStr, String datasetStr, String output, int numPartitions) throws IOException { Configuration conf = new Configuration(); if (numPartitions <= 0) { throw new IllegalArgumentException("numPartitions <= 0"); }/* w ww. j a va 2 s .c o m*/ // make sure the output file does not exist Path outputPath = new Path(output); FileSystem fs = outputPath.getFileSystem(conf); if (fs.exists(outputPath)) { throw new IllegalArgumentException("Output path already exists"); } // create a new file corresponding to each partition // Path workingDir = fs.getWorkingDirectory(); // FileSystem wfs = workingDir.getFileSystem(conf); // File parentFile = new File(workingDir.toString()); // File tempFile = FileUtil.createLocalTempFile(parentFile, "Parts", true); // File tempFile = File.createTempFile("df.tools.UDistrib",""); // tempFile.deleteOnExit(); File tempFile = FileUtil.createLocalTempFile(new File(""), "df.tools.UDistrib", true); Path partsPath = new Path(tempFile.toString()); FileSystem pfs = partsPath.getFileSystem(conf); Path[] partPaths = new Path[numPartitions]; FSDataOutputStream[] files = new FSDataOutputStream[numPartitions]; for (int p = 0; p < numPartitions; p++) { partPaths[p] = new Path(partsPath, String.format(Locale.ENGLISH, "part.%03d", p)); files[p] = pfs.create(partPaths[p]); } Path datasetPath = new Path(datasetStr); Dataset dataset = Dataset.load(conf, datasetPath); // currents[label] = next partition file where to place the tuple int[] currents = new int[dataset.nblabels()]; // currents is initialized randomly in the range [0, numpartitions[ Random random = RandomUtils.getRandom(); for (int c = 0; c < currents.length; c++) { currents[c] = random.nextInt(numPartitions); } // foreach tuple of the data Path dataPath = new Path(dataStr); FileSystem ifs = dataPath.getFileSystem(conf); FSDataInputStream input = ifs.open(dataPath); Scanner scanner = new Scanner(input); DataConverter converter = new DataConverter(dataset); int 
nbInstances = dataset.nbInstances(); int id = 0; while (scanner.hasNextLine()) { if (id % 1000 == 0) { log.info("progress : {} / {}", id, nbInstances); } String line = scanner.nextLine(); if (line.isEmpty()) { continue; // skip empty lines } // write the tuple in files[tuple.label] Instance instance = converter.convert(id++, line); int label = instance.getLabel(); files[currents[label]].writeBytes(line); files[currents[label]].writeChar('\n'); // update currents currents[label]++; if (currents[label] == numPartitions) { currents[label] = 0; } } // close all the files. scanner.close(); for (FSDataOutputStream file : files) { file.close(); } // merge all output files FileUtil.copyMerge(pfs, partsPath, fs, outputPath, true, conf, null); /* * FSDataOutputStream joined = fs.create(new Path(outputPath, "uniform.data")); for (int p = 0; p < * numPartitions; p++) {log.info("Joining part : {}", p); FSDataInputStream partStream = * fs.open(partPaths[p]); * * IOUtils.copyBytes(partStream, joined, conf, false); * * partStream.close(); } * * joined.close(); * * fs.delete(partsPath, true); */ }
From source file:org.godhuli.rhipe.FileUtils.java
License:Apache License
public void copyToLocal(FileSystem srcFS, Path src, File dst) throws IOException { if (!srcFS.getFileStatus(src).isDir()) { File tmp = FileUtil.createLocalTempFile(dst.getAbsoluteFile(), COPYTOLOCAL_PREFIX, true); if (!FileUtil.copy(srcFS, src, tmp, false, srcFS.getConf())) { throw new IOException("Failed to copy " + src + " to " + dst); }//from ww w .j a va2 s.c o m if (!tmp.renameTo(dst)) { throw new IOException( "Failed to rename tmp file " + tmp + " to local destination \"" + dst + "\"."); } } else { dst.mkdirs(); for (FileStatus path : srcFS.listStatus(src)) { copyToLocal(srcFS, path.getPath(), new File(dst, path.getPath().getName())); } } }
From source file:org.springframework.data.hadoop.fs.FsShell.java
License:Apache License
private void copyToLocal(final FileSystem srcFS, final Path src, final File dst, final boolean copyCrc) throws IOException { final String COPYTOLOCAL_PREFIX = "_copyToLocal_"; /* Keep the structure similar to ChecksumFileSystem.copyToLocal(). * Ideal these two should just invoke FileUtil.copy() and not repeat * recursion here. Of course, copy() should support two more options : * copyCrc and useTmpFile (may be useTmpFile need not be an option). *//*w w w . ja v a2 s . co m*/ if (!srcFS.getFileStatus(src).isDir()) { if (dst.exists()) { // match the error message in FileUtil.checkDest(): throw new IOException("Target " + dst + " already exists"); } // use absolute name so that tmp file is always created under dest dir File tmp = FileUtil.createLocalTempFile(dst.getAbsoluteFile(), COPYTOLOCAL_PREFIX, true); if (!FileUtil.copy(srcFS, src, tmp, false, srcFS.getConf())) { throw new IOException("Failed to copy " + src + " to " + dst); } if (!tmp.renameTo(dst)) { throw new IOException( "Failed to rename tmp file " + tmp + " to local destination \"" + dst + "\"."); } if (copyCrc) { if (!(srcFS instanceof ChecksumFileSystem)) { throw new IOException("Source file system does not have crc files"); } ChecksumFileSystem csfs = (ChecksumFileSystem) srcFS; File dstcs = FileSystem.getLocal(srcFS.getConf()) .pathToFile(csfs.getChecksumFile(new Path(dst.getCanonicalPath()))); copyToLocal(csfs.getRawFileSystem(), csfs.getChecksumFile(src), dstcs, false); } } else { // once FileUtil.copy() supports tmp file, we don't need to mkdirs(). dst.mkdirs(); for (FileStatus path : srcFS.listStatus(src)) { copyToLocal(srcFS, path.getPath(), new File(dst, path.getPath().getName()), copyCrc); } } }