List of usage examples for org.apache.hadoop.fs FileSystem mkdirs
public boolean mkdirs(Path f) throws IOException
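Before the per-project examples below, here is a minimal, self-contained sketch of the call. The class name and the /tmp/example path are hypothetical, chosen only for illustration; mkdirs creates the directory along with any missing parent directories and returns true on success.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MkdirsExample {
    public static void main(String[] args) throws Exception {
        // Obtain the FileSystem for the default configuration
        // (the local filesystem unless the configuration points elsewhere).
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Create the directory and any missing parents.
        Path dir = new Path("/tmp/example"); // hypothetical path
        boolean created = fs.mkdirs(dir);
        System.out.println("mkdirs returned " + created);
    }
}

The examples that follow show the same call as it appears in tests and production code from several projects.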
From source file:com.cloudera.lib.service.hadoop.TestHadoopService.java
License:Open Source License
@Test
@TestDir
@TestHadoop
public void fileSystemExecutor() throws Exception {
    String dir = getTestDir().getAbsolutePath();
    String services = StringUtils.toString(
        Arrays.asList(InstrumentationService.class.getName(), HadoopService.class.getName()), ",");
    XConfiguration conf = new XConfiguration();
    conf.set("server.services", services);
    Server server = new Server("server", dir, dir, dir, dir, conf);
    server.init();
    Hadoop hadoop = server.get(Hadoop.class);
    final FileSystem fsa[] = new FileSystem[1];
    hadoop.execute("u", getHadoopConf(), new Hadoop.FileSystemExecutor<Void>() {
        @Override
        public Void execute(FileSystem fs) throws IOException {
            fs.mkdirs(new Path("/tmp/foo"));
            fsa[0] = fs;
            return null;
        }
    });
    try {
        // The FileSystem handle is closed once the executor returns, so reuse must fail.
        fsa[0].mkdirs(new Path("/tmp/foo"));
        Assert.fail();
    } catch (IOException ex) {
        // expected
    } catch (Exception ex) {
        Assert.fail();
    }
    server.destroy();
}
From source file:com.cloudera.lib.service.hadoop.TestHadoopService.java
License:Open Source License
@Test
@TestDir
@TestHadoop
public void jobClientExecutor() throws Exception {
    String dir = getTestDir().getAbsolutePath();
    String services = StringUtils.toString(
        Arrays.asList(InstrumentationService.class.getName(), HadoopService.class.getName()), ",");
    XConfiguration conf = new XConfiguration();
    conf.set("server.services", services);
    Server server = new Server("server", dir, dir, dir, dir, conf);
    server.init();
    Hadoop hadoop = server.get(Hadoop.class);
    final JobClient jca[] = new JobClient[1];
    final FileSystem fsa[] = new FileSystem[1];
    hadoop.execute("u", getHadoopConf(), new Hadoop.JobClientExecutor<Void>() {
        @Override
        public Void execute(JobClient jc, FileSystem fs) throws IOException {
            fs.mkdirs(new Path("/tmp/foo"));
            jc.getQueues();
            jca[0] = jc;
            fsa[0] = fs;
            return null;
        }
    });
    // NOT testing JobClient as the closed one still connects to the JobTracker successfully
    // try {
    //     jca[0].submitJob(jobConf);
    //     Assert.fail();
    // } catch (IOException ex) {
    // } catch (Exception ex) {
    //     Assert.fail();
    // }
    try {
        fsa[0].mkdirs(new Path("/tmp/foo"));
        Assert.fail();
    } catch (IOException ex) {
        // expected: the FileSystem was closed when the executor finished
    } catch (Exception ex) {
        Assert.fail();
    }
    server.destroy();
}
From source file:com.cloudera.oryx.ml.MLUpdate.java
License:Open Source License
@Override
public void runUpdate(JavaSparkContext sparkContext, long timestamp,
                      JavaPairRDD<Object, M> newKeyMessageData,
                      JavaPairRDD<Object, M> pastKeyMessageData,
                      String modelDirString,
                      TopicProducer<String, String> modelUpdateTopic)
        throws IOException, InterruptedException {

    Objects.requireNonNull(newKeyMessageData);

    JavaRDD<M> newData = newKeyMessageData.values();
    JavaRDD<M> pastData = pastKeyMessageData == null ? null : pastKeyMessageData.values();

    if (newData != null) {
        newData.cache();
        // This forces caching of the RDD. This shouldn't be necessary but we see some freezes
        // when many workers try to materialize the RDDs at once. Hence the workaround.
        newData.foreachPartition(p -> {});
    }
    if (pastData != null) {
        pastData.cache();
        pastData.foreachPartition(p -> {});
    }

    List<HyperParamValues<?>> hyperParamValues = getHyperParameterValues();
    int valuesPerHyperParam = HyperParams.chooseValuesPerHyperParam(hyperParamValues.size(), candidates);
    List<List<?>> hyperParameterCombos =
        HyperParams.chooseHyperParameterCombos(hyperParamValues, candidates, valuesPerHyperParam);

    Path modelDir = new Path(modelDirString);
    Path tempModelPath = new Path(modelDir, ".temporary");
    Path candidatesPath = new Path(tempModelPath, Long.toString(System.currentTimeMillis()));

    FileSystem fs = FileSystem.get(modelDir.toUri(), sparkContext.hadoopConfiguration());
    fs.mkdirs(candidatesPath);

    Path bestCandidatePath =
        findBestCandidatePath(sparkContext, newData, pastData, hyperParameterCombos, candidatesPath);

    Path finalPath = new Path(modelDir, Long.toString(System.currentTimeMillis()));
    if (bestCandidatePath == null) {
        log.info("Unable to build any model");
    } else {
        // Move best model into place
        fs.rename(bestCandidatePath, finalPath);
    }
    // Then delete everything else
    fs.delete(candidatesPath, true);

    if (modelUpdateTopic == null) {
        log.info("No update topic configured, not publishing models to a topic");
    } else {
        // Push PMML model onto update topic, if it exists
        Path bestModelPath = new Path(finalPath, MODEL_FILE_NAME);
        if (fs.exists(bestModelPath)) {
            FileStatus bestModelPathFS = fs.getFileStatus(bestModelPath);
            PMML bestModel = null;
            boolean modelNeededForUpdates = canPublishAdditionalModelData();
            boolean modelNotTooLarge = bestModelPathFS.getLen() <= maxMessageSize;
            if (modelNeededForUpdates || modelNotTooLarge) {
                // Either the model is required for publishAdditionalModelData, or required because
                // it's going to be serialized to Kafka
                try (InputStream in = fs.open(bestModelPath)) {
                    bestModel = PMMLUtils.read(in);
                }
            }
            if (modelNotTooLarge) {
                modelUpdateTopic.send("MODEL", PMMLUtils.toString(bestModel));
            } else {
                modelUpdateTopic.send("MODEL-REF", fs.makeQualified(bestModelPath).toString());
            }
            if (modelNeededForUpdates) {
                publishAdditionalModelData(sparkContext, bestModel, newData, pastData, finalPath,
                                           modelUpdateTopic);
            }
        }
    }

    if (newData != null) {
        newData.unpersist();
    }
    if (pastData != null) {
        pastData.unpersist();
    }
}
From source file:com.cloudera.oryx.ml.MLUpdate.java
License:Open Source License
private Pair<Path, Double> buildAndEval(int i, List<List<?>> hyperParameterCombos,
                                        JavaSparkContext sparkContext,
                                        JavaRDD<M> newData, JavaRDD<M> pastData,
                                        Path candidatesPath) {
    // % = cycle through combinations if needed
    List<?> hyperParameters = hyperParameterCombos.get(i % hyperParameterCombos.size());
    Path candidatePath = new Path(candidatesPath, Integer.toString(i));
    log.info("Building candidate {} with params {}", i, hyperParameters);

    Pair<JavaRDD<M>, JavaRDD<M>> trainTestData = splitTrainTest(newData, pastData);
    JavaRDD<M> allTrainData = trainTestData.getFirst();
    JavaRDD<M> testData = trainTestData.getSecond();

    Double eval = Double.NaN;
    if (empty(allTrainData)) {
        log.info("No train data to build a model");
    } else {
        PMML model = buildModel(sparkContext, allTrainData, hyperParameters, candidatePath);
        if (model == null) {
            log.info("Unable to build a model");
        } else {
            Path modelPath = new Path(candidatePath, MODEL_FILE_NAME);
            log.info("Writing model to {}", modelPath);
            try {
                FileSystem fs = FileSystem.get(candidatePath.toUri(), sparkContext.hadoopConfiguration());
                fs.mkdirs(candidatePath);
                try (OutputStream out = fs.create(modelPath)) {
                    PMMLUtils.write(model, out);
                }
            } catch (IOException ioe) {
                throw new IllegalStateException(ioe);
            }
            if (empty(testData)) {
                log.info("No test data available to evaluate model");
            } else {
                log.info("Evaluating model");
                eval = evaluate(sparkContext, model, candidatePath, testData, allTrainData);
            }
        }
    }
    log.info("Model eval for params {}: {} ({})", hyperParameters, eval, candidatePath);
    return new Pair<>(candidatePath, eval);
}
From source file:com.cloudera.RenameTest.java
License:Apache License
public static void testFileSystemRename(URI uri) throws Exception {
    FileSystem fs = FileSystem.get(uri, new Configuration());
    Path testDir = new Path(new Path(uri), "testdir");
    System.out.println("mkdir " + testDir);
    fs.mkdirs(testDir);
    Path testFile = new Path(new Path(uri), "testfile");
    System.out.println("create " + testFile);
    FSDataOutputStream fos = fs.create(testFile);
    fos.close();
    System.out.println("rename " + testFile + " -> " + testDir);
    fs.rename(testFile, testDir);
}
From source file:com.cloudera.sqoop.io.TestSplittableBufferedWriter.java
License:Apache License
/**
 * Create the directory where we'll write our test files to, and
 * make sure it has no files in it.
 */
private void ensureEmptyWriteDir() throws IOException {
    FileSystem fs = FileSystem.getLocal(getConf());
    Path writeDir = getWritePath();
    fs.mkdirs(writeDir);

    FileStatus[] stats = fs.listStatus(writeDir);
    for (FileStatus stat : stats) {
        if (stat.isDir()) {
            fail("setUp(): Write directory " + writeDir + " contains subdirectories");
        }
        LOG.debug("setUp(): Removing " + stat.getPath());
        if (!fs.delete(stat.getPath(), false)) {
            fail("setUp(): Could not delete residual file " + stat.getPath());
        }
    }

    if (!fs.exists(writeDir)) {
        fail("setUp: Could not create " + writeDir);
    }
}
From source file:com.cloudera.sqoop.lib.TestBlobRef.java
License:Apache License
private void doExternalTest(final byte[] data, final String filename) throws IOException {
    Configuration conf = new Configuration();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
        conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    FileSystem fs = FileSystem.get(conf);
    String tmpDir = System.getProperty("test.build.data", "/tmp/");

    Path tmpPath = new Path(tmpDir);
    Path blobFile = new Path(tmpPath, filename);

    // make any necessary parent dirs.
    Path blobParent = blobFile.getParent();
    if (!fs.exists(blobParent)) {
        fs.mkdirs(blobParent);
    }

    LobFile.Writer lw = LobFile.create(blobFile, conf, false);
    try {
        long off = lw.tell();
        long len = data.length;
        OutputStream os = lw.writeBlobRecord(len);
        os.write(data, 0, data.length);
        os.close();
        lw.close();

        String refString = "externalLob(lf," + filename + "," + off + "," + len + ")";
        BlobRef blob = BlobRef.parse(refString);
        assertTrue(blob.isExternal());
        assertEquals(refString, blob.toString());
        InputStream is = blob.getDataStream(conf, tmpPath);
        assertNotNull(is);

        byte[] buf = new byte[4096];
        int bytes = is.read(buf, 0, 4096);
        is.close();

        assertEquals(data.length, bytes);
        for (int i = 0; i < bytes; i++) {
            assertEquals(data[i], buf[i]);
        }
    } finally {
        fs.delete(blobFile, false);
    }
}
From source file:com.cloudera.sqoop.lib.TestClobRef.java
License:Apache License
private void doExternalTest(final String data, final String filename) throws IOException {
    Configuration conf = new Configuration();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
        conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    FileSystem fs = FileSystem.get(conf);
    String tmpDir = System.getProperty("test.build.data", "/tmp/");

    Path tmpPath = new Path(tmpDir);
    Path clobFile = new Path(tmpPath, filename);

    // make any necessary parent dirs.
    Path clobParent = clobFile.getParent();
    if (!fs.exists(clobParent)) {
        fs.mkdirs(clobParent);
    }

    LobFile.Writer lw = LobFile.create(clobFile, conf, true);
    try {
        long off = lw.tell();
        long len = data.length();
        Writer w = lw.writeClobRecord(len);
        w.append(data);
        w.close();
        lw.close();

        String refString = "externalLob(lf," + filename + "," + off + "," + len + ")";
        ClobRef clob = ClobRef.parse(refString);
        assertTrue(clob.isExternal());
        assertEquals(refString, clob.toString());
        Reader r = clob.getDataStream(conf, tmpPath);
        assertNotNull(r);

        char[] buf = new char[4096];
        int chars = r.read(buf, 0, 4096);
        r.close();

        String str = new String(buf, 0, chars);
        assertEquals(data, str);
    } finally {
        fs.delete(clobFile, false);
    }
}
From source file:com.cloudera.sqoop.lib.TestLargeObjectLoader.java
License:Apache License
public void setUp() throws IOException, InterruptedException {
    conf = new Configuration();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
        conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    String tmpDir = System.getProperty("test.build.data", "/tmp/");
    this.outDir = new Path(new Path(tmpDir), "testLobLoader");
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outDir)) {
        fs.delete(outDir, true);
    }
    fs.mkdirs(outDir);

    loader = new LargeObjectLoader(conf, outDir);
}
From source file:com.cloudera.sqoop.manager.DirectMySQLExportTest.java
License:Apache License
/**
 * Test an authenticated export using mysqlimport.
 */
public void testAuthExport() throws IOException, SQLException {
    SqoopOptions options = new SqoopOptions(MySQLAuthTest.AUTH_CONNECT_STRING, getTableName());
    options.setUsername(MySQLAuthTest.AUTH_TEST_USER);
    options.setPassword(MySQLAuthTest.AUTH_TEST_PASS);

    manager = new DirectMySQLManager(options);

    Connection connection = null;
    Statement st = null;

    String tableName = getTableName();

    try {
        connection = manager.getConnection();
        connection.setAutoCommit(false);
        st = connection.createStatement();

        // create a target database table.
        st.executeUpdate("DROP TABLE IF EXISTS " + tableName);
        st.executeUpdate("CREATE TABLE " + tableName + " ("
            + "id INT NOT NULL PRIMARY KEY, "
            + "msg VARCHAR(24) NOT NULL)");
        connection.commit();

        // Write a file containing a record to export.
        Path tablePath = getTablePath();
        Path filePath = new Path(tablePath, "datafile");
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "file:///");

        FileSystem fs = FileSystem.get(conf);
        fs.mkdirs(tablePath);
        OutputStream os = fs.create(filePath);
        BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os));
        w.write(getRecordLine(0));
        w.write(getRecordLine(1));
        w.write(getRecordLine(2));
        w.close();
        os.close();

        // run the export and verify that the results are good.
        runExport(getArgv(true, 10, 10,
            "--username", MySQLAuthTest.AUTH_TEST_USER,
            "--password", MySQLAuthTest.AUTH_TEST_PASS,
            "--connect", MySQLAuthTest.AUTH_CONNECT_STRING));
        verifyExport(3, connection);
    } catch (SQLException sqlE) {
        LOG.error("Encountered SQL Exception: " + sqlE);
        sqlE.printStackTrace();
        fail("SQLException when accessing target table. " + sqlE);
    } finally {
        try {
            if (null != st) {
                st.close();
            }
            if (null != connection) {
                connection.close();
            }
        } catch (SQLException sqlE) {
            LOG.warn("Got SQLException when closing connection: " + sqlE);
        }
    }
}