/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.yarn;

import org.apache.flink.util.OperatingSystem;
import org.apache.flink.util.TestLogger;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.junit.AfterClass;
import org.junit.Assume;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import java.io.DataOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;

/**
 * Tests for verifying that file staging during submission to YARN works.
 */
public class YarnFileStageTest extends TestLogger {

	@ClassRule
	public static final TemporaryFolder CLASS_TEMP_DIR = new TemporaryFolder();

	@Rule
	public TemporaryFolder tempFolder = new TemporaryFolder();

	private static MiniDFSCluster hdfsCluster;

	private static Path hdfsRootPath;

	private org.apache.hadoop.conf.Configuration hadoopConfig;

	// ------------------------------------------------------------------------
	//  Test setup and shutdown
	// ------------------------------------------------------------------------

	@BeforeClass
	public static void createHDFS() throws Exception {
		// skip this test class on Windows
		Assume.assumeTrue(!OperatingSystem.isWindows());

		final File tempDir = CLASS_TEMP_DIR.newFolder();

		org.apache.hadoop.conf.Configuration hdConf = new org.apache.hadoop.conf.Configuration();
		hdConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, tempDir.getAbsolutePath());

		MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(hdConf);
		hdfsCluster = builder.build();
		hdfsRootPath = new Path(hdfsCluster.getURI());
	}

	@AfterClass
	public static void destroyHDFS() {
		if (hdfsCluster != null) {
			hdfsCluster.shutdown();
		}
		hdfsCluster = null;
		hdfsRootPath = null;
	}

	@Before
	public void initConfig() {
		// make the mini cluster's HDFS the default file system for each test
		hadoopConfig = new org.apache.hadoop.conf.Configuration();
		hadoopConfig.set(org.apache.hadoop.fs.FileSystem.FS_DEFAULT_NAME_KEY, hdfsRootPath.toString());
	}
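	// A minimal sketch (kept as a comment, not executed by the tests below) of
	// how the mini cluster configured above can be exercised directly via the
	// Hadoop FileSystem API; the path "/example" is purely illustrative:
	//
	//   FileSystem fs = hdfsRootPath.getFileSystem(hadoopConfig);
	//   try (FSDataOutputStream out = fs.create(new Path("/example"))) {
	//       out.writeUTF("hello");
	//   }
	//   try (FSDataInputStream in = fs.open(new Path("/example"))) {
	//       assertEquals("hello", in.readUTF());
	//   }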
	/**
	 * Verifies that nested directories are properly copied with a <tt>hdfs://</tt> file
	 * system (from a <tt>file:///absolute/path</tt> source path).
	 */
	@Test
	public void testCopyFromLocalRecursiveWithScheme() throws Exception {
		final FileSystem targetFileSystem = hdfsRootPath.getFileSystem(hadoopConfig);
		final Path targetDir = targetFileSystem.getWorkingDirectory();

		testCopyFromLocalRecursive(targetFileSystem, targetDir, tempFolder, true);
	}

	/**
	 * Verifies that nested directories are properly copied with a <tt>hdfs://</tt> file
	 * system (from a <tt>/absolute/path</tt> source path).
	 */
	@Test
	public void testCopyFromLocalRecursiveWithoutScheme() throws Exception {
		final FileSystem targetFileSystem = hdfsRootPath.getFileSystem(hadoopConfig);
		final Path targetDir = targetFileSystem.getWorkingDirectory();

		testCopyFromLocalRecursive(targetFileSystem, targetDir, tempFolder, false);
	}
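	/*
	 * For illustration, an expectation that follows from the lookup inside
	 * testCopyFromLocalRecursive() below: given a source directory "dir"
	 * containing files "1" and "nested/3", the registered resource keys take
	 * the form "dir/1" and "dir/nested/3", which is why the work directory is
	 * resolved via localResources.get(srcPath.getName() + "/1").
	 */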
	/**
	 * Verifies that nested directories are properly copied with the given filesystem and paths.
	 *
	 * @param targetFileSystem
	 * 		file system of the target path
	 * @param targetDir
	 * 		target path (URI like <tt>hdfs://...</tt>)
	 * @param tempFolder
	 * 		JUnit temporary folder rule to create the source directory with
	 * @param addSchemeToLocalPath
	 * 		whether to add the <tt>file://</tt> scheme to the local path to copy from
	 */
	static void testCopyFromLocalRecursive(
			FileSystem targetFileSystem,
			Path targetDir,
			TemporaryFolder tempFolder,
			boolean addSchemeToLocalPath) throws Exception {

		// directory must not yet exist
		assertFalse(targetFileSystem.exists(targetDir));

		final File srcDir = tempFolder.newFolder();
		final Path srcPath;
		if (addSchemeToLocalPath) {
			srcPath = new Path("file://" + srcDir.getAbsolutePath());
		} else {
			srcPath = new Path(srcDir.getAbsolutePath());
		}

		HashMap<String /* (relative) path */, /* contents */ String> srcFiles = new HashMap<>(4);

		// create and fill source files
		srcFiles.put("1", "Hello 1");
		srcFiles.put("2", "Hello 2");
		srcFiles.put("nested/3", "Hello nested/3");
		srcFiles.put("nested/4/5", "Hello nested/4/5");
		for (Map.Entry<String, String> src : srcFiles.entrySet()) {
			File file = new File(srcDir, src.getKey());
			//noinspection ResultOfMethodCallIgnored
			file.getParentFile().mkdirs();
			try (DataOutputStream out = new DataOutputStream(new FileOutputStream(file))) {
				out.writeUTF(src.getValue());
			}
		}

		// copy the created directory recursively:
		try {
			List<Path> remotePaths = new ArrayList<>();
			HashMap<String, LocalResource> localResources = new HashMap<>();
			AbstractYarnClusterDescriptor.uploadAndRegisterFiles(
				Collections.singletonList(new File(srcPath.toUri().getPath())),
				targetFileSystem,
				targetDir,
				ApplicationId.newInstance(0, 0),
				remotePaths,
				localResources,
				new StringBuilder());
			assertEquals(srcFiles.size(), localResources.size());

			Path workDir = ConverterUtils
				.getPathFromYarnURL(localResources.get(srcPath.getName() + "/1").getResource())
				.getParent();

			RemoteIterator<LocatedFileStatus> targetFilesIterator =
				targetFileSystem.listFiles(workDir, true);
			HashMap<String /* (relative) path */, /* contents */ String> targetFiles =
				new HashMap<>(4);

			final int workDirPrefixLength =
				workDir.toString().length() + 1; // one more for the concluding "/"
			while (targetFilesIterator.hasNext()) {
				LocatedFileStatus targetFile = targetFilesIterator.next();

				int retries = 5;
				do {
					try (FSDataInputStream in = targetFileSystem.open(targetFile.getPath())) {
						String absolutePathString = targetFile.getPath().toString();
						String relativePath = absolutePathString.substring(workDirPrefixLength);
						targetFiles.put(relativePath, in.readUTF());

						assertEquals("extraneous data in file " + relativePath, -1, in.read());
						break;
					} catch (FileNotFoundException e) {
						// For S3, read-after-write may be eventually consistent, i.e. when trying
						// to access the object before writing it; see
						// https://docs.aws.amazon.com/AmazonS3/latest/dev/Introduction.html#ConsistencyModel
						// -> try again a bit later
						Thread.sleep(50);
					}
				} while ((retries--) > 0);
			}

			assertThat(targetFiles, equalTo(srcFiles));
		} finally {
			// clean up
			targetFileSystem.delete(targetDir, true);
		}
	}
}