List of usage examples for org.apache.hadoop.fs FileSystem getLocal
public static LocalFileSystem getLocal(Configuration conf) throws IOException
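FileSystem.getLocal returns the LocalFileSystem implementation for the given Configuration, i.e. a FileSystem backed by the local disk rather than HDFS. Before the collected examples, here is a minimal, self-contained sketch of typical usage (the /tmp path below is illustrative, not taken from the examples that follow):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;

public class GetLocalExample {
    public static void main(String[] args) throws IOException {
        // new Configuration(false) skips loading the default resources
        // (core-default.xml, core-site.xml); new Configuration() loads them.
        // Either works for obtaining the local file system.
        LocalFileSystem localFs = FileSystem.getLocal(new Configuration());

        // Illustrative path, not taken from the usage examples below.
        Path dir = new Path("/tmp/getlocal-example");
        if (!localFs.exists(dir)) {
            localFs.mkdirs(dir);
        }
        System.out.println("working directory: " + localFs.getWorkingDirectory());
    }
}

Several of the examples below pass new Configuration(false) to getLocal; that is simply a way to avoid touching cluster configuration files in a unit test.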
From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorTest.java
License:Apache License
@Test
public void testPartitioning() throws Exception {
    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
    oper.setDirectory(new File(testMeta.dir).getAbsolutePath());

    Path path = new Path(new File(testMeta.dir).getAbsolutePath());
    FileContext.getLocalFSFileContext().delete(path, true);
    for (int file = 0; file < 4; file++) {
        FileUtils.write(new File(testMeta.dir, "partition00" + file), "");
    }

    List<Partition<AbstractFileInputOperator<String>>> partitions = Lists.newArrayList();
    partitions.add(new DefaultPartition<AbstractFileInputOperator<String>>(oper));
    Collection<Partition<AbstractFileInputOperator<String>>> newPartitions =
            oper.definePartitions(partitions, new PartitioningContextImpl(null, 2));
    Assert.assertEquals(2, newPartitions.size());
    Assert.assertEquals(1, oper.getCurrentPartitions()); // partitioned() wasn't called

    for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
        Assert.assertNotSame(oper, p.getPartitionedInstance());
        Assert.assertNotSame(oper.getScanner(), p.getPartitionedInstance().getScanner());
        Set<String> consumed = Sets.newHashSet();
        LinkedHashSet<Path> files = p.getPartitionedInstance().getScanner()
                .scan(FileSystem.getLocal(new Configuration(false)), path, consumed);
        Assert.assertEquals("partition " + files, 3, files.size());
    }
}
From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorTest.java
License:Apache License
/**
 * Partitions the operator in two.
 * Creates ten files with the index of the file at the start, i.e. 1_file, 2_file, etc.
 * The scanner returns this index from its getPartition method.
 * Each partition should read 5 files, as the file indexes run from 0 to 9 (inclusive).
 * @throws Exception
 */
@Test
public void testWithCustomScanner() throws Exception {
    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    oper.setScanner(new MyScanner());
    oper.getScanner().setFilePatternRegexp(".*partition_([\\d]*)");
    oper.setDirectory(new File(testMeta.dir).getAbsolutePath());

    Random rand = new Random();
    Path path = new Path(new File(testMeta.dir).getAbsolutePath());
    FileContext.getLocalFSFileContext().delete(path, true);
    for (int file = 0; file < 10; file++) {
        FileUtils.write(new File(testMeta.dir, file + "_partition_00" + rand.nextInt(100)), "");
    }

    List<Partition<AbstractFileInputOperator<String>>> partitions = Lists.newArrayList();
    partitions.add(new DefaultPartition<AbstractFileInputOperator<String>>(oper));
    Collection<Partition<AbstractFileInputOperator<String>>> newPartitions =
            oper.definePartitions(partitions, new PartitioningContextImpl(null, 2));
    Assert.assertEquals(2, newPartitions.size());
    Assert.assertEquals(1, oper.getCurrentPartitions()); // partitioned() wasn't called

    for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
        Assert.assertNotSame(oper, p.getPartitionedInstance());
        Assert.assertNotSame(oper.getScanner(), p.getPartitionedInstance().getScanner());
        Set<String> consumed = Sets.newHashSet();
        LinkedHashSet<Path> files = p.getPartitionedInstance().getScanner()
                .scan(FileSystem.getLocal(new Configuration(false)), path, consumed);
        Assert.assertEquals("partition " + files, 6, files.size());
    }
}
From source file:org.apache.avro.mapred.TestAvroInputFormat.java
License:Apache License
@Before
public void setUp() throws Exception {
    conf = new JobConf();
    fs = FileSystem.getLocal(conf);
    inputDir = new Path(TEST_DIR);
}
From source file:org.apache.beam.sdk.extensions.sorter.ExternalSorter.java
License:Apache License
/**
 * Initializes the Hadoop sorter. Does some local file system setup, and is somewhat expensive
 * (~20 ms on a local machine). Only executed when necessary.
 */
private void initHadoopSorter() throws IOException {
    if (!initialized) {
        tempDir = new Path(options.getTempLocation(), "tmp" + UUID.randomUUID().toString());
        paths = new Path[] { new Path(tempDir, "test.seq") };

        JobConf conf = new JobConf();
        // Sets the directory for intermediate files created during the merge phase of merge sort
        conf.set("io.seqfile.local.dir", tempDir.toUri().getPath());

        writer = SequenceFile.createWriter(conf, Writer.valueClass(BytesWritable.class),
                Writer.keyClass(BytesWritable.class), Writer.file(paths[0]),
                Writer.compression(CompressionType.NONE));

        FileSystem fs = FileSystem.getLocal(conf);
        // Directory has to exist for Hadoop to recognize it as deletable on exit
        fs.mkdirs(tempDir);
        fs.deleteOnExit(tempDir);

        sorter = new SequenceFile.Sorter(fs, new BytesWritable.Comparator(), BytesWritable.class,
                BytesWritable.class, conf);
        sorter.setMemory(options.getMemoryMB() * 1024 * 1024);

        initialized = true;
    }
}
From source file:org.apache.bigtop.bigpetstore.generator.TestPetStoreTransactionGeneratorJob.java
License:Apache License
@Test
public void test() throws Exception {
    System.out.println("memory : " + Runtime.getRuntime().freeMemory() / 1000000);
    if (Runtime.getRuntime().freeMemory() / 1000000 < 75) {
        // throw new RuntimeException("need more memory to run this test!");
    }

    int records = 20;

    /** Set up the configuration with the record-count property. */
    Configuration c = new Configuration();
    c.setInt(props.bigpetstore_records.name(), records);

    /** Run the job. */
    Path output = new Path("petstoredata/" + (new Date()).toString());
    Job createInput = BPSGenerator.getCreateTransactionRecordsJob(output, c);
    createInput.submit();
    System.out.println(createInput);
    createInput.waitForCompletion(true);

    FileSystem fs = FileSystem.getLocal(new Configuration());

    /** Read the file output. */
    DataInputStream f = fs.open(new Path(output, "part-r-00000"));
    BufferedReader br = new BufferedReader(new InputStreamReader(f));
    String s;
    int recordsSeen = 0;
    boolean CTseen = false;
    boolean AZseen = false;

    // Confirm that both CT and AZ are seen in the outputs.
    while (br.ready()) {
        s = br.readLine();
        System.out.println("===>" + s);
        recordsSeen++;
        if (s.contains(State.CT.name())) {
            CTseen = true;
        }
        if (s.contains(State.AZ.name())) {
            AZseen = true;
        }
    }

    // Records seen should equal 20.
    assertEquals(records, recordsSeen);

    // Assert that a couple of the states are seen (TODO: make it comprehensive for all states).
    assertTrue(CTseen);
    assertTrue(AZseen);

    log.info("Created " + records + " , file was "
            + fs.getFileStatus(new Path(output, "part-r-00000")).getLen() + " bytes.");
}
From source file:org.apache.bigtop.bigpetstore.ITUtils.java
License:Apache License
/**
 * Creates a generated input data set in test_data_directory/generated,
 * i.e. test_data_directory/generated/part-r-00000.
 */
public static void setup() throws Throwable {
    Configuration conf = new Configuration();
    // debugging for Jeff and others in local fs that won't build
    checkConf(conf);
    conf.setInt(BPSGenerator.props.bigpetstore_records.name(), BPSGenerator.DEFAULT_NUM_RECORDS);
    if (FileSystem.getLocal(conf).exists(BPS_TEST_GENERATED)) {
        return;
    }

    Job createInput = BPSGenerator.getCreateTransactionRecordsJob(BPS_TEST_GENERATED, conf);
    createInput.waitForCompletion(true);

    Path outputfile = new Path(BPS_TEST_GENERATED, "part-r-00000");
    List<String> lines = Files.readLines(FileSystem.getLocal(conf).pathToFile(outputfile),
            Charset.defaultCharset());
    log.info("output : " + FileSystem.getLocal(conf).pathToFile(outputfile));
    for (String l : lines) {
        System.out.println(l);
    }
}
From source file:org.apache.bigtop.bigpetstore.ITUtils.java
License:Apache License
public static void assertOutput(Path base, Predicate<String> validator) throws Exception {
    FileSystem fs = FileSystem.getLocal(new Configuration());

    FileStatus[] files = fs.listStatus(base);
    // Print out all the files.
    for (FileStatus stat : files) {
        System.out.println(stat.getPath() + " " + stat.getLen());
    }

    /** Support map OR reduce outputs. */
    Path partm = new Path(base, "part-m-00000");
    Path partr = new Path(base, "part-r-00000");
    Path p = fs.exists(partm) ? partm : partr;

    /** Now we read through the file and validate its contents. */
    BufferedReader r = new BufferedReader(new InputStreamReader(fs.open(p)));

    // line:{"product":"big chew toy","count":3}
    while (r.ready()) {
        String line = r.readLine();
        log.info("line:" + line);
        // System.out.println("line:"+line);
        Assert.assertTrue("validating line : " + line, validator.apply(line));
    }
}
From source file:org.apache.blur.console.RunMiniCluster.java
License:Apache License
public static void main(String[] args) throws IOException {
    // GCWatcher.init(0.60);
    LocalFileSystem localFS = FileSystem.getLocal(new Configuration());
    File testDirectory = new File(TMPDIR, "blur-cluster-test").getAbsoluteFile();
    testDirectory.mkdirs();

    Path directory = new Path(testDirectory.getPath());
    FsPermission dirPermissions = localFS.getFileStatus(directory).getPermission();
    FsAction userAction = dirPermissions.getUserAction();
    FsAction groupAction = dirPermissions.getGroupAction();
    FsAction otherAction = dirPermissions.getOtherAction();

    StringBuilder builder = new StringBuilder();
    builder.append(userAction.ordinal());
    builder.append(groupAction.ordinal());
    builder.append(otherAction.ordinal());
    String dirPermissionNum = builder.toString();

    System.setProperty("dfs.datanode.data.dir.perm", dirPermissionNum);
    testDirectory.delete();

    MiniCluster miniCluster = new MiniCluster();
    miniCluster.startBlurCluster(new File(testDirectory, "cluster").getAbsolutePath(), 2, 3, true, false);

    System.out.println("ZK Connection String = [" + miniCluster.getZkConnectionString() + "]");
    System.out.println("Controller Connection String = [" + miniCluster.getControllerConnectionStr() + "]");
    System.out.println("HDFS URI = [" + miniCluster.getFileSystemUri() + "]");
}
From source file:org.apache.blur.hive.BlurSerDeTest.java
License:Apache License
@BeforeClass
public static void startCluster() throws IOException {
    System.setProperty("hadoop.log.dir", "./target/tmp_BlurSerDeTest_hadoop_log");
    GCWatcher.init(0.60);

    LocalFileSystem localFS = FileSystem.getLocal(new Configuration());
    File testDirectory = new File(TMPDIR, "blur-SerDe-test").getAbsoluteFile();
    testDirectory.mkdirs();

    Path directory = new Path(testDirectory.getPath());
    FsPermission dirPermissions = localFS.getFileStatus(directory).getPermission();
    FsAction userAction = dirPermissions.getUserAction();
    FsAction groupAction = dirPermissions.getGroupAction();
    FsAction otherAction = dirPermissions.getOtherAction();

    StringBuilder builder = new StringBuilder();
    builder.append(userAction.ordinal());
    builder.append(groupAction.ordinal());
    builder.append(otherAction.ordinal());
    String dirPermissionNum = builder.toString();

    System.setProperty("dfs.datanode.data.dir.perm", dirPermissionNum);
    testDirectory.delete();

    miniCluster = new MiniCluster();
    miniCluster.startBlurCluster(new File(testDirectory, "cluster").getAbsolutePath(), 2, 3, true,
            externalProcesses);
    miniCluster.startMrMiniCluster();
}
From source file:org.apache.blur.mapreduce.lib.BlurMapReduceUtil.java
License:Apache License
/**
 * Adds all the jars in the same path as the blur jar files.
 *
 * @param conf
 * @throws IOException
 */
public static void addAllJarsInBlurLib(Configuration conf) throws IOException {
    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();
    jars.addAll(conf.getStringCollection("tmpjars"));

    String property = System.getProperty("java.class.path");
    String[] files = property.split("\\:");
    String blurLibPath = getPath("blur-", files);
    if (blurLibPath == null) {
        return;
    }

    List<String> pathes = getPathes(blurLibPath, files);
    for (String pathStr : pathes) {
        Path path = new Path(pathStr);
        if (!localFs.exists(path)) {
            LOG.warn("Could not validate jar file " + path);
            continue;
        }
        jars.add(path.makeQualified(localFs.getUri(), localFs.getWorkingDirectory()).toString());
    }
    if (jars.isEmpty()) {
        return;
    }
    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[0])));
}
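A hedged sketch of how a utility like this would typically be invoked: while configuring a MapReduce job, before submission, so the discovered jars end up in the job's "tmpjars" distributed-cache list. The fragment below assumes org.apache.hadoop.mapreduce.Job, and the job name is illustrative:

Configuration conf = new Configuration();
// Populates "tmpjars" with the jars found alongside the blur jar files.
BlurMapReduceUtil.addAllJarsInBlurLib(conf);
Job job = Job.getInstance(conf, "blur-indexing-job"); // illustrative job name
// ...configure mapper/reducer and input/output paths, then submit the job.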