List of usage examples for org.apache.hadoop.fs.FileSystem#getLocal
public static LocalFileSystem getLocal(Configuration conf) throws IOException
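Before the project examples below, a minimal sketch of the call itself. The class name and path here are illustrative; the point is only that getLocal always returns the local file system, regardless of the configured default file system:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;

public class GetLocalExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // getLocal() returns the local file system even when
        // fs.defaultFS (or the older fs.default.name) points at HDFS.
        LocalFileSystem localFs = FileSystem.getLocal(conf);
        Path p = new Path("/tmp/getlocal-example.txt"); // illustrative path
        System.out.println(p + " exists: " + localFs.exists(p));
    }
}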
From source file: com.datasalt.utils.commons.BaseConfigurationFactory.java
License: Apache License

public void synchronize() throws IOException {
    //String projectName = projectConf.getProjectName();
    Configuration conf = new Configuration();
    FileSystem dFs = FileSystem.get(conf);
    FileSystem local = FileSystem.getLocal(conf);
    Path libPath = new Path("/" + projectName + "/lib");
    Path confPath = new Path("/" + projectName + "/conf");
    Path localLibPath = new Path("lib");
    Path localConfPath = new Path("conf");
    log.info("Synchronizing " + localLibPath + " to " + libPath);
    HadoopUtils.synchronize(local, localLibPath, dFs, libPath);
    log.info("Synchronizing " + localConfPath + " to " + confPath);
    HadoopUtils.synchronize(local, localConfPath, dFs, confPath);
}
From source file: com.datasalt.utils.commons.io.DumpSequenceFileAsText.java
License: Apache License

@Override
public void execute(String[] args, Configuration conf) throws Exception {
    boolean localFilesystem = false;
    long pos = -1;
    long rows = -1;

    // Command line parsing
    ArrayList<String> parameters = new ArrayList<String>();
    for (int i = 0; i < args.length; i++) {
        if ("-l".equals(args[i])) {
            localFilesystem = true;
        } else if ("-p".equals(args[i])) {
            pos = new Long(args[++i]);
        } else if ("-n".equals(args[i])) {
            rows = new Long(args[++i]);
        } else {
            parameters.add(args[i]);
        }
    }
    if (parameters.size() != 1) {
        System.out.println(HELP);
        throw new IllegalArgumentException("Invalid number of arguments");
    }

    FileSystem fs;
    if (localFilesystem) {
        fs = FileSystem.getLocal(conf);
    } else {
        fs = FileSystem.get(conf);
    }
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(parameters.get(0)), conf);

    // Syncing the file at a position
    if (pos != -1) {
        reader.sync(pos);
    }

    // Creating objects for reading
    Object key = reader.getKeyClass().newInstance();
    Object value = null;
    try {
        value = reader.getValueClass().newInstance();
    } catch (Exception e) {
        System.err.println(" Warning : Couldn't get a reader class instantiated : " + e.getMessage()
                + " --- NullWritable objects are known to cause this problem.");
    }

    int count = 0;
    while (reader.next(key) != null) {
        if (rows != -1 && count == rows) {
            break;
        }
        if (value != null) {
            reader.getCurrentValue(value);
        }
        if (value != null) {
            System.out.println(stringfy(key) + "\t" + stringfy(value));
        } else {
            System.out.println("key=" + stringfy(key));
        }
        count++;
    }
    System.err.println("done.");
    reader.close();
}
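A hedged sketch of invoking this tool directly; the sequence-file path is made up, and per the argument parsing above, -l selects the local file system while the single positional argument is the file to dump:

// Hypothetical invocation; "/tmp/data.seq" is an illustrative path.
new DumpSequenceFileAsText().execute(new String[] { "-l", "/tmp/data.seq" }, new Configuration());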
From source file: com.datasalt.utils.commons.TestHadoopUtils.java
License: Apache License

@Test
public void testStringToFile() throws IOException {
    FileSystem fs = FileSystem.getLocal(getConf());
    Path path = new Path(TestHadoopUtils.class.getCanonicalName());
    try {
        String text = "String\nDe Prueba";
        for (int i = 0; i < 10; i++) {
            text += text;
        }
        HadoopUtils.stringToFile(fs, path, text);
        String read = HadoopUtils.fileToString(fs, path);
        assertEquals(text, read);
    } finally {
        fs.delete(path, true);
    }
}
From source file: com.datasalt.utils.commons.TestRepoTool.java
License: Apache License

@Test
public void test() throws IOException {
    FileSystem fs = FileSystem.getLocal(getConf());
    Path repo = new Path("repoTest87463829");
    HadoopUtils.deleteIfExists(fs, repo);
    RepoTool tool = new RepoTool(repo, "pkg", fs);
    assertNull(tool.getNewestPackageWithStatus(PackageStatus.NOT_DEFINED));
    Path pkg1 = tool.newPackage();
    assertEquals("pkg", pkg1.getName().substring(0, 3));
    assertEquals(pkg1.makeQualified(fs), tool.getNewestPackageWithStatus(PackageStatus.NOT_DEFINED));
    Path pkg2 = tool.newPackage();
    assertEquals(pkg2.makeQualified(fs), tool.getNewestPackageWithStatus(PackageStatus.NOT_DEFINED));
    assertEquals(2, tool.getPackages().length);
    RepoTool.setStatus(fs, pkg2, PackageStatus.FINISHED);
    assertEquals(pkg2.makeQualified(fs), tool.getNewestPackageWithStatus(PackageStatus.FINISHED));
    HadoopUtils.deleteIfExists(fs, repo);
}
From source file: com.datatorrent.lib.io.fs.AbstractFileInputOperatorTest.java
License: Open Source License

@Test
public void testPartitioning() throws Exception {
    TestFileInputOperator oper = new TestFileInputOperator();
    oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
    oper.setDirectory(new File(testMeta.dir).getAbsolutePath());

    Path path = new Path(new File(testMeta.dir).getAbsolutePath());
    FileContext.getLocalFSFileContext().delete(path, true);
    for (int file = 0; file < 4; file++) {
        FileUtils.write(new File(testMeta.dir, "partition00" + file), "");
    }

    List<Partition<AbstractFileInputOperator<String>>> partitions = Lists.newArrayList();
    partitions.add(new DefaultPartition<AbstractFileInputOperator<String>>(oper));
    Collection<Partition<AbstractFileInputOperator<String>>> newPartitions = oper.definePartitions(partitions,
            new PartitioningContextImpl(null, 2));
    Assert.assertEquals(2, newPartitions.size());
    Assert.assertEquals(1, oper.getCurrentPartitions()); // partitioned() wasn't called
    for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
        Assert.assertNotSame(oper, p.getPartitionedInstance());
        Assert.assertNotSame(oper.getScanner(), p.getPartitionedInstance().getScanner());
        Set<String> consumed = Sets.newHashSet();
        LinkedHashSet<Path> files = p.getPartitionedInstance().getScanner()
                .scan(FileSystem.getLocal(new Configuration(false)), path, consumed);
        Assert.assertEquals("partition " + files, 2, files.size());
    }
}
From source file: com.datatorrent.lib.io.fs.AbstractFSDirectoryInputOperatorTest.java
License: Open Source License

@Test
public void testPartitioning() throws Exception {
    TestFSDirectoryInputOperator oper = new TestFSDirectoryInputOperator();
    oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
    oper.setDirectory(new File(testMeta.dir).getAbsolutePath());

    Path path = new Path(new File(testMeta.dir).getAbsolutePath());
    FileContext.getLocalFSFileContext().delete(path, true);
    for (int file = 0; file < 4; file++) {
        FileUtils.write(new File(testMeta.dir, "partition00" + file), "");
    }

    List<Partition<AbstractFSDirectoryInputOperator<String>>> partitions = Lists.newArrayList();
    partitions.add(new DefaultPartition<AbstractFSDirectoryInputOperator<String>>(oper));
    Collection<Partition<AbstractFSDirectoryInputOperator<String>>> newPartitions = oper
            .definePartitions(partitions, 1);
    Assert.assertEquals(2, newPartitions.size());
    Assert.assertEquals(2, oper.getCurrentPartitions());
    for (Partition<AbstractFSDirectoryInputOperator<String>> p : newPartitions) {
        Assert.assertNotSame(oper, p.getPartitionedInstance());
        Assert.assertNotSame(oper.getScanner(), p.getPartitionedInstance().getScanner());
        Set<String> consumed = Sets.newHashSet();
        LinkedHashSet<Path> files = p.getPartitionedInstance().getScanner()
                .scan(FileSystem.getLocal(new Configuration(false)), path, consumed);
        Assert.assertEquals("partition " + files, 2, files.size());
    }
}
From source file: com.digitalpebble.behemoth.SerializationTest.java
License: Apache License

@Override
protected void setUp() throws Exception {
    conf = BehemothConfiguration.create();
    fs = FileSystem.getLocal(conf);
    file = new Path("test_" + System.currentTimeMillis());
}
From source file: com.ebay.erl.mobius.core.criterion.TupleRestrictions.java
License: Apache License

/**
 * Create a tuple criterion that only accepts tuples whose value for the
 * <code>column</code> is present in the given <code>file</code>.
 * <p>
 *
 * The file is assumed to be a single-column text file with one or more
 * lines. Each line is read into a case-insensitive set, and that set is
 * used to check whether the value of the <code>column</code> is contained
 * in it.
 *
 * @param column the name of the column whose value is tested for
 *               membership in the given <code>file</code>
 *
 * @param file a single-column, multi-line file containing strings/numbers;
 *             each line is treated as a single unit.
 *
 * @return an instance of {@link TupleCriterion} that accepts only the
 *         records whose <code>column</code> value is present in the given
 *         <code>file</code>.
 *
 * @throws FileNotFoundException if the given file cannot be found.
 */
public static TupleCriterion within(final String column, File file) throws FileNotFoundException {
    final File f = TupleRestrictions.checkFileExist(file);

    return new TupleCriterion() {

        private static final long serialVersionUID = -1121221619118915652L;
        private Set<String> set;

        @Override
        public void setConf(Configuration conf) {
            try {
                if (conf.get("tmpfiles") == null || conf.get("tmpfiles").trim().length() == 0) {
                    conf.set("tmpfiles", validateFiles(f.getAbsolutePath(), conf));
                } else {
                    conf.set("tmpfiles", validateFiles(f.getAbsolutePath(), conf) + "," + conf.get("tmpfiles"));
                }
            } catch (IOException e) {
                throw new IllegalArgumentException(e);
            }
        }

        /**
         * COPIED FROM org.apache.hadoop.util.GenericOptionsParser
         */
        private String validateFiles(String files, Configuration conf) throws IOException {
            if (files == null)
                return null;
            String[] fileArr = files.split(",");
            String[] finalArr = new String[fileArr.length];
            for (int i = 0; i < fileArr.length; i++) {
                String tmp = fileArr[i];
                String finalPath;
                Path path = new Path(tmp);
                URI pathURI = path.toUri();
                FileSystem localFs = FileSystem.getLocal(conf);
                if (pathURI.getScheme() == null) {
                    // default to the local file system
                    // check if the file exists or not first
                    if (!localFs.exists(path)) {
                        throw new FileNotFoundException("File " + tmp + " does not exist.");
                    }
                    finalPath = path.makeQualified(localFs).toString();
                } else {
                    // check if the file exists in this file system
                    // we need to recreate this filesystem object to copy
                    // these files to the file system jobtracker is running
                    // on.
                    FileSystem fs = path.getFileSystem(conf);
                    if (!fs.exists(path)) {
                        throw new FileNotFoundException("File " + tmp + " does not exist.");
                    }
                    finalPath = path.makeQualified(fs).toString();
                    try {
                        fs.close();
                    } catch (IOException e) {
                    }
                }
                finalArr[i] = finalPath;
            }
            return StringUtils.arrayToString(finalArr);
        }

        @Override
        protected boolean evaluate(Tuple tuple, Configuration configuration) {
            if (set == null) {
                // Lazily load the membership set; the file was shipped via
                // "tmpfiles" (the distributed cache), so it is opened by name
                // in the task's working directory.
                set = new CaseInsensitiveTreeSet();
                BufferedReader br = null;
                try {
                    br = new BufferedReader(new FileReader(new File(f.getName())));
                    String newLine = null;
                    while ((newLine = br.readLine()) != null) {
                        this.set.add(newLine);
                    }
                } catch (IOException e) {
                    throw new RuntimeException(e);
                } finally {
                    if (br != null) {
                        try {
                            br.close();
                        } catch (Throwable e) {
                        }
                    }
                }
            }

            String value = tuple.getString(column);
            if (value != null) {
                return this.set.contains(value);
            } else {
                return false;
            }
        }

        @Override
        public String[] getInvolvedColumns() {
            return new String[] { column };
        }
    };
}
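For orientation, a minimal sketch of calling this factory method; the column name and file path are illustrative and not taken from the Mobius sources, and wiring the resulting criterion into a Mobius job is not shown here:

// Hypothetical usage; "SELLER_ID" and "sellers.txt" are made-up examples.
TupleCriterion inSellerList = TupleRestrictions.within("SELLER_ID", new File("sellers.txt"));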
From source file: com.ebay.erl.mobius.core.datajoin.EvenlyPartitioner.java
License: Apache License

/**
 * Read in the partition file and build indexing data structures.
 * If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and
 * <tt>total.order.partitioner.natural.order</tt> is not false, a trie
 * of the first <tt>total.order.partitioner.max.trie.depth</tt> (default 2) + 1
 * bytes will be built. Otherwise, keys will be located using a binary
 * search of the partition keyset using the
 * {@link org.apache.hadoop.io.RawComparator} defined for this job. The
 * input file must be sorted with the same comparator and contain
 * {@link org.apache.hadoop.mapred.JobConf#getNumReduceTasks} - 1 keys.
 */
@SuppressWarnings("unchecked") // keytype from conf not static
public void configure(JobConf job) {
    try {
        String parts = getPartitionFile(job);
        final Path partFile = new Path(parts);
        final FileSystem fs = (DEFAULT_PATH.equals(parts)) ? FileSystem.getLocal(job) // assume in DistributedCache
                : partFile.getFileSystem(job);

        //Class<K> keyClass = (Class<K>)job.getMapOutputKeyClass();
        K[] splitPoints = readPartitions(fs, partFile, (Class<K>) Tuple.class, job);
        if (splitPoints.length != job.getNumReduceTasks() - 1) {
            throw new IOException("Wrong number of partitions in keyset");
        }
        RawComparator<K> comparator = (RawComparator<K>) job.getOutputKeyComparator();
        for (int i = 0; i < splitPoints.length - 1; ++i) {
            if (comparator.compare(splitPoints[i], splitPoints[i + 1]) >= 0) {
                throw new IOException("Split points are out of order");
            }
        }
        boolean natOrder = job.getBoolean("total.order.partitioner.natural.order", true);
        if (natOrder && BinaryComparable.class.isAssignableFrom(Tuple.class)) {
            partitions = buildTrie((BinaryComparable[]) splitPoints, 0, splitPoints.length, new byte[0],
                    job.getInt("total.order.partitioner.max.trie.depth", 2));
        } else {
            partitions = new BinarySearchNode(splitPoints, comparator);
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("Can't read partitions file", e);
    }
}
From source file: com.facebook.hive.orc.TestFileDump.java
License: Apache License

@Before
public void openFileSystem() throws Exception {
    conf = new Configuration();
    fs = FileSystem.getLocal(conf);
    fs.mkdirs(workDir);
    testFilePath = new Path(workDir, "TestFileDump.testDump.orc");
    fs.delete(testFilePath, false);
}