Example usage for org.apache.hadoop.fs FileSystem getLocal

List of usage examples for org.apache.hadoop.fs FileSystem getLocal

Introduction

This page collects example usages of org.apache.hadoop.fs.FileSystem.getLocal.

Prototype

public static LocalFileSystem getLocal(Configuration conf) throws IOException 

Document

Get the local FileSystem.
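
For orientation, here is a minimal, self-contained sketch of the call; the file path is a placeholder chosen for illustration and is not taken from the examples below:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;

public class GetLocalExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // getLocal(conf) returns a LocalFileSystem backed by the local disk.
        LocalFileSystem localFs = FileSystem.getLocal(conf);
        // Placeholder path, used only to demonstrate a call on the returned file system.
        Path path = new Path("/tmp/getlocal-example.txt");
        System.out.println(path + " exists: " + localFs.exists(path));
    }
}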

Usage

From source file:com.datasalt.utils.commons.BaseConfigurationFactory.java

License:Apache License

public void synchronize() throws IOException {
    //String projectName = projectConf.getProjectName();
    Configuration conf = new Configuration();
    FileSystem dFs = FileSystem.get(conf);
    FileSystem local = FileSystem.getLocal(conf);
    Path libPath = new Path("/" + projectName + "/lib");
    Path confPath = new Path("/" + projectName + "/conf");
    Path localLibPath = new Path("lib");
    Path localConfPath = new Path("conf");
    log.info("Synchronizing " + localLibPath + " to " + libPath);
    HadoopUtils.synchronize(local, localLibPath, dFs, libPath);
    log.info("Synchronizing " + localConfPath + " to " + confPath);
    HadoopUtils.synchronize(local, localConfPath, dFs, confPath);
}

From source file:com.datasalt.utils.commons.io.DumpSequenceFileAsText.java

License:Apache License

@Override
public void execute(String[] args, Configuration conf) throws Exception {
    boolean localFilesystem = false;
    long pos = -1;
    long rows = -1;

    // Command line parsing
    ArrayList<String> parameters = new ArrayList<String>();
    for (int i = 0; i < args.length; i++) {
        if ("-l".equals(args[i])) {
            localFilesystem = true;
        } else if ("-p".equals(args[i])) {
            pos = new Long(args[++i]);
        } else if ("-n".equals(args[i])) {
            rows = new Long(args[++i]);
        } else {
            parameters.add(args[i]);
        }
    }

    if (parameters.size() != 1) {
        System.out.println(HELP);
        throw new IllegalArgumentException("Invalid number of arguments");
    }

    FileSystem fs;
    if (localFilesystem) {
        fs = FileSystem.getLocal(conf);
    } else {
        fs = FileSystem.get(conf);
    }

    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(parameters.get(0)), conf);

    // Syncing the file at a position
    if (pos != -1) {
        reader.sync(pos);
    }

    // Creating objects for reading
    Object key = reader.getKeyClass().newInstance();
    Object value = null;
    try {
        value = reader.getValueClass().newInstance();
    } catch (Exception e) {
        System.err.println(" Warning : Couldn't get a reader class instantated : " + e.getMessage()
                + " --- NullWritable objects are known to cause this problem.");
    }

    int count = 0;

    while ((reader.next(key) != null)) {

        if (rows != -1 && count == rows) {
            break;
        }

        if (value != null) {
            reader.getCurrentValue(value);
        }

        if (value != null) {
            System.out.println(stringfy(key) + "\t" + stringfy(value));
        } else {
            System.out.println("key=" + stringfy(key));
        }

        count++;
    }
    System.err.println("done.");

    reader.close();
}

From source file:com.datasalt.utils.commons.TestHadoopUtils.java

License:Apache License

@Test
public void testStringToFile() throws IOException {
    FileSystem fs = FileSystem.getLocal(getConf());
    Path path = new Path(TestHadoopUtils.class.getCanonicalName());

    try {

        String text = "String\nDe Prueba";

        for (int i = 0; i < 10; i++) {
            text += text;
        }

        HadoopUtils.stringToFile(fs, path, text);
        String read = HadoopUtils.fileToString(fs, path);

        assertEquals(text, read);
    } finally {
        fs.delete(path, true);
    }
}

From source file:com.datasalt.utils.commons.TestRepoTool.java

License:Apache License

@Test
public void test() throws IOException {
    FileSystem fs = FileSystem.getLocal(getConf());

    Path repo = new Path("repoTest87463829");
    HadoopUtils.deleteIfExists(fs, repo);

    RepoTool tool = new RepoTool(repo, "pkg", fs);

    assertNull(tool.getNewestPackageWithStatus(PackageStatus.NOT_DEFINED));

    Path pkg1 = tool.newPackage();
    assertEquals("pkg", pkg1.getName().substring(0, 3));

    assertEquals(pkg1.makeQualified(fs), tool.getNewestPackageWithStatus(PackageStatus.NOT_DEFINED));

    Path pkg2 = tool.newPackage();
    assertEquals(pkg2.makeQualified(fs), tool.getNewestPackageWithStatus(PackageStatus.NOT_DEFINED));

    assertEquals(2, tool.getPackages().length);

    RepoTool.setStatus(fs, pkg2, PackageStatus.FINISHED);
    assertEquals(pkg2.makeQualified(fs), tool.getNewestPackageWithStatus(PackageStatus.FINISHED));

    HadoopUtils.deleteIfExists(fs, repo);
}

From source file:com.datatorrent.lib.io.fs.AbstractFileInputOperatorTest.java

License:Open Source License

@Test
public void testPartitioning() throws Exception {
    TestFileInputOperator oper = new TestFileInputOperator();
    oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
    oper.setDirectory(new File(testMeta.dir).getAbsolutePath());

    Path path = new Path(new File(testMeta.dir).getAbsolutePath());
    FileContext.getLocalFSFileContext().delete(path, true);
    for (int file = 0; file < 4; file++) {
        FileUtils.write(new File(testMeta.dir, "partition00" + file), "");
    }

    List<Partition<AbstractFileInputOperator<String>>> partitions = Lists.newArrayList();
    partitions.add(new DefaultPartition<AbstractFileInputOperator<String>>(oper));
    Collection<Partition<AbstractFileInputOperator<String>>> newPartitions = oper.definePartitions(partitions,
            new PartitioningContextImpl(null, 2));
    Assert.assertEquals(2, newPartitions.size());
    Assert.assertEquals(1, oper.getCurrentPartitions()); // partitioned() wasn't called

    for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
        Assert.assertNotSame(oper, p.getPartitionedInstance());
        Assert.assertNotSame(oper.getScanner(), p.getPartitionedInstance().getScanner());
        Set<String> consumed = Sets.newHashSet();
        LinkedHashSet<Path> files = p.getPartitionedInstance().getScanner()
                .scan(FileSystem.getLocal(new Configuration(false)), path, consumed);
        Assert.assertEquals("partition " + files, 2, files.size());
    }
}

From source file:com.datatorrent.lib.io.fs.AbstractFSDirectoryInputOperatorTest.java

License:Open Source License

@Test
public void testPartitioning() throws Exception {
    TestFSDirectoryInputOperator oper = new TestFSDirectoryInputOperator();
    oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
    oper.setDirectory(new File(testMeta.dir).getAbsolutePath());

    Path path = new Path(new File(testMeta.dir).getAbsolutePath());
    FileContext.getLocalFSFileContext().delete(path, true);
    for (int file = 0; file < 4; file++) {
        FileUtils.write(new File(testMeta.dir, "partition00" + file), "");
    }

    List<Partition<AbstractFSDirectoryInputOperator<String>>> partitions = Lists.newArrayList();
    partitions.add(new DefaultPartition<AbstractFSDirectoryInputOperator<String>>(oper));
    Collection<Partition<AbstractFSDirectoryInputOperator<String>>> newPartitions = oper
            .definePartitions(partitions, 1);
    Assert.assertEquals(2, newPartitions.size());
    Assert.assertEquals(2, oper.getCurrentPartitions());

    for (Partition<AbstractFSDirectoryInputOperator<String>> p : newPartitions) {
        Assert.assertNotSame(oper, p.getPartitionedInstance());
        Assert.assertNotSame(oper.getScanner(), p.getPartitionedInstance().getScanner());
        Set<String> consumed = Sets.newHashSet();
        LinkedHashSet<Path> files = p.getPartitionedInstance().getScanner()
                .scan(FileSystem.getLocal(new Configuration(false)), path, consumed);
        Assert.assertEquals("partition " + files, 2, files.size());
    }
}

From source file:com.digitalpebble.behemoth.SerializationTest.java

License:Apache License

@Override
protected void setUp() throws Exception {
    conf = BehemothConfiguration.create();
    fs = FileSystem.getLocal(conf);
    file = new Path("test_" + System.currentTimeMillis());
}

From source file:com.ebay.erl.mobius.core.criterion.TupleRestrictions.java

License:Apache License

/**
 * Create a tuple criterion that accepts a tuple only when the value
 * of its <code>column</code> is present in the given <code>file</code>.
 * <p>
 *
 * The file is assumed to be a single-column text file with one or more
 * lines. Each line is read into a case-insensitive set, and that set is
 * then used to check whether the value of the <code>column</code> is
 * contained in it.
 *
 * @param column the name of the column whose value is tested for presence
 * in the given <code>file</code>
 *
 * @param file a single-column, multi-line file containing strings/numbers;
 * each line is treated as a single entry.
 *
 * @return an instance of {@link TupleCriterion} that extracts only the records
 * whose <code>column</code> value is present in the given
 * <code>file</code>.
 *
 * @throws FileNotFoundException if the given file cannot be found.
 */
public static TupleCriterion within(final String column, File file) throws FileNotFoundException {
    final File f = TupleRestrictions.checkFileExist(file);

    return new TupleCriterion() {

        private static final long serialVersionUID = -1121221619118915652L;
        private Set<String> set;

        @Override
        public void setConf(Configuration conf) {
            try {
                if (conf.get("tmpfiles") == null || conf.get("tmpfiles").trim().length() == 0) {
                    conf.set("tmpfiles", validateFiles(f.getAbsolutePath(), conf));
                } else {
                    conf.set("tmpfiles", validateFiles(f.getAbsolutePath(), conf) + "," + conf.get("tmpfiles"));
                }

            } catch (IOException e) {
                throw new IllegalArgumentException(e);
            }
        }

        /**
         * COPIED FROM org.apache.hadoop.util.GenericOptionsParser
         */
        private String validateFiles(String files, Configuration conf) throws IOException {
            if (files == null)
                return null;
            String[] fileArr = files.split(",");
            String[] finalArr = new String[fileArr.length];
            for (int i = 0; i < fileArr.length; i++) {
                String tmp = fileArr[i];
                String finalPath;
                Path path = new Path(tmp);
                URI pathURI = path.toUri();
                FileSystem localFs = FileSystem.getLocal(conf);
                if (pathURI.getScheme() == null) {
                    // default to the local file system
                    // check if the file exists or not first
                    if (!localFs.exists(path)) {
                        throw new FileNotFoundException("File " + tmp + " does not exist.");
                    }
                    finalPath = path.makeQualified(localFs).toString();
                } else {
                    // check if the file exists in this file system
                    // we need to recreate this filesystem object to copy
                    // these files to the file system jobtracker is running
                    // on.
                    FileSystem fs = path.getFileSystem(conf);
                    if (!fs.exists(path)) {
                        throw new FileNotFoundException("File " + tmp + " does not exist.");
                    }
                    finalPath = path.makeQualified(fs).toString();
                    try {
                        fs.close();
                    } catch (IOException e) {
                        // ignore failures when closing the remote file system
                    }
                }
                finalArr[i] = finalPath;
            }
            return StringUtils.arrayToString(finalArr);
        }

        @Override
        protected boolean evaluate(Tuple tuple, Configuration configuration) {
            if (set == null) {
                set = new CaseInsensitiveTreeSet();
                BufferedReader br = null;
                try {
                    br = new BufferedReader(new FileReader(new File(f.getName())));
                    String newLine = null;
                    while ((newLine = br.readLine()) != null) {
                        this.set.add(newLine);
                    }
                } catch (IOException e) {
                    throw new RuntimeException(e);
                } finally {
                    try {
                        br.close();
                    } catch (Throwable e) {
                    }
                }
            }

            String value = tuple.getString(column);
            if (value != null) {
                return this.set.contains(value);
            } else {
                return false;
            }
        }

        @Override
        public String[] getInvolvedColumns() {
            return new String[] { column };
        }
    };
}
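
A brief usage sketch of the method above; the column name and file are hypothetical, chosen only to illustrate the call:

// Keep only tuples whose "country" value appears in countries.txt
// (both names are placeholders; any single-column text file works).
TupleCriterion criterion = TupleRestrictions.within("country", new File("countries.txt"));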

From source file:com.ebay.erl.mobius.core.datajoin.EvenlyPartitioner.java

License:Apache License

/**
 * Read in the partition file and build indexing data structures.
 * If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and
 * <tt>total.order.partitioner.natural.order</tt> is not false, a trie
 * of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes
 * will be built. Otherwise, keys will be located using a binary search of
 * the partition keyset using the {@link org.apache.hadoop.io.RawComparator}
 * defined for this job. The input file must be sorted with the same
 * comparator and contain {@link org.apache.hadoop.mapred.JobConf#getNumReduceTasks} - 1 keys.
 */
@SuppressWarnings("unchecked") // keytype from conf not static
public void configure(JobConf job) {
    try {
        String parts = getPartitionFile(job);
        final Path partFile = new Path(parts);
        final FileSystem fs = (DEFAULT_PATH.equals(parts)) ? FileSystem.getLocal(job) // assume in DistributedCache
                : partFile.getFileSystem(job);

        //Class<K> keyClass = (Class<K>)job.getMapOutputKeyClass();
        K[] splitPoints = readPartitions(fs, partFile, (Class<K>) Tuple.class, job);
        if (splitPoints.length != job.getNumReduceTasks() - 1) {
            throw new IOException("Wrong number of partitions in keyset");
        }
        RawComparator<K> comparator = (RawComparator<K>) job.getOutputKeyComparator();
        for (int i = 0; i < splitPoints.length - 1; ++i) {
            if (comparator.compare(splitPoints[i], splitPoints[i + 1]) >= 0) {
                throw new IOException("Split points are out of order");
            }
        }
        boolean natOrder = job.getBoolean("total.order.partitioner.natural.order", true);
        if (natOrder && BinaryComparable.class.isAssignableFrom(Tuple.class)) {
            partitions = buildTrie((BinaryComparable[]) splitPoints, 0, splitPoints.length, new byte[0],
                    job.getInt("total.order.partitioner.max.trie.depth", 2));
        } else {
            partitions = new BinarySearchNode(splitPoints, comparator);
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("Can't read partitions file", e);
    }
}

From source file:com.facebook.hive.orc.TestFileDump.java

License:Apache License

@Before
public void openFileSystem() throws Exception {
    conf = new Configuration();
    fs = FileSystem.getLocal(conf);
    fs.mkdirs(workDir);
    testFilePath = new Path(workDir, "TestFileDump.testDump.orc");
    fs.delete(testFilePath, false);
}