Example usage for org.apache.hadoop.fs FileSystem getLocal

Introduction

This page collects example usages of org.apache.hadoop.fs.FileSystem.getLocal from open-source projects.

Prototype

public static LocalFileSystem getLocal(Configuration conf) throws IOException 

Document

Get the local FileSystem.
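
Before the project examples below, a minimal, self-contained sketch of the call (the class name and sample path are illustrative, not taken from the sources):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;

public class GetLocalDemo {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // getLocal always returns the local-disk filesystem,
        // regardless of what fs.defaultFS points at
        LocalFileSystem localFs = FileSystem.getLocal(conf);
        Path p = new Path("/tmp/example.txt");
        System.out.println(p + " exists locally: " + localFs.exists(p));
    }
}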

Usage

From source file: io.amient.kafka.hadoop.testutils.SystemTestBase.java

License: Apache License

@Before
public void setUp() throws IOException, InterruptedException {
    dfsBaseDir = new File(TimestampExtractorSystemTest.class.getResource("/systemtest").getPath());

    //setup hadoop node
    embeddedClusterPath = new File(dfsBaseDir, "local-cluster");
    System.clearProperty(MiniDFSCluster.PROP_TEST_BUILD_DATA);
    conf = new Configuration();
    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, embeddedClusterPath.getAbsolutePath());
    cluster = new MiniDFSCluster.Builder(conf).build();
    fs = FileSystem.get(conf);
    localFileSystem = FileSystem.getLocal(conf);

    //setup zookeeper
    embeddedZkPath = new File(dfsBaseDir, "local-zookeeper");
    // smaller testDir footprint; the default zookeeper prealloc size is 65536 KB
    System.getProperties().setProperty("zookeeper.preAllocSize", "64");
    zookeeper = new ZooKeeperServer(new File(embeddedZkPath, "snapshots"), new File(embeddedZkPath, "logs"),
            3000);
    zkFactory = new NIOServerCnxnFactory();
    zkFactory.configure(new InetSocketAddress(0), 10);
    zkConnect = "localhost:" + zkFactory.getLocalPort();
    System.out.println("starting local zookeeper at " + zkConnect);
    zkFactory.startup(zookeeper);

    //setup kafka
    System.out.println("starting local kafka broker...");

    embeddedKafkaPath = new File(dfsBaseDir, "local-kafka-logs");
    KafkaConfig kafkaConfig = new KafkaConfig(new Properties() {
        {
            put("broker.id", "1");
            put("host.name", "localhost");
            put("port", "0");
            put("log.dir", embeddedKafkaPath.toString());
            put("num.partitions", "2");
            put("auto.create.topics.enable", "true");
            put("zookeeper.connect", zkConnect);
        }
    });
    kafka = new KafkaServerStartable(kafkaConfig);
    kafka.startup();

    //dynamic kafka port allocation
    try (KafkaZkUtils tmpZkClient = new KafkaZkUtils(zkConnect, 30000, 6000)) {
        Broker broker = Broker.createBroker(1, tmpZkClient.getBrokerInfo(1));
        kafkaBootstrap = broker.getBrokerEndPoint(SecurityProtocol.PLAINTEXT).connectionString();
    }

    System.out.println("preparing simpleProducer..");
    simpleProducer = new Producer<>(new ProducerConfig(new Properties() {
        {
            put("metadata.broker.list", kafkaBootstrap);
            put("serializer.class", "kafka.serializer.StringEncoder");
            put("request.required.acks", "1");
        }
    }));

    System.out.println("system test setup complete");

}
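
In this setup two handles coexist: FileSystem.get(conf) resolves to the MiniDFSCluster HDFS (the builder rewrites fs.defaultFS in conf), while FileSystem.getLocal(conf) always returns the local-disk filesystem, letting the test stage files on either side.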

From source file: io.amient.yarn1.YarnClient.java

License: Open Source License

/**
 * Distribute all dependencies in a single jar both from Client to Master as well as Master to Container(s)
 */
public static void distributeResources(Configuration yarnConf, Properties appConf, String appName)
        throws IOException {
    final FileSystem distFs = FileSystem.get(yarnConf);
    final FileSystem localFs = FileSystem.getLocal(yarnConf);
    try {

        //distribute configuration
        final Path dstConfig = new Path(distFs.getHomeDirectory(), appName + ".configuration");
        final FSDataOutputStream fs = distFs.create(dstConfig);
        appConf.store(fs, "Yarn1 Application Config for " + appName);
        fs.close();
        log.info("Updated resource " + dstConfig);

        //distribute main jar
        final String localPath = YarnClient.class.getProtectionDomain().getCodeSource().getLocation().getFile()
                .replace(".jar/", ".jar");
        final Path src;
        final String jarName = appName + ".jar";
        if (localPath.endsWith(".jar")) {
            log.info("Distributing local jar : " + localPath);
            src = new Path(localPath);
        } else {
            try {
                String localArchive = localPath + appName + ".jar";
                localFs.delete(new Path(localArchive), false);
                log.info("Unpacking compile scope dependencies: " + localPath);
                executeShell("mvn -f " + localPath + "/../.. generate-resources");
                log.info("Preparing application main jar " + localArchive);
                executeShell("jar cMf " + localArchive + " -C " + localPath + " ./");
                src = new Path(localArchive);

            } catch (InterruptedException e) {
                throw new IOException(e);
            }
        }

        byte[] digest;
        final MessageDigest md = MessageDigest.getInstance("MD5");
        // read the jar once, updating the digest with each chunk
        try (InputStream is = new FileInputStream(src.toString())) {
            byte[] buffer = new byte[8192];
            int numOfBytesRead;
            while ((numOfBytesRead = is.read(buffer)) != -1) {
                md.update(buffer, 0, numOfBytesRead);
            }
            digest = md.digest();
        }
        log.info("Local check sum: " + Hex.encodeHexString(digest));

        final Path dst = new Path(distFs.getHomeDirectory(), jarName);
        Path remoteChecksumFile = new Path(distFs.getHomeDirectory(), jarName + ".md5");
        boolean checksumMatches = false;
        if (distFs.isFile(remoteChecksumFile)) {
            try (InputStream r = distFs.open(remoteChecksumFile)) {
                ByteArrayOutputStream buffer = new ByteArrayOutputStream();
                int nRead;
                byte[] data = new byte[1024];
                while ((nRead = r.read(data, 0, data.length)) != -1) {
                    buffer.write(data, 0, nRead);
                }
                buffer.flush();
                byte[] remoteDigest = buffer.toByteArray();
                log.info("Remote check sum: " + Hex.encodeHexString(remoteDigest));
                checksumMatches = Arrays.equals(digest, remoteDigest);

            }
        }
        if (!checksumMatches) {
            log.info("Updating resource " + dst + " ...");
            distFs.copyFromLocalFile(false, true, src, dst);
            try (FSDataOutputStream remoteChecksumStream = distFs.create(remoteChecksumFile)) {
                log.info("Updating checksum " + remoteChecksumFile + " ...");
                remoteChecksumStream.write(digest);
            }
            FileStatus scFileStatus = distFs.getFileStatus(dst);
            log.info("Updated resource " + dst + " " + scFileStatus.getLen());
        }
    } catch (NoSuchAlgorithmException e) {
        throw new IOException(e);
    }
}
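
Pairing the jar with a .md5 companion file lets repeated deployments skip the copyFromLocalFile upload when the jar content is unchanged; a missing or mismatching checksum triggers a fresh copy and a checksum rewrite.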

From source file: io.aos.mapreduce.count.WordCountToolTest.java

License: Apache License

@BeforeClass
public static void setup() throws IOException {

    Properties props = new Properties();
    try (InputStream is = ClassLoader.getSystemResourceAsStream("hdfs-conf.properties")) {
        props.load(is);
    }
    for (Entry<Object, Object> entry : props.entrySet()) {
        System.setProperty((String) entry.getKey(), (String) entry.getValue());
    }

    Map<String, String> envMap = new HashMap<String, String>();
    envMap.put("JAVA_HOME", System.getProperty("java.home"));
    setEnv(envMap);

    final Configuration conf = new Configuration();
    final Path TEST_ROOT_DIR = new Path(System.getProperty("test.build.data", "/tmp"));
    testdir = new Path(TEST_ROOT_DIR, "TestMiniMRClientCluster");
    inDir = new Path(testdir, "in");
    outDir = new Path(testdir, "out");

    FileSystem fs = FileSystem.getLocal(conf);
    if (fs.exists(testdir) && !fs.delete(testdir, true)) {
        throw new IOException("Could not delete " + testdir);
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Mkdirs failed to create " + inDir);
    }

    for (int i = 0; i < inFiles.length; i++) {
        inFiles[i] = new Path(inDir, "part_" + i);
        createFile(inFiles[i], conf);
    }

    // create the mini cluster to be used for the tests
    mrCluster = MiniMRClientClusterFactory.create(WordCountToolTest.class, 1, new Configuration());

}
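
FileSystem.getLocal(conf) gives the test a local-disk handle so it can wipe and recreate the input directory before the mini MapReduce cluster is started.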

From source file: io.aos.mapreduce.grep.GrepToolTest.java

License: Apache License

@BeforeClass
public static void setup() throws IOException {

    Properties props = new Properties();
    try (InputStream is = ClassLoader.getSystemResourceAsStream("hdfs-conf.properties")) {
        props.load(is);
    }
    for (Entry<Object, Object> entry : props.entrySet()) {
        System.setProperty((String) entry.getKey(), (String) entry.getValue());
    }

    Map<String, String> envMap = new HashMap<String, String>();
    envMap.put("JAVA_HOME", System.getProperty("java.home"));
    setEnv(envMap);

    final Configuration conf = new Configuration();
    final Path TEST_ROOT_DIR = new Path(System.getProperty("test.build.data", "/tmp"));
    testdir = new Path(TEST_ROOT_DIR, "TestMiniMRClientCluster");
    inDir = new Path(testdir, "in");
    outDir = new Path(testdir, "out");

    FileSystem fs = FileSystem.getLocal(conf);
    if (fs.exists(testdir) && !fs.delete(testdir, true)) {
        throw new IOException("Could not delete " + testdir);
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Mkdirs failed to create " + inDir);
    }

    for (int i = 0; i < inFiles.length; i++) {
        inFiles[i] = new Path(inDir, "part_" + i);
        createFile(inFiles[i], conf);
    }

    // create the mini cluster to be used for the tests
    mrCluster = MiniMRClientClusterFactory.create(GrepToolTest.class, 1, new Configuration());

}

From source file: io.apigee.lembos.utils.RunnerUtils.java

License: Apache License

/**
 * Takes a module path, which could be a local filesystem path or a url, and returns the local path to the module.
 *
 * <b>Note:</b> If the value is a URL, the URL will be downloaded locally to create the necessary filesystem
 * location for the Node.js module to allow for archiving and adding to DistributedCache.
 *
 * @param conf the Hadoop configuration
 *
 * @return the local filesystem path to the module
 *
 * @throws IOException if anything goes wrong
 */
public static File createLocalCopyOfModule(final Configuration conf) throws IOException {
    final String moduleName = conf.get(LembosConstants.MR_MODULE_NAME);
    final String modulePath = conf.get(LembosConstants.MR_MODULE_PATH);
    File localTempModule = null;

    if (moduleName != null && !moduleName.trim().isEmpty() && modulePath != null
            && !modulePath.trim().isEmpty()) {
        URL moduleUrl;

        // Test if this is a URL or a file
        try {
            moduleUrl = new URL(modulePath);
        } catch (MalformedURLException e) {
            // This is to be expected if the configuration path is not a URL
            moduleUrl = null;
        }

        // Create a local temporary directory to contain the Node.js module
        final java.nio.file.Path tmpDir = Files.createTempDirectory("LembosMapReduceModule");
        FileSystem fs;

        // Mark the temp directory for deletion on JVM exit
        tmpDir.toFile().deleteOnExit();

        // Create the proper FileSystem
        if (moduleUrl == null) {
            fs = FileSystem.getLocal(conf);
        } else {
            try {
                fs = FileSystem.get(moduleUrl.toURI(), conf);
            } catch (URISyntaxException e) {
                throw new IOException(e);
            }
        }

        final org.apache.hadoop.fs.Path pathObj = new org.apache.hadoop.fs.Path(modulePath);

        if (fs.exists(pathObj)) {
            final org.apache.hadoop.fs.Path tmpPathObj = new org.apache.hadoop.fs.Path(
                    tmpDir.toAbsolutePath().toString());

            // Copy the local/remote file(s) to the temporary directory
            fs.copyToLocalFile(pathObj, tmpPathObj);

            final File moduleFile = new File(
                    new org.apache.hadoop.fs.Path(tmpPathObj, pathObj.getName()).toString());

            // Set the MapReduce module path accordingly
            if (moduleFile.isFile()) {
                final String fileName = moduleFile.getName();
                boolean wasArchive = false;

                if (fileName.endsWith(".tar") || fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz")) {
                    FileUtil.unTar(moduleFile, tmpDir.toFile());
                    wasArchive = true;
                } else if (fileName.endsWith(".zip")) {
                    FileUtil.unZip(moduleFile, tmpDir.toFile());
                    wasArchive = true;
                }

                if (wasArchive) {
                    for (final String extension : KNOWN_NODE_MODULE_EXTENSIONS) {
                        final File potentialModuleFile = new File(tmpDir.toFile(), moduleName + extension);

                        if (potentialModuleFile.exists()) {
                            localTempModule = potentialModuleFile;
                            break;
                        }
                    }
                } else {
                    localTempModule = moduleFile;
                }
            } else {
                localTempModule = new File(tmpDir.toFile(), moduleName);
            }
        } else {
            throw new RuntimeException("Unable to create/locate Node.js module locally: " + modulePath);
        }
    }

    if (localTempModule == null) {
        throw new RuntimeException("Unable to create local copy of Node.js module from path: "
                + conf.get(LembosConstants.MR_MODULE_PATH));
    }

    return localTempModule;
}
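
Whether modulePath parses as a URL decides between FileSystem.getLocal(conf) and FileSystem.get(moduleUrl.toURI(), conf), so the same copy-and-unpack logic serves both local paths and remote locations.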

From source file: io.bfscan.data.WarcTrecIdMapping.java

License: Apache License

public WarcTrecIdMapping(Path indexLocation, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.getLocal(conf);
    Directory directory = new FileSystemDirectory(fs, indexLocation, false, conf);

    LOG.info("Opening index " + indexLocation);
    reader = DirectoryReader.open(directory);
    searcher = new IndexSearcher(reader);
}
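
Here the local filesystem handle is wrapped in a FileSystemDirectory so that a Lucene index on the local disk can be opened through the Hadoop FileSystem API.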

From source file: io.druid.indexer.IndexGeneratorJobTest.java

License: Apache License

private void writeDataToLocalSequenceFile(File outputFile, List<String> data) throws IOException {
    Configuration conf = new Configuration();
    LocalFileSystem fs = FileSystem.getLocal(conf);
    Writer fileWriter = SequenceFile.createWriter(fs, conf, new Path(outputFile.getAbsolutePath()),
            BytesWritable.class, BytesWritable.class, SequenceFile.CompressionType.NONE,
            (CompressionCodec) null);

    int keyCount = 10;
    for (String line : data) {
        ByteBuffer buf = ByteBuffer.allocate(4);
        buf.putInt(keyCount);
        BytesWritable key = new BytesWritable(buf.array());
        BytesWritable value = new BytesWritable(line.getBytes(Charsets.UTF_8));
        fileWriter.append(key, value);
        keyCount += 1;
    }

    fileWriter.close();
}
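
Since getLocal is typed as LocalFileSystem, the test can point SequenceFile.createWriter at a plain local path and produce a SequenceFile on disk without an HDFS cluster.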

From source file: io.ssc.trackthetrackers.extraction.hadoop.util.DistributedCacheHelper.java

License: Apache License

public static Path[] getCachedFiles(Configuration conf) throws IOException {
    LocalFileSystem localFs = FileSystem.getLocal(conf);
    Path[] cacheFiles = DistributedCache.getLocalCacheFiles(conf);
    URI[] fallbackFiles = DistributedCache.getCacheFiles(conf);
    // fallback for local execution
    if (cacheFiles == null) {
        Preconditions.checkState(fallbackFiles != null, "Unable to find cached files!");
        cacheFiles = new Path[fallbackFiles.length];
        for (int n = 0; n < fallbackFiles.length; n++) {
            cacheFiles[n] = new Path(fallbackFiles[n].getPath());
        }
    } else {
        for (int n = 0; n < cacheFiles.length; n++) {
            cacheFiles[n] = localFs.makeQualified(cacheFiles[n]);
            // fallback for local execution
            if (!localFs.exists(cacheFiles[n])) {
                cacheFiles[n] = new Path(fallbackFiles[n].getPath());
            }
        }
    }
    Preconditions.checkState(cacheFiles.length > 0, "Unable to find cached files!");
    return cacheFiles;
}
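
Qualifying each cached path against the local filesystem, with DistributedCache.getCacheFiles as a fallback, keeps the helper working both on a real cluster and under local-runner execution, where the local cache is not populated.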

From source file: io.ssc.trackthetrackers.extraction.hadoop.util.DomainIndexIntegrationTest.java

License: Open Source License

public static void main(String[] args) throws IOException {

    FileSystem fs = FileSystem.getLocal(new Configuration());

    Path indexFile = new Path(Config.get("webdatacommons.pldfile"));

    DomainIndex domainIndex = new DomainIndex(fs, indexFile);

    System.out.println("google-analytics.com " + domainIndex.indexFor("google-analytics.com"));
    System.out.println("spiegel.de " + domainIndex.indexFor("spiegel.de"));
    System.out.println("ssc.io " + domainIndex.indexFor("ssc.io"));
}

From source file: it.crs4.pydoop.mapreduce.pipes.Application.java

License: Apache License

private void writePasswordToLocalFile(String localPasswordFile, byte[] password, Configuration conf)
        throws IOException {
    FileSystem localFs = FileSystem.getLocal(conf);
    Path localPath = new Path(localPasswordFile);
    FSDataOutputStream out = FileSystem.create(localFs, localPath, new FsPermission("400"));
    out.write(password);
    out.close();
}
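
Creating the file via FileSystem.create with FsPermission("400") makes the password file owner-readable only, a sensible precaution for a secret written to the task host's local disk.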