Example usage for org.apache.hadoop.fs FileSystem getLocal

List of usage examples for org.apache.hadoop.fs FileSystem getLocal

Introduction

This page lists example usage of org.apache.hadoop.fs FileSystem getLocal.

Prototype

public static LocalFileSystem getLocal(Configuration conf) throws IOException 

Document

Get the local FileSystem.
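
For orientation, here is a minimal, hedged sketch (not taken from the examples below) that obtains the local file system and lists a directory; the directory path is hypothetical.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;

public class GetLocalSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // getLocal returns a FileSystem backed by the local disk, regardless of fs.defaultFS
        LocalFileSystem localFs = FileSystem.getLocal(conf);
        for (FileStatus status : localFs.listStatus(new Path("/tmp"))) {
            System.out.println(status.getPath());
        }
    }
}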

Usage

From source file:com.cloudera.sqoop.TestMerge.java

License:Apache License

/**
 * Return true if there's a file in 'dirName' with a line that starts with
 * 'prefix'.
 */
protected boolean recordStartsWith(String prefix, String dirName) throws Exception {
    Path warehousePath = new Path(LOCAL_WAREHOUSE_DIR);
    Path targetPath = new Path(warehousePath, dirName);

    FileSystem fs = FileSystem.getLocal(new Configuration());
    FileStatus[] files = fs.listStatus(targetPath);

    if (null == files || files.length == 0) {
        fail("Got no import files!");
    }

    for (FileStatus stat : files) {
        Path p = stat.getPath();
        if (p.getName().startsWith("part-")) {
            if (checkFileForLine(fs, p, prefix)) {
                // We found the line. Nothing further to do.
                return true;
            }
        }
    }

    return false;
}
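
A hedged usage sketch of the helper above, as it might appear in another test method of the same class; the record prefix and directory name are hypothetical.

public void testImportedRecord() throws Exception {
    // Expect a part-* file under LOCAL_WAREHOUSE_DIR/mergetarget containing a line starting with "1,"
    assertTrue(recordStartsWith("1,", "mergetarget"));
}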

From source file:com.cloudera.training.metrics.JobHistoryHelper.java

License:Apache License

public static JobHistory.JobInfo getJobInfoFromLocalFile(String outputFile, Configuration conf)
        throws IOException {
    FileSystem fs = FileSystem.getLocal(conf);

    Path outputFilePath = new Path(outputFile);

    String[] jobDetails = JobHistory.JobInfo.decodeJobHistoryFileName(outputFilePath.getName()).split("_");
    String jobId = jobDetails[2] + "_" + jobDetails[3] + "_" + jobDetails[4];
    JobHistory.JobInfo job = new JobHistory.JobInfo(jobId);
    DefaultJobHistoryParser.parseJobTasks(outputFile, job, fs);
    return job;
}

From source file:com.dasasian.chok.mapfile.MapFileServer.java

License:Apache License

public MapFileServer() throws IOException {
    fileSystem = FileSystem.getLocal(conf);
}

From source file:com.dasasian.chok.util.FileUtilTest.java

License:Apache License

@Test
public void testUnzipPathFileFileSystemBoolean() throws IOException {
    Configuration configuration = new Configuration();
    FileSystem fileSystem = FileSystem.getLocal(configuration);

    // Test the unspooled case
    File targetFolder = temporaryFolder.newFolder("unpacked2");
    Path zipPath = new Path(testZipFile.getAbsolutePath());
    FileUtil.unzip(zipPath, targetFolder, fileSystem, false);
    File segment = new File(targetFolder, INDEX_TXT);
    assertTrue("Unzipped local zip directly to target", segment.exists());

    // Test the spooled case

    targetFolder = temporaryFolder.newFolder("unpacked3");
    zipPath = new Path(testZipFile.getAbsolutePath());
    FileUtil.unzip(zipPath, targetFolder, fileSystem, true);
    segment = new File(targetFolder, INDEX_TXT);
    assertTrue("Unzipped spooled local zip to target", segment.exists());

}

From source file:com.datamoin.tajo.tpcds.TpcDSTestUtil.java

License:Apache License

public static void createTables(String database, TajoClient client) throws Exception {
    String dataDir = getDataDir();
    if (dataDir == null || dataDir.isEmpty()) {
        throw new IOException("No TPCDS_DATA_DIR property. Use -DTPCDS_DATA_DIR=<data dir>");
    }

    if (dataDir.startsWith("hdfs://")) {
        Path path = new Path(dataDir);
        FileSystem fs = path.getFileSystem(new Configuration());
        for (String eachTable : tableNames) {
            Path tableDataDir = new Path(path, eachTable);
            if (!fs.exists(tableDataDir)) {
                throw new IOException(eachTable + " data dir [" + tableDataDir + "] not exists.");
            }
        }
    } else {
        File dataDirFile = new File(dataDir);
        if (!dataDirFile.exists()) {
            throw new IOException("TPCDS_DATA_DIR [" + dataDir + "] not exists.");
        }
        if (dataDirFile.isFile()) {
            throw new IOException("TPCDS_DATA_DIR [" + dataDir + "] is not a directory.");
        }

        for (String eachTable : tableNames) {
            File tableDataDir = new File(dataDirFile, eachTable);
            if (!tableDataDir.exists()) {
                throw new IOException(eachTable + " data dir [" + tableDataDir + "] not exists.");
            }
        }
    }

    KeyValueSet opt = new KeyValueSet();
    opt.set(StorageConstants.TEXT_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER);

    LOG.info("Create database: " + database);
    client.executeQuery("create database if not exists " + database);

    Path tpcdsResourceURL = new Path(ClassLoader.getSystemResource("tpcds").toString());

    Path ddlPath = new Path(tpcdsResourceURL, "ddl");
    FileSystem localFs = FileSystem.getLocal(new Configuration());

    FileStatus[] files = localFs.listStatus(ddlPath);

    String dataDirWithPrefix = dataDir;
    if (dataDir.indexOf("://") < 0) {
        dataDirWithPrefix = "file://" + dataDir;
    }

    for (FileStatus eachFile : files) {
        if (eachFile.isFile()) {
            String tableName = eachFile.getPath().getName().split("\\.")[0];
            String query = FileUtil.readTextFile(new File(eachFile.getPath().toUri()));
            query = query.replace("${DB}", database);
            query = query.replace("${DATA_LOCATION}", dataDirWithPrefix + "/" + tableName);

            LOG.info("Create table:" + tableName + "," + query);
            client.executeQuery(query);
        }
    }
}

From source file:com.datasalt.pangool.solr.SolrRecordWriter.java

License:Apache License

private Path findSolrConfig(Configuration conf) throws IOException {
    Path solrHome = null;

    // we added these lines to make this patch work on Hadoop 0.20.2
    FileSystem localFs = FileSystem.getLocal(conf);
    if (FileSystem.get(conf).equals(localFs)) {
        return new Path(localSolrHome);
    }
    // end-of-addition
    Path[] localArchives = DistributedCache.getLocalCacheArchives(conf);

    if (localArchives.length == 0) {
        throw new IOException(String.format("No local cache archives, where is %s", zipName));
    }
    for (Path unpackedDir : localArchives) {
        // Only logged if debugging
        if (LOG.isDebugEnabled()) {
            LOG.debug(String.format("Examining unpack directory %s for %s", unpackedDir, zipName));

            ProcessBuilder lsCmd = new ProcessBuilder(
                    new String[] { "/bin/ls", "-lR", unpackedDir.toString() });
            lsCmd.redirectErrorStream(true);
            Process ls = lsCmd.start();
            try {
                byte[] buf = new byte[16 * 1024];
                InputStream all = ls.getInputStream();
                int count;
                while ((count = all.read(buf)) > 0) {
                    System.err.write(buf, 0, count);
                }
            } catch (IOException ignore) {
            }
            System.err.format("Exit value is %d%n", ls.exitValue());
        }
        if (unpackedDir.getName().equals(zipName)) {

            solrHome = unpackedDir;
            break;
        }
    }
    return solrHome;
}

From source file:com.datasalt.pangool.solr.SolrRecordWriter.java

License:Apache License

/**
 * Write a file to a zip output stream, removing leading path name components from the actual file name when creating
 * the zip file entry.
 * 
 * The entry placed in the zip file is <code>baseName</code>/ <code>relativePath</code>, where
 * <code>relativePath</code> is constructed by removing a leading <code>root</code> from the path for
 * <code>itemToZip</code>.
 * 
 * If <code>itemToZip</code> is an empty directory, it is ignored. If <code>itemToZip</code> is a directory, the
 * contents of the directory are added recursively.
 * 
 * @param zos
 *          The zip output stream
 * @param baseName
 *          The base name to use for the file name entry in the zip file
 * @param root
 *          The path to remove from <code>itemToZip</code> to make a relative path name
 * @param itemToZip
 *          The path to the file to be added to the zip file
 * @return the number of entries added
 * @throws IOException
 */
static public int zipDirectory(final Configuration conf, final ZipOutputStream zos, final String baseName,
        final String root, final Path itemToZip) throws IOException {
    LOG.info(String.format("zipDirectory: %s %s %s", baseName, root, itemToZip));
    LocalFileSystem localFs = FileSystem.getLocal(conf);
    int count = 0;

    final FileStatus itemStatus = localFs.getFileStatus(itemToZip);
    if (itemStatus.isDir()) {
        final FileStatus[] statai = localFs.listStatus(itemToZip);

        // Add a directory entry to the zip file
        final String zipDirName = relativePathForZipEntry(itemToZip.toUri().getPath(), baseName, root);
        final ZipEntry dirZipEntry = new ZipEntry(zipDirName + Path.SEPARATOR_CHAR);
        LOG.info(String.format("Adding directory %s to zip", zipDirName));
        zos.putNextEntry(dirZipEntry);
        zos.closeEntry();
        count++;

        if (statai == null || statai.length == 0) {
            LOG.info(String.format("Skipping empty directory %s", itemToZip));
            return count;
        }
        for (FileStatus status : statai) {
            count += zipDirectory(conf, zos, baseName, root, status.getPath());
        }
        LOG.info(String.format("Wrote %d entries for directory %s", count, itemToZip));
        return count;
    }

    final String inZipPath = relativePathForZipEntry(itemToZip.toUri().getPath(), baseName, root);

    if (inZipPath.length() == 0) {
        LOG.warn(String.format("Skipping empty zip file path for %s (%s %s)", itemToZip, root, baseName));
        return 0;
    }

    // Include empty files, since the placeholder entry may still be needed
    FSDataInputStream in = null;
    try {
        in = localFs.open(itemToZip);
        final ZipEntry ze = new ZipEntry(inZipPath);
        ze.setTime(itemStatus.getModificationTime());
        // Entry comments clutter the zip listing, so leave them out
        // ze.setComment(itemToZip.toString());
        zos.putNextEntry(ze);

        IOUtils.copyBytes(in, zos, conf, false);
        zos.closeEntry();
        LOG.info(String.format("Wrote %d entries for file %s", count, itemToZip));
        return 1;
    } finally {
        if (in != null) {
            in.close();
        }
    }

}
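
A hedged usage sketch for the zipDirectory helper above (assuming the usual Hadoop and java.util.zip imports); the archive path and directory layout are hypothetical.

Configuration conf = new Configuration();
ZipOutputStream zos = new ZipOutputStream(new FileOutputStream("/tmp/solr-home.zip"));
try {
    // Per the javadoc above, entries are named "solr-home/conf/..." because the
    // leading "/tmp/solr" root is stripped from the item's path
    int entries = zipDirectory(conf, zos, "solr-home", "/tmp/solr", new Path("/tmp/solr/conf"));
    System.out.println("Added " + entries + " zip entries");
} finally {
    zos.close();
}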

From source file:com.datasalt.pangool.tuplemr.mapred.TestRollup.java

License:Apache License

/**
 * Checks that {@link RollupReducer} properly calls {@link TupleReducer#onOpenGroup},
 * {@link TupleReducer#onCloseGroup} and {@link TupleReducer#onGroupElements}, and that the elements (tuples)
 * passed are coherent. This method assumes a specific output from the {@link TupleReducer}: a Text key and a Text
 * value per record, in the format key("OPEN depth"), value("serialized value"),
 * key("CLOSE depth"), value("serialized value"), key("ELEMENT"), value("serialized element") (one such record for
 * every element received in onElements).
 * 
 * For instance: key("OPEN 0"), value(" element1") key("OPEN 1"), value("element1 ") key("ELEMENT"), value("element1")
 * key("ELEMENT"), value("element2") key("CLOSE 1"), value("element2") key("CLOSE 0"), value("element2")
 * 
 * 
 */
public void checkRollupOutput(Path path, int minDepth, int maxDepth) throws IOException {
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.getLocal(getConf()), path, getConf());

    Text actualKey = new Text();
    Text actualValue = new Text();
    reader.next(actualKey, actualValue); // first action
    String currentKey = actualKey.toString();
    String currentValue = actualValue.toString();

    Assert.assertTrue("First output needs to be an OPEN ", currentKey.startsWith("OPEN"));
    int currentDepth = Integer.parseInt(currentKey.split(" ")[1]);
    Assert.assertEquals("First OPEN needs to match minDepth", minDepth, currentDepth);
    int lastDepth = currentDepth;
    String lastValue = currentValue;
    State lastState = State.OPEN;

    while (reader.next(actualKey, actualValue)) {
        currentKey = actualKey.toString();
        currentValue = actualValue.toString();
        if (currentKey.startsWith("OPEN")) {
            currentDepth = Integer.parseInt(currentKey.split(" ")[1]);
            Assert.assertEquals("OPEN needs to increase depth in +1 ", lastDepth + 1, currentDepth);
            Assert.assertTrue("Too many OPENs, over maxDepth ", maxDepth >= currentDepth);
            if (lastState == State.OPEN) {
                Assert.assertEquals("First element in OPEN needs to match first element in previous OPEN",
                        lastValue, currentValue);
            } else if (lastState == State.CLOSE) {
                Assert.assertNotSame(
                        "Element from new group needs to be different from last element from last group ",
                        lastValue, currentValue);
            } else {
                Assert.fail("Not allowed OPEN after ELEMENT");
            }
            lastState = State.OPEN;
            lastValue = currentValue;
            lastDepth = currentDepth;

        } else if (currentKey.startsWith("CLOSE")) {
            currentDepth = Integer.parseInt(currentKey.split(" ")[1]);
            Assert.assertNotSame("Not allowed CLOSE after OPEN , needs at least one ELEMENT in between",
                    State.OPEN, lastState);
            Assert.assertEquals("CLOSE depth needs to match previous OPEN depth", lastDepth, currentDepth);
            Assert.assertEquals("Element in CLOSE needs to match lastElement in group", lastValue,
                    currentValue);

            lastState = State.CLOSE;
            lastValue = currentValue;
            lastDepth = currentDepth - 1;

        } else if (currentKey.startsWith("ELEMENT")) {
            Assert.assertNotSame("Not allowed ELEMENT after CLOSE, needs an OPEN or ELEMENT before",
                    State.CLOSE, lastState);
            lastState = State.ELEMENT;
            lastValue = currentValue;
        }
    }

    Assert.assertEquals("File doesn't properly finishes with a CLOSE ", State.CLOSE, lastState);
    Assert.assertEquals("Last CLOSE doesn't close the minDepth ", minDepth - 1, lastDepth);
    reader.close();
}

From source file:com.datasalt.pangool.utils.DCUtils.java

License:Apache License

/**
 * Utility method for serializing an object and saving it in the Distributed Cache.
 * <p>
 * The file where it has been serialized will be saved into a Hadoop Configuration property so that you can call
 * {@link DCUtils#loadSerializedObjectInDC(Configuration, Class, String, boolean)} to re-instantiate the serialized instance.
 * 
 * @param obj The obj instance to serialize using Java serialization.
 * @param serializeToLocalFile The local file where the instance will be serialized. It will be copied to the HDFS and removed.
 * @param conf The Hadoop Configuration.
 * @throws FileNotFoundException
 * @throws IOException
 * @throws URISyntaxException
 */
public static void serializeToDC(Object obj, String serializeToLocalFile, Configuration conf)
        throws FileNotFoundException, IOException, URISyntaxException {

    File hadoopTmpDir = new File(conf.get("hadoop.tmp.dir"));
    if (!hadoopTmpDir.exists()) {
        hadoopTmpDir.mkdir();
    }
    File file = new File(hadoopTmpDir, serializeToLocalFile);
    FileSystem fS = FileSystem.get(conf);

    ObjectOutput out = new ObjectOutputStream(new FileOutputStream(file));
    out.writeObject(obj);
    out.close();

    if (fS.equals(FileSystem.getLocal(conf))) {
        return;
    }

    String tmpHdfsFolder = conf.get(HDFS_TMP_FOLDER_CONF);
    if (tmpHdfsFolder == null) {
        // Set the temporary folder for Pangool instances to the temporary folder of the user that is running the Job.
        // This folder will be used across the cluster for locating the instances. This way, tasktrackers
        // that are being run as a different user will still be able to locate this folder.
        tmpHdfsFolder = conf.get("hadoop.tmp.dir");
        conf.set(HDFS_TMP_FOLDER_CONF, tmpHdfsFolder);
    }
    Path toHdfs = new Path(tmpHdfsFolder, serializeToLocalFile);
    if (fS.exists(toHdfs)) { // If the file already exists in HDFS, delete it so it can be replaced
        fS.delete(toHdfs, false);
    }
    FileUtil.copy(FileSystem.getLocal(conf), new Path(file + ""), FileSystem.get(conf), toHdfs, true, conf);
    DistributedCache.addCacheFile(toHdfs.toUri(), conf);
}
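
A hedged usage sketch of serializeToDC, as it might be called from the job client before submission; the object and file name are hypothetical.

Configuration conf = new Configuration();
// Any Serializable instance can be shipped; MyTupleFilter is a hypothetical example class
MyTupleFilter filter = new MyTupleFilter();
// Serializes the object under hadoop.tmp.dir, copies it to HDFS (unless the default
// FileSystem is already local) and registers it in the DistributedCache
DCUtils.serializeToDC(filter, "my-tuple-filter.ser", conf);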

From source file:com.datasalt.pangool.utils.DCUtils.java

License:Apache License

/**
 * Given a file post-fix, locate a file in the DistributedCache. It iterates over all the local files and returns the
 * first one that meets this condition.
 * 
 * @param conf
 *          The Hadoop Configuration.
 * @param filePostFix
 *          The file post-fix.
 * @throws IOException
 */
public static Path locateFileInDC(Configuration conf, String filePostFix) throws IOException {
    FileSystem fS = FileSystem.get(conf);
    Path locatedFile = null;

    if (fS.equals(FileSystem.getLocal(conf))) {
        // We use the Java File API locally because the Hadoop Path, FileSystem, etc. are too slow for tests that
        // need to call this method a lot
        File tmpFolder = new File(conf.get("hadoop.tmp.dir"));
        for (File file : tmpFolder.listFiles()) {
            if (file.getName().endsWith(filePostFix)) {
                locatedFile = new Path(file.toString());
                break;
            }
        }
    } else {
        Path tmpHdfsFolder = new Path(conf.get(HDFS_TMP_FOLDER_CONF, conf.get("hadoop.tmp.dir")));
        for (FileStatus fSt : fS.listStatus(tmpHdfsFolder)) {
            Path path = fSt.getPath();
            if (path.toString().endsWith(filePostFix)) {
                locatedFile = path;
                break;
            }
        }
    }

    return locatedFile;
}
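
A hedged usage sketch pairing locateFileInDC with the serializeToDC example above; the file post-fix is hypothetical.

// In a task (or a test), find the file that was previously written by serializeToDC
Path located = DCUtils.locateFileInDC(conf, "my-tuple-filter.ser");
if (located == null) {
    throw new IOException("my-tuple-filter.ser not found in the temporary folders");
}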