Example usage for org.apache.hadoop.fs Path SEPARATOR

List of usage examples for org.apache.hadoop.fs Path SEPARATOR

Introduction

On this page you can find usage examples for org.apache.hadoop.fs Path.SEPARATOR.

Prototype

String SEPARATOR

To view the source code for org.apache.hadoop.fs Path.SEPARATOR, click the Source Link.

Document

The directory separator, a slash.
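Path.SEPARATOR is the String "/" (a char variant, Path.SEPARATOR_CHAR, also exists), so it is typically used to join path components when building HDFS path strings by plain concatenation, as the examples below do. A minimal sketch, with illustrative class, variable, and path names that are not taken from any of the examples:

import org.apache.hadoop.fs.Path;

public class SeparatorExample {

    public static void main(String[] args) {
        String base = "/user/downloads";
        String table = "occurrence_interpreted";

        // joining components with Path.SEPARATOR, as most examples on this page do
        Path joined = new Path(base + Path.SEPARATOR + table);

        // the two-argument Path constructor yields the same result
        Path constructed = new Path(base, table);

        System.out.println(joined);      // /user/downloads/occurrence_interpreted
        System.out.println(constructed); // /user/downloads/occurrence_interpreted
    }
}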

Usage

From source file: org.deeplearning4j.iterativereduce.runtime.Utils.java

License: Apache License

private static StringBuffer getCommandsBase(Configuration conf, Properties props, String command, String args) {

    StringBuffer sb = new StringBuffer();

    sb.append("java ");
    sb.append("-Xmx").append(props.getProperty(ConfigFields.YARN_MEMORY, "512")).append("m ");

    if (args != null)
        sb.append(" ").append(args).append(" ");

    // Actual command
    sb.append(command);

    sb.append(" 1> ").append(ApplicationConstants.LOG_DIR_EXPANSION_VAR).append(Path.SEPARATOR)
            .append(ApplicationConstants.STDOUT);

    sb.append(" 2> ").append(ApplicationConstants.LOG_DIR_EXPANSION_VAR).append(Path.SEPARATOR)
            .append(ApplicationConstants.STDERR);

    return sb;
}
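Here Path.SEPARATOR supplies the slash between ApplicationConstants.LOG_DIR_EXPANSION_VAR, a placeholder that YARN expands to the container's log directory at launch time, and the standard stdout/stderr file names, so the launched JVM writes its output to files inside that log directory.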

From source file: org.gbif.occurrence.download.oozie.ArchiveBuilder.java

License: Creative Commons License

/**
 * Rewrites the zip file by opening the original and appending the pre-compressed content on the fly.
 */
private void appendPreCompressedFiles(File zipFile) throws IOException {

    LOG.info("Appending pre-compressed occurrence content to the Zip: " + zipFile.getAbsolutePath());

    File tempZip = new File(archiveDir, zipFile.getName() + ".part");
    boolean renameOk = zipFile.renameTo(tempZip);
    if (renameOk) {
        try (ZipInputStream zin = new ZipInputStream(new FileInputStream(tempZip));
                ModalZipOutputStream out = new ModalZipOutputStream(
                        new BufferedOutputStream(new FileOutputStream(zipFile)));) {

            // copy existing entries
            ZipEntry entry = zin.getNextEntry();
            while (entry != null) {
                out.putNextEntry(new org.gbif.hadoop.compress.d2.zip.ZipEntry(entry.getName()),
                        ModalZipOutputStream.MODE.DEFAULT);
                ByteStreams.copy(zin, out);
                entry = zin.getNextEntry();
            }

            // NOTE: hive lowercases all the paths
            appendPreCompressedFile(out,
                    new Path((hdfsPath + Path.SEPARATOR + interpretedDataTable).toLowerCase()),
                    INTERPRETED_FILENAME, HeadersFileUtil.getIntepretedTableHeader());
            appendPreCompressedFile(out,
                    new Path((hdfsPath + Path.SEPARATOR + verbatimDataTable).toLowerCase()), VERBATIM_FILENAME,
                    HeadersFileUtil.getVerbatimTableHeader());
            appendPreCompressedFile(out,
                    new Path((hdfsPath + Path.SEPARATOR + multimediaDataTable).toLowerCase()),
                    MULTIMEDIA_FILENAME, HeadersFileUtil.getMultimediaTableHeader());

        } finally {
            // we've rewritten so remove the original
            if (tempZip != null) {
                tempZip.delete();
            }
        }

    } else {
        throw new IllegalStateException("Unable to rename existing zip, to allow appending occurrence data");
    }
}

From source file: org.gbif.occurrence.download.oozie.ArchiveBuilder.java

License: Creative Commons License

/**
 * Adds an eml file per dataset involved into a subfolder "dataset" which is supported by our dwc archive reader.
 * Create a rights.txt and citation.txt file targeted at humans to quickly yield an overview about rights and
 * datasets involved.
 */
private void addConstituentMetadata() throws IOException {

    Path citationSrc = new Path(hdfsPath + Path.SEPARATOR + citationTable);

    LOG.info("Adding constituent dataset metadata to archive, based on: " + citationSrc);

    // now read the dataset citation table and create an EML file per datasetId
    // first copy from HDFS to local file
    if (!hdfs.exists(citationSrc)) {
        LOG.warn("No citation file directory existing on HDFS, skip creating of dataset metadata {}",
                citationSrc);
        return;
    }

    final Map<UUID, Integer> srcDatasets = readDatasetCounts(citationSrc);

    File emlDir = new File(archiveDir, "dataset");
    if (!srcDatasets.isEmpty()) {
        emlDir.mkdir();
    }
    Closer closer = Closer.create();

    Writer rightsWriter = closer.register(FileUtils.startNewUtf8File(new File(archiveDir, RIGHTS_FILENAME)));
    Writer citationWriter = closer
            .register(FileUtils.startNewUtf8File(new File(archiveDir, CITATIONS_FILENAME)));

    // write fixed citations header
    citationWriter.write(CITATION_HEADER);
    // now iterate over constituent UUIDs

    for (Entry<UUID, Integer> dsEntry : srcDatasets.entrySet()) {
        final UUID constituentId = dsEntry.getKey();
        LOG.info("Processing constituent dataset: {}", constituentId);
        // catch errors for each uuid to make sure one broken dataset does not bring down the entire process
        try {
            Dataset srcDataset = datasetService.get(constituentId);

            // citation
            String citationLink = writeCitation(citationWriter, srcDataset, constituentId);
            // rights
            writeRights(rightsWriter, srcDataset, citationLink);
            // eml file
            createEmlFile(constituentId, emlDir);

            // add as constituent for later
            constituents.add(new Constituent(srcDataset.getTitle(), dsEntry.getValue()));

            // add original author as content provider to main dataset description
            Contact provider = getContentProviderContact(srcDataset);
            if (provider != null) {
                dataset.getContacts().add(provider);
            }
        } catch (UniformInterfaceException e) {
            LOG.error(String.format("Registry client http exception: %d \n %s", e.getResponse().getStatus(),
                    e.getResponse().getEntity(String.class)), e);
        } catch (Exception e) {
            LOG.error("Error creating download file", e);
        }
    }
    closer.close();
}

From source file: org.gbif.occurrence.download.oozie.ArchiveBuilder.java

License: Creative Commons License

/**
 * Copies and merges the hive query results files into a single, local occurrence data file.
 */
private void addOccurrenceDataFile(String dataTable, String headerFileName, String destFileName)
        throws IOException {
    LOG.info("Copy-merge occurrence data hdfs file {} to local filesystem", dataTable);
    final Path dataSrc = new Path(hdfsPath + Path.SEPARATOR + dataTable);
    boolean hasRecords = hdfs.exists(dataSrc);
    if (!hasRecords) {
        hdfs.create(dataSrc);
    }
    if (!isSmallDownload && hasRecords) { // small downloads already include the headers
        FileUtil.copy(new File(headerFileName), hdfs, new Path(dataSrc + Path.SEPARATOR + HEADERS_FILENAME),
                false, conf);
    }
    File rawDataResult = new File(archiveDir, destFileName);
    Path dataDest = new Path(rawDataResult.toURI());
    FileUtil.copyMerge(hdfs, dataSrc, localfs, dataDest, false, conf, null);
    // remove the CRC file created by copyMerge method
    removeDataCRCFile(destFileName);
}

From source file: org.hdl.tensorflow.yarn.util.Utils.java

License: Apache License

public static Path copyLocalFileToDfs(FileSystem fs, String appId, Path srcPath, String dstFileName)
        throws IOException {
    Path dstPath = new Path(fs.getHomeDirectory(),
            Constants.DEFAULT_APP_NAME + Path.SEPARATOR + appId + Path.SEPARATOR + dstFileName);
    LOG.info("Copying " + srcPath + " to " + dstPath);
    fs.copyFromLocalFile(srcPath, dstPath);
    return dstPath;
}
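A minimal usage sketch for this helper, assuming the jar already exists locally; the method name, local file path, and jar name below are illustrative and not part of the original source:

    public static Path stageJobJar(FileSystem fs, String appId) throws IOException {
        // local file location is illustrative; the file must already exist
        Path localJar = new Path("/tmp/tf-job.jar");
        // copied to <home dir>/<Constants.DEFAULT_APP_NAME>/<appId>/tf-job.jar on the DFS
        return Utils.copyLocalFileToDfs(fs, appId, localJar, "tf-job.jar");
    }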

From source file: org.hortonworks.dovetail.am.AppMaster.java

License: Apache License

/**
 * Parse command line options
 * 
 * @param args
 *            Command line args
 * @return Whether init successful and run should be invoked
 * @throws IOException
 */
public boolean init(String[] args) throws IOException {

    Map<String, String> envs = System.getenv();

    ContainerId containerId = ConverterUtils.toContainerId(envs.get(Environment.CONTAINER_ID.name()));
    appAttemptID = containerId.getApplicationAttemptId();

    if (!envs.containsKey(ApplicationConstants.APP_SUBMIT_TIME_ENV)) {
        throw new RuntimeException(ApplicationConstants.APP_SUBMIT_TIME_ENV + " not set in the environment");
    }
    if (!envs.containsKey(Environment.NM_HOST.name())) {
        throw new RuntimeException(Environment.NM_HOST.name() + " not set in the environment");
    }
    if (!envs.containsKey(Environment.NM_HTTP_PORT.name())) {
        throw new RuntimeException(Environment.NM_HTTP_PORT.name() + " not set in the environment");
    }
    if (!envs.containsKey(Environment.NM_PORT.name())) {
        throw new RuntimeException(Environment.NM_PORT.name() + " not set in the environment");
    }

    LOG.info("Application master for app" + ", appId=" + appAttemptID.getApplicationId().getId()
            + ", clustertimestamp=" + appAttemptID.getApplicationId().getClusterTimestamp() + ", attemptId="
            + appAttemptID.getAttemptId());

    numContainers = conf.getInt(DovetailConfiguration.DOVETAIL_CONTAINER_COUNT,
            DovetailConfiguration.DEFAULT_CONTAINER_COUNT);

    containerMemory = conf.getInt(DovetailConfiguration.DOVETAIL_CONTAINER_MEMORY,
            DovetailConfiguration.DEFAULT_CONTAINER_MEMORY);

    // note: this re-assigns containerMemory using the container priority setting,
    // overwriting the memory value read just above
    containerMemory = conf.getInt(DovetailConfiguration.DOVETAIL_CONTAINER_PRIORITY,
            DovetailConfiguration.DEFAULT_DOVETAIL_CONTAINER_PRIORITY);

    amJar = conf.get(DovetailConfiguration.DOVETAIL_AM_HDFS_DIR) + Path.SEPARATOR
            + System.getProperty(DovetailConfiguration.DOVETAIL_AM_JAR);

    return true;
}

From source file: org.jahia.modules.crawl.CrawlJob.java

License: Open Source License

protected void executeInternal(JobExecutionContext context) throws JobExecutionException {
    try {
        JobDataMap mergedJobDataMap = context.getMergedJobDataMap();
        if (conf == null) {
            String baseDirPath = (String) mergedJobDataMap.get("baseDir");
            if (StringUtils.isEmpty(baseDirPath)) {
                baseDirPath = System.getProperty("user.dir");
            }
            String folderName = (String) mergedJobDataMap.get("folderName");
            if (folderName == null) {
                folderName = "jahia-crawler";
            }
            baseDir = new Path(
                    baseDirPath + (StringUtils.isEmpty(folderName) ? "" : System.getProperty("file.separator"))
                            + folderName);
            init();
        }

        List<String> urls = (List<String>) mergedJobDataMap.get("urls");

        JobConf job = new NutchJob(conf);

        Path tmpDir = job.getLocalPath("crawl" + Path.SEPARATOR + getDate());

        CrawlDBUtil.generateSeedList(fs, urlPath, urls);
        // inject
        Injector injector = new Injector(conf);
        injector.inject(crawldbPath, urlPath);

        // generate
        Generator g = new Generator(conf);
        // fetch
        conf.setBoolean("fetcher.parse", true);
        Fetcher fetcher = new Fetcher(conf);
        ParseSegment parseSegment = new ParseSegment(conf);
        CrawlDb crawlDbTool = new CrawlDb(conf);

        int depth = 5;
        int threads = 4;
        int i;
        for (i = 0; i < depth; i++) { // generate new segment
            Path generatedSegment = g.generate(crawldbPath, segmentsPath, 1, Long.MAX_VALUE, Long.MAX_VALUE,
                    false, false);

            if (generatedSegment == null) {
                logger.info("Stopping at depth=" + i + " - no more URLs to fetch.");
                break;
            }
            fetcher.fetch(generatedSegment, threads, true);
            if (!Fetcher.isParsing(job)) {
                parseSegment.parse(generatedSegment); // parse it, if needed
            }
            crawlDbTool.update(crawldbPath, new Path[] { generatedSegment }, true, true);
        }
        if (i > 0) {
            LinkDb linkDbTool = new LinkDb(conf);
            Indexer indexer = new Indexer(conf);
            DeleteDuplicates dedup = new DeleteDuplicates(conf);
            IndexMerger merger = new IndexMerger(conf);

            linkDbTool.invert(linkDb, segments, true, true, false); // invert links

            if (indexes != null) {
                // Delete old indexes
                if (fs.exists(indexes)) {
                    logger.info("Deleting old indexes: " + indexes);
                    fs.delete(indexes, true);
                }

                // Delete old index
                if (fs.exists(index)) {
                    logger.info("Deleting old merged index: " + index);
                    fs.delete(index, true);
                }
            }

            // index, dedup & merge
            FileStatus[] fstats = fs.listStatus(segments, HadoopFSUtil.getPassDirectoriesFilter(fs));
            indexer.index(indexes, crawldbPath, linkDb, Arrays.asList(HadoopFSUtil.getPaths(fstats)));
            if (indexes != null) {
                dedup.dedup(new Path[] { indexes });
                fstats = fs.listStatus(indexes, HadoopFSUtil.getPassDirectoriesFilter(fs));
                merger.merge(HadoopFSUtil.getPaths(fstats), index, tmpDir);
            }
        } else {
            logger.warn("No URLs to fetch - check your seed list and URL filters.");
        }

    } catch (IOException e) {
        logger.error("Exception while crawling", e);
    }
}
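In the tmpDir line above, Path.SEPARATOR joins the literal "crawl" with a date string, and JobConf.getLocalPath resolves that relative name against one of the job's configured local directories, giving the index merger a dated scratch directory.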

From source file: org.kaaproject.kaa.server.flume.sink.hdfs.HdfsSinkKey.java

License: Apache License

public String getPath() {
    return rootPath + Path.SEPARATOR + kaaSinkKey.getPath();
}

From source file: org.kaaproject.kaa.server.flume.sink.hdfs.KaaSinkKey.java

License: Apache License

public String getPath() {
    return applicationToken + Path.SEPARATOR + schemaVersion;
}
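Taken together with the previous example, HdfsSinkKey.getPath and KaaSinkKey.getPath compose a sink path of the form rootPath + "/" + applicationToken + "/" + schemaVersion, which is the directory layout the test below globs against.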

From source file: org.kaaproject.kaa.server.flume.TestKaaHdfsSink.java

License: Apache License

private void readAndCheckResultsFromHdfs(RecordHeader header, List<TestLogData> testLogs) throws IOException {
    Path logsPath = new Path("/logs" + Path.SEPARATOR + applicationToken + Path.SEPARATOR + logSchemaVersion
            + Path.SEPARATOR + "data*");
    FileStatus[] statuses = fileSystem.globStatus(logsPath);
    List<TestLogData> resultTestLogs = new ArrayList<>();
    Schema wrapperSchema = RecordWrapperSchemaGenerator
            .generateRecordWrapperSchema(TestLogData.getClassSchema().toString());
    for (FileStatus status : statuses) {
        FileReader<GenericRecord> fileReader = null;
        try {
            SeekableInput input = new FsInput(status.getPath(), fileSystem.getConf());
            DatumReader<GenericRecord> datumReader = new SpecificDatumReader<>(wrapperSchema);
            fileReader = DataFileReader.openReader(input, datumReader);
            for (GenericRecord record : fileReader) {
                RecordHeader recordHeader = (RecordHeader) record
                        .get(RecordWrapperSchemaGenerator.RECORD_HEADER_FIELD);
                Assert.assertEquals(header, recordHeader);
                TestLogData recordData = (TestLogData) record
                        .get(RecordWrapperSchemaGenerator.RECORD_DATA_FIELD);
                resultTestLogs.add(recordData);
            }
        } finally {
            IOUtils.closeQuietly(fileReader);
        }
    }
    Assert.assertEquals(testLogs, resultTestLogs);
}
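The glob built here mirrors the layout written by the sink key classes above: Path.SEPARATOR joins "/logs", the application token, and the schema version, and the trailing "data*" lets FileSystem.globStatus pick up every Avro data file in that directory.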