Example usage for org.apache.hadoop.fs FileStatus getPath

List of usage examples for org.apache.hadoop.fs FileStatus getPath

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileStatus.getPath().

Prototype

public Path getPath() 
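Before the examples collected below, here is a minimal, self-contained sketch (not taken from any of the listed sources; the directory /tmp is only a placeholder) showing the typical pattern: list a directory with FileSystem.listStatus() and call getPath() on each returned FileStatus to obtain the fully qualified path of that entry.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListPaths {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // List the entries of a directory and print each fully qualified path.
        for (FileStatus status : fs.listStatus(new Path("/tmp"))) {
            // getPath() returns the qualified Path of the file or directory this status describes.
            System.out.println(status.getPath() + (status.isDirectory() ? " (dir)" : ""));
        }
    }
}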

Usage

From source file:com.hdfs.concat.clean.Clean.java

License:Apache License

public int cleanup(Path p) {
    try {
        if (fs.isFile(p)) {
            if (conf.get(TARGET_EXPR) != null) {
                if (p.getName().matches(conf.get(TARGET_EXPR))) {
                    warnOrDelete(p);
                }
            }
            if (conf.get(CUTTOFF_MILLIS) != null) {
                if (fs.getFileStatus(p).getModificationTime() < cutoff) {
                    warnOrDelete(p);
                }
            }
        }

        if (fs.isDirectory(p)) {
            for (FileStatus stat : fs.listStatus(p)) {
                cleanup(stat.getPath());
            }
            if (fs.listStatus(p).length == 0) {
                if (conf.get(TARGET_EXPR) != null) {
                    if (p.getName().matches(conf.get(TARGET_EXPR))) {
                        warnOrDelete(p);
                    }
                }
                if (conf.get(CUTTOFF_MILLIS) != null) {
                    if (fs.getFileStatus(p).getModificationTime() < cutoff) {
                        warnOrDelete(p);
                    }
                }
            }
        }
    } catch (IOException e) {
        System.out.println("exception " + e);
        return 7;
    }
    return 0;
}

From source file:com.hdfs.concat.crush.Crush.java

License:Apache License

private void standAlone() throws IOException {
    String absSrcDir = fs.makeQualified(srcDir).toUri().getPath();
    String absOutDir = fs.makeQualified(outDir).toUri().getPath();

    Text bucket = new Text(absSrcDir + "-0");

    List<Text> files = new ArrayList<Text>();

    FileStatus[] contents = fs.listStatus(new Path(absSrcDir));

    for (FileStatus content : contents) {
        if (!content.isDir()) {
            if (ignoredFiles != null) {
                // Check for files to skip
                ignoredFiles.reset(content.getPath().toUri().getPath());
                if (ignoredFiles.matches()) {
                    LOG.trace("Ignoring " + content.getPath().toString());
                    continue;
                }
            }
            files.add(new Text(content.getPath().toUri().getPath()));
        }
    }

    /*
     * Is the directory empty?
     */
    if (files.isEmpty()) {
        return;
    }

    /*
     * We trick the reducer into doing some work for us by setting these configuration properties.
     */
    job.set("mapred.tip.id", "task_000000000000_00000_r_000000");
    job.set("mapred.task.id", "attempt_000000000000_0000_r_000000_0");

    job.set("mapred.output.dir", absOutDir);

    /*
     * File output committer needs this.
     */
    fs.mkdirs(new Path(absOutDir, "_temporary"));

    CrushReducer reducer = new CrushReducer();

    reducer.configure(job);
    reducer.reduce(bucket, files.iterator(), new NullOutputCollector<Text, Text>(), Reporter.NULL);
    reducer.close();

    /*
     * Use a glob here because the temporary and task attempt work dirs have funny names.
     * Include a * at the end to cover wildcards for compressed files.
     */
    Path crushOutput = new Path(absOutDir + "/*/*/crush" + absSrcDir + "/" + dest.getName() + "*");

    FileStatus[] statuses = fs.globStatus(crushOutput);

    if (statuses == null || 1 != statuses.length) {
        throw new AssertionError("Did not find the expected output in " + crushOutput.toString());
    }

    rename(statuses[0].getPath(), dest.getParent(), dest.getName());
}

From source file:com.hdfs.concat.crush.Crush.java

License:Apache License

private void cloneOutput() throws IOException {

    List<FileStatus> listStatus = getOutputMappings();

    /*
     * Initialize to empty list, in which case swap() will be a no-op. The reference is then replaced with a real list, which is
     * used in the subsequent iterations.
     */
    List<Path> crushInput = emptyList();

    Text srcFile = new Text();
    Text crushOut = new Text();
    Text prevCrushOut = new Text();

    for (FileStatus partFile : listStatus) {
        Path path = partFile.getPath();

        Reader reader = new Reader(fs, path, fs.getConf());

        try {
            while (reader.next(srcFile, crushOut)) {
                if (!crushOut.equals(prevCrushOut)) {
                    swap(crushInput, prevCrushOut.toString());

                    prevCrushOut.set(crushOut);
                    crushInput = new LinkedList<Path>();
                }

                crushInput.add(new Path(srcFile.toString()));
            }
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                LOG.warn("Trapped exception when closing " + path, e);
            }
        }

        swap(crushInput, prevCrushOut.toString());
    }
}

From source file:com.hdfs.concat.crush.Crush.java

License:Apache License

/**
 * Moves the skipped files to the output directory. Called when operating in normal (non-clone) mode.
 */
private void moveOutput() throws IOException {

    List<FileStatus> listStatus = getOutputMappings();

    Text srcFile = new Text();
    Text crushOut = new Text();

    Set<String> crushOutputFiles = new HashSet<String>(nBuckets);

    for (FileStatus partFile : listStatus) {
        Path path = partFile.getPath();

        Reader reader = new Reader(fs, path, fs.getConf());

        try {
            while (reader.next(srcFile, crushOut)) {
                crushOutputFiles.add(new Path(crushOut.toString()).toUri().getPath());
            }
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                LOG.warn("Trapped exception when closing " + path, e);
            }
        }
    }

    assert crushOutputFiles.size() == nBuckets;

    /*
     * The crushoutput files will appear in a subdirectory of the output directory. The subdirectory will be the full path of the
     * input directory that was crushed. E.g.
     *
     * Crush input:
     * /user/me/input/dir1/file1
     * /user/me/input/dir1/file2
     * /user/me/input/dir2/file3
     * /user/me/input/dir2/file4
     * /user/me/input/dir3/dir4/file5
     * /user/me/input/dir3/dir4/file6
     *
     * Crush output:
     * /user/me/output/user/me/input/dir1/crushed_file ...
     * /user/me/output/user/me/input/dir2/crushed_file ...
     * /user/me/output/user/me/input/dir2/dir3/dir4/crushed_file ...
     *
     * We need to collapse this down to:
     * /user/me/output/dir1/crushed_file ...
     * /user/me/output/dir2/crushed_file ...
     * /user/me/output/dir2/dir3/dir4/crushed_file ...
     */
    String srcDirName = fs.makeQualified(srcDir).toUri().getPath();

    String destName = fs.makeQualified(dest).toUri().getPath();
    String partToReplace = fs.makeQualified(outDir).toUri().getPath() + "/crush" + srcDirName;

    print(Verbosity.INFO, "\n\nCopying crush files to " + destName);

    for (String crushOutputFile : crushOutputFiles) {
        Path srcPath = new Path(crushOutputFile);
        Path destPath = new Path(destName + crushOutputFile.substring(partToReplace.length())).getParent();

        rename(srcPath, destPath, null);
    }

    print(Verbosity.INFO, "\n\nMoving skipped files to " + destName);

    /*
     * Don't forget to move the files that were not crushed to the output dir so that the output dir has all the data that was in
     * the input dir, the difference being there are fewer files in the output dir.
     */
    for (String name : skippedFiles) {
        Path srcPath = new Path(name);
        Path destPath = new Path(destName + name.substring(srcDirName.length())).getParent();

        rename(srcPath, destPath, null);
    }
}

From source file:com.hdfs.concat.crush.Crush.java

License:Apache License

void writeDirs() throws IOException {

    print(Verbosity.INFO, "\n\nUsing temporary directory " + tmpDir.toUri().getPath());

    FileStatus status = fs.getFileStatus(srcDir);

    Path tmpIn = new Path(tmpDir, "in");

    bucketFiles = new Path(tmpIn, "dirs");
    partitionMap = new Path(tmpIn, "partition-map");
    counters = new Path(tmpIn, "counters");

    skippedFiles = new HashSet<String>();

    /*
     * Prefer the path returned by the status because it is always fully qualified.
     */
    List<Path> dirs = asList(status.getPath());

    Text key = new Text();
    Text value = new Text();

    Writer writer = SequenceFile.createWriter(fs, job, bucketFiles, Text.class, Text.class,
            CompressionType.BLOCK);

    int numPartitions = Integer.parseInt(job.get("mapred.reduce.tasks"));

    Bucketer partitionBucketer = new Bucketer(numPartitions, 0, false);
    partitionBucketer.reset("partition-map");

    jobCounters = new Counters();

    try {
        while (!dirs.isEmpty()) {
            List<Path> nextLevel = new LinkedList<Path>();

            for (Path dir : dirs) {
                jobCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);

                print(Verbosity.INFO, "\n\n" + dir.toUri().getPath());

                FileStatus[] contents = fs.listStatus(dir, new PathFilter() {
                    @Override
                    public boolean accept(Path testPath) {
                        if (ignoredFiles == null)
                            return true;
                        ignoredFiles.reset(testPath.toUri().getPath());
                        return !ignoredFiles.matches();
                    }

                });

                if (contents == null || contents.length == 0) {
                    print(Verbosity.INFO, " is empty");

                    jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                } else {
                    List<FileStatus> crushables = new ArrayList<FileStatus>(contents.length);
                    Set<String> uncrushedFiles = new HashSet<String>(contents.length);

                    long crushableBytes = 0;

                    /*
                     * Queue sub directories for subsequent inspection and examine the files in this directory.
                     */
                    for (FileStatus content : contents) {
                        Path path = content.getPath();

                        if (content.isDir()) {
                            nextLevel.add(path);
                        } else {
                            boolean changed = uncrushedFiles.add(path.toUri().getPath());

                            assert changed : path.toUri().getPath();

                            long fileLength = content.getLen();

                            if (fileLength <= maxEligibleSize) {
                                crushables.add(content);
                                crushableBytes += fileLength;
                            }
                        }
                    }

                    /*
                     * We found a directory with data in it. Make sure we know how to name the crush output file and then increment the
                     * number of files we found.
                     */
                    if (!uncrushedFiles.isEmpty()) {
                        if (-1 == findMatcher(dir)) {
                            throw new IllegalArgumentException(
                                    "Could not find matching regex for directory: " + dir);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_FOUND, uncrushedFiles.size());
                    }

                    if (0 == crushableBytes) {
                        print(Verbosity.INFO, " has no crushable files");

                        jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                    } else {
                        /*
                         * We found files to consider for crushing.
                         */
                        long nBlocks = crushableBytes / dfsBlockSize;

                        if (nBlocks * dfsBlockSize != crushableBytes) {
                            nBlocks++;
                        }

                        /*
                         * maxFileBlocks will be huge in v1 mode, which will lead to one bucket per directory.
                         */
                        long dirBuckets = nBlocks / maxFileBlocks;

                        if (dirBuckets * maxFileBlocks != nBlocks) {
                            dirBuckets++;
                        }

                        if (dirBuckets > Integer.MAX_VALUE) {
                            throw new AssertionError("Too many buckets: " + dirBuckets);
                        }

                        Bucketer directoryBucketer = new Bucketer((int) dirBuckets, excludeSingleFileDirs);

                        directoryBucketer.reset(getPathPart(dir));

                        for (FileStatus file : crushables) {
                            directoryBucketer.add(new FileStatusHasSize(file));
                        }

                        List<Bucket> crushFiles = directoryBucketer.createBuckets();

                        if (crushFiles.isEmpty()) {
                            jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                        } else {
                            nBuckets += crushFiles.size();

                            jobCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

                            print(Verbosity.INFO, " => " + crushFiles.size() + " output files");

                            /*
                             * Write out the mapping between a bucket and a file.
                             */
                            for (Bucket crushFile : crushFiles) {
                                String bucketId = crushFile.name();

                                List<String> bucketFiles = crushFile.contents();

                                print(Verbosity.INFO,
                                        format("\n  Output %s will include %,d input bytes from %,d files",
                                                bucketId, crushFile.size(), bucketFiles.size()));

                                key.set(bucketId);

                                for (String f : bucketFiles) {
                                    boolean changed = uncrushedFiles.remove(f);

                                    assert changed : f;

                                    pathMatcher.reset(f);

                                    pathMatcher.matches();

                                    value.set(pathMatcher.group(5));

                                    writer.append(key, value);

                                    /*
                                     * Print the input file with four leading spaces.
                                     */
                                    print(Verbosity.VERBOSE, "\n    " + f);
                                }

                                jobCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, bucketFiles.size());

                                partitionBucketer.add(crushFile);
                            }
                        }
                    }

                    if (!uncrushedFiles.isEmpty()) {
                        print(Verbosity.INFO, "\n\n  Skipped " + uncrushedFiles.size() + " files");

                        for (String uncrushed : uncrushedFiles) {
                            print(Verbosity.VERBOSE, "\n    " + uncrushed);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_SKIPPED, uncrushedFiles.size());
                    }

                    skippedFiles.addAll(uncrushedFiles);
                }
            }

            dirs = nextLevel;
        }
    } finally {
        try {
            writer.close();
        } catch (Exception e) {
            LOG.error("Trapped exception during close: " + bucketFiles, e);
        }
    }

    /*
     * Now that we have processed all the directories, write the partition map.
     */
    List<Bucket> partitions = partitionBucketer.createBuckets();

    assert partitions.size() <= numPartitions;

    writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);

    IntWritable partNum = new IntWritable();

    try {
        for (Bucket partition : partitions) {
            String partitionName = partition.name();

            partNum.set(Integer.parseInt(partitionName.substring(partitionName.lastIndexOf('-') + 1)));

            for (String bucketId : partition.contents()) {
                key.set(bucketId);

                writer.append(key, partNum);
            }
        }
    } finally {
        try {
            writer.close();
        } catch (Exception e) {
            LOG.error("Trapped exception during close: " + partitionMap, e);
        }
    }

    DataOutputStream countersStream = fs.create(this.counters);

    try {
        jobCounters.write(countersStream);
    } finally {
        try {
            countersStream.close();
        } catch (Exception e) {
            LOG.error("Trapped exception during close: " + partitionMap, e);
        }
    }
}

From source file:com.hdfstoftp.main.HdfsToFtp.java

/**
 * Copies files from HDFS to an FTP server.
 * 
 * @param srcFS
 *            the source file system
 * @param src
 *            the source path
 * @param dst
 *            the destination path
 * @param queryStr
 *            the filter expression
 * @param deleteSource
 *            whether to delete the source after a successful transfer
 * @param overwrite
 *            whether to overwrite files that already exist on the destination
 * @return boolean
 * @throws Exception
 */
private static boolean copyFromHDFSToFTP(Config config) throws Exception {
    // Get the HDFS file system
    Configuration conf = new Configuration();
    FileSystem srcFS = FileSystem.get(conf);
    long start = System.currentTimeMillis();
    boolean isRename = config.isRenameUploaded();
    int retryTimes = config.getRetryTimes();
    // Destination directory on the FTP server
    String dstPath = config.getDestDir();
    Path src = new Path(config.getSouceDir());
    FileStatus fileStatus = srcFS.getFileStatus(src);
    String subDir = null;
    if (fileStatus.isDirectory()) { // the source is a directory
        if (isRename) { // create a subdirectory into which uploaded files will be renamed
            subDir = Config.RENAME_DIR;
            srcFS.mkdirs(new Path(fileStatus.getPath(), subDir));
        }
        int threadNum = config.getThreadNum();
        // Thread pool for concurrent uploads
        ExecutorService threadPool = Executors.newFixedThreadPool(threadNum);
        // FTP client pool
        FTPClientPool ftpPool = new FTPClientPool(threadNum, new FtpClientFactory(config.getFTPClientConfig()));
        FTPClient ftpClient = ftpPool.borrowObject();
        // Create the destination directory on the FTP server
        ftpClient.makeDirectory(dstPath);
        ftpPool.returnObject(ftpClient);
        // List the source files
        FileStatus contents[] = srcFS.listStatus(src);
        long beginFilter = 0;
        long endFileter = 0;

        if (config.getCommandLine().hasOption("d") || config.getCommandLine().hasOption("h")
                || config.getCommandLine().hasOption("t")) {// ?"["
            beginFilter = System.currentTimeMillis();
            Long[] timeRange = parseTimeRange(config.getCommandLine());
            contents = getNewContents(timeRange, contents);
            endFileter = System.currentTimeMillis();
        }
        // Filter by file name regular expression
        if (config.getCommandLine().hasOption("r")) {
            beginFilter = System.currentTimeMillis();
            contents = getFilterContents(config.getCommandLine().getOptionValue("r").trim(), contents);
            endFileter = System.currentTimeMillis();
        }
        logger.info("total file count:" + contents.length);
        Map<String, String> fileNameMap = null;
        long beginSkip = 0;
        long endSkip = 0;
        boolean overwrite = true;
        if (config.getCommandLine().hasOption("o")) {
            overwrite = "true".equals(config.getCommandLine().getOptionValue("o").trim());
        }
        if (!overwrite) { // collect existing destination file names so they can be skipped
            beginSkip = System.currentTimeMillis();
            fileNameMap = getFileNameMap(dstPath, ftpPool);
            endSkip = System.currentTimeMillis();
        }
        int skiped = 0;

        List<Future<?>> futureList = new ArrayList<Future<?>>();
        for (int i = 0; i < contents.length; i++) {
            if (!overwrite && fileNameMap.containsKey(contents[i].getPath().getName())) {
                // 
                skiped++;
                Log.info("skiped filename:" + contents[i].getPath().getName());
                continue;
            }
            if (contents[i].isDirectory()) {
                continue;
            }
            // Submit an upload task for this file
            Future<?> future = threadPool.submit(new UploadFileTask(srcFS, contents[i].getPath(),
                    new Path(dstPath, contents[i].getPath().getName()), ftpPool, false, isRename, subDir,
                    retryTimes));
            futureList.add(future);
        }
        int transfered = 0;
        int failed = 0;
        for (Future<?> future : futureList) {
            Boolean computeResult = (Boolean) future.get();
            if (computeResult) {
                transfered++;
                if (transfered % 50 == 0 || transfered == contents.length) {
                    logger.info("have transfered:" + transfered + " files");
                }
            } else {
                failed++;
                logger.error("failed transter:" + failed + " files");
            }
        }
        // Shut down the thread pool
        threadPool.shutdown();
        // Close the FTP client pool
        ftpPool.close();
        // Summary statistics
        logger.info("filter time:" + (endFileter - beginFilter) + " ms");
        if (!overwrite) {
            logger.info("skip time:" + (endSkip - beginSkip) + " ms");
        }
        logger.info("total file count:" + contents.length);
        logger.info("total transtered: " + transfered + ",total failed:" + failed + ",total skiped:" + skiped);

    } else { // the source is a file containing a list of HDFS paths to upload

        BufferedReader reader = null;
        FtpClientFactory facotry = new FtpClientFactory(config.getFTPClientConfig());
        FTPClient ftpClient = null;
        InputStream in = null;
        try {
            Path path = fileStatus.getPath();
            if (!path.getName().contains("log")) {

            }
            reader = new BufferedReader(new FileReader(new File(path.toUri().getPath())));
            String str = null;

            ftpClient = facotry.makeObject();

            while ((str = reader.readLine()) != null) {
                String[] fields = str.split("&");
                Path filePath = null;
                if (fields.length == 2 && !fields[1].isEmpty()) {
                    filePath = new Path(fields[1]);
                    in = srcFS.open(filePath);
                    boolean result = ftpClient.storeFile(dstPath, in);
                    System.out.println(ftpClient.getReplyCode());
                    if (result) {
                        logger.info(filePath.toString());
                    } else {
                        logger_failed.info(filePath.toString());
                    }
                } else {
                    continue;
                }

            }
        } catch (Exception e) {
            e.printStackTrace();

        } finally {
            // Close resources defensively; they may be null if an earlier step failed.
            if (in != null) in.close();
            if (reader != null) reader.close();
            if (ftpClient != null) facotry.destroyObject(ftpClient);
        }

    }
    long end = System.currentTimeMillis();
    logger.info("finished transfer,total time:" + (end - start) / 1000 + "s");
    return true;
}

From source file:com.hdfstoftp.main.HdfsToFtp.java

/**
 * Filters the given file statuses, keeping only files whose names match the regular expression.
 * 
 * @param reg
 * @param contents
 * @return FileStatus[]
 */
public static FileStatus[] getFilterContents(String reg, FileStatus[] contents) {

    Pattern pattern = Pattern.compile(reg);
    List<FileStatus> statusList = new ArrayList<FileStatus>();
    for (FileStatus status : contents) {
        if (!status.isDirectory()) {
            String fileName = status.getPath().getName();
            Matcher matcher = pattern.matcher(fileName);
            if (matcher.matches()) {
                statusList.add(status);
            }
        }
    }
    return statusList.toArray(new FileStatus[statusList.size()]);
}

From source file:com.hortonworks.historian.nifi.reporter.HistorianDeanReporter.java

License:Apache License

@Override
public void onTrigger(ReportingContext reportingContext) {
    // create the Atlas client if we don't have one
    /*
    Properties props = System.getProperties();
     props.setProperty("atlas.conf", "/usr/hdp/current/atlas-client/conf");
     getLogger().info("***************** atlas.conf has been set to: " + props.getProperty("atlas.conf"));
    */
    inputs = new ArrayList<Referenceable>();
    outputs = new ArrayList<Referenceable>();
    //EventAccess eventAccess = reportingContext.getEventAccess();
    //int pageSize = reportingContext.getProperty(ACTION_PAGE_SIZE).asInteger();
    lateDataRoot = reportingContext.getProperty(LATE_DATA_ROOT).getValue();
    lateDataTasksPath = lateDataRoot + "/tasks";
    atlasUrl = reportingContext.getProperty(ATLAS_URL).getValue();
    nifiUrl = reportingContext.getProperty(NIFI_URL).getValue();
    nameNodeUrl = reportingContext.getProperty(NAME_NODE_URL).getValue();
    druidBrokerUrl = reportingContext.getProperty(DRUID_BROKER_HTTP_ENDPOINT).getValue();
    druidOverlordUrl = reportingContext.getProperty(DRUID_OVERLORD_HTTP_ENDPOINT).getValue();
    hiveServerUri = reportingContext.getProperty(HIVE_SERVER_CONNECTION_STRING).getValue();
    TAG_DIMENSION_NAME = reportingContext.getProperty(HISTORIAN_TAG_DIMENSION).getValue();
    //druidMetaUri = reportingContext.getProperty(DRUID_METASTORE_CONNECTION_STRING).getValue();
    String[] atlasURL = { atlasUrl };

    if (atlasClient == null) {
        getLogger().info("Creating new Atlas client for {}", new Object[] { atlasUrl });
        atlasClient = new AtlasClient(atlasURL, basicAuth);
    }

    if (atlasVersion == 0.0) {
        atlasVersion = Double.valueOf(getAtlasVersion(atlasUrl + "/api/atlas/admin/version", basicAuth));
        getLogger().info("********** Atlas Version is: " + atlasVersion);
    }

    getLogger().info("********** Number of Reports Sent: " + timesTriggered);
    if (timesTriggered == 0) {
        String hiveUsername = "hive";
        String hivePassword = "hive";

        try {
            getLogger().info("********** Establishing Connection to HDFS...");
            String hdfsPath = nameNodeUrl + "/";
            Configuration conf = new Configuration();
            conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
            conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
            fs = FileSystem.get(new URI(hdfsPath), conf);
            //createHDFSDirectory(lateDataRoot);
            createHDFSDirectory(lateDataTasksPath);

            getLogger().info("********** Checking for Unresolved Indexing Tasks...");
            FileStatus[] fileStatus = fs.listStatus(new Path(lateDataTasksPath));
            for (FileStatus status : fileStatus) {
                if (status.isDirectory()) {
                    String[] address = status.getPath().toString().split("/");
                    String currentPath = status.getPath().toString();
                    String currentDirName = address[address.length - 1];
                    String currentTaskId = currentDirName.replace("|", ":");
                    String ingestSpec = readHDFSFile(currentPath + "/ingestSpec");
                    List<String> sourceData = Arrays
                            .asList(readHDFSFile(currentPath + "/sourceData").split(","));
                    getLogger().info("********** Loading Unresolved Indexing Task:" + currentTaskId);
                    Map<String, Object> currentTaskMetaData = new HashMap<String, Object>();
                    currentTaskMetaData.put("ingestSpec", ingestSpec);
                    currentTaskMetaData.put("sourceData", sourceData);
                    deltaIndexTasks.put(currentTaskId, currentTaskMetaData);
                }
            }

            getLogger().info("********** Establishing Connection to Hive Server...");
            Class.forName("org.apache.hive.jdbc.HiveDriver");
            hiveConnection = DriverManager.getConnection(hiveServerUri, hiveUsername, hivePassword);

            getLogger().info("********** Create Business Taxonomy Terms...");
            String termPath = "/Catalog/terms/Unassigned";
            String termDefinition = "{\"name\":\"Unassigned\",\"description\":\"\"}";
            createBusinessTerm(termPath, termDefinition);

            getLogger().info("********** Checking if data model has been created...");
            /*
            try {
               atlasClient.getType(HistorianDataTypes.TAG_DIMENSION.getName());
               getLogger().info("********************* Trait: " + HistorianDataTypes.TAG_DIMENSION.getName() + " is already present");
            } catch (AtlasServiceException e) {
               getLogger().info("***************** Creating " + HistorianDataTypes.TAG_DIMENSION.getName() + " Trait...");
               atlasClient.createTraitType(HistorianDataTypes.TAG_DIMENSION.getName());
            }*/
            String historianDataModelJSON = generateHistorianDataModel();
            getLogger().info("********** Historian Data Model as JSON = " + historianDataModelJSON);
            //atlasClient.createType(historianDataModelJSON);
            getLogger().info("********** Created Types: " + atlasClient.createType(historianDataModelJSON));

            updateHiveColumnClassAttributes();

        } catch (AtlasServiceException e) {
            e.printStackTrace();
        } catch (AtlasException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
    }
    timesTriggered++;

    getLogger().info(
            "********** Looking for Druid Datasources to expose as Hive Tables or update with new information...");
    Iterator<String> resultIterator = getDruidDataSourceList().iterator();
    while (resultIterator.hasNext()) {
        String dataSource = resultIterator.next();
        dataSourceDetails.put(dataSource, getDruidDataSourceDetails(dataSource));

        getLogger().info("********** Exposing Druid Data Source: " + dataSource);
        exposeDruidDataSourceAsHiveTable(dataSource);

        getLogger().info("********** Update Atlas Hive Tables and Column for Druid Data Source: " + dataSource);
        updateDataSourceHiveColumnAttributes(dataSource);
    }

    getLogger().info("********** Checking for Late Arriving Data...");
    List<String> dataSourceExclusions = new ArrayList<String>();
    List<String> deletedTasks = new ArrayList<String>();
    Map<String, Object> newTasks = new HashMap<String, Object>();
    for (String taskId : deltaIndexTasks.keySet()) {
        String status = getIndexTaskStatus(taskId);
        if (status.equalsIgnoreCase("SUCCESS")) {
            getLogger().info("********** Indexing Task " + taskId
                    + " completed successfully, removing source data and task meta data...");
            List<String> sourceDataList = (List<String>) ((Map) deltaIndexTasks.get(taskId)).get("sourceData");
            Iterator<String> currentSourceObjectIterator = sourceDataList.iterator();
            while (currentSourceObjectIterator.hasNext()) {
                String currentSourceObject = currentSourceObjectIterator.next();
                deleteHDFSObject(currentSourceObject);
            }
            deleteHDFSObject(lateDataTasksPath + "/" + taskId.replace(":", "__"));
            deletedTasks.add(taskId);
        } else if (status.equalsIgnoreCase("PENDING") || status.equalsIgnoreCase("RUNNING")) {
            getLogger().info("********** Indexing Task " + taskId + " is currently " + status
                    + ", excluding source data from eligibility for new indexing tasks");
            List<String> sourceDataList = (List<String>) ((Map) deltaIndexTasks.get(taskId)).get("sourceData");
            dataSourceExclusions.addAll(sourceDataList);
        } else {
            getLogger().info("********** Indexing Task " + taskId + " is in " + status
                    + " state, excluding source data from eligibility for new indexing tasks");
            getLogger().info(
                    "********** Obtain task logs from Druid Overlord Console, address the problem, and then restart the task manually...");
            List<String> sourceDataList = (List<String>) ((Map) deltaIndexTasks.get(taskId)).get("sourceData");
            dataSourceExclusions.addAll(sourceDataList);
            String ingestSpec = (String) ((Map) deltaIndexTasks.get(taskId)).get("ingestSpec");
            String newTaskId = createDruidIndexingTask(ingestSpec);
            renameHDFSObject(lateDataTasksPath + "/" + taskId.replace(":", "__"),
                    lateDataTasksPath + "/" + newTaskId.replace(":", "__"));
            newTasks.put(newTaskId, ((Map) deltaIndexTasks.get(taskId)));
            deletedTasks.add(taskId);
        }
    }
    deltaIndexTasks.putAll(newTasks);
    deltaIndexTasks.keySet().removeAll(deletedTasks);
    indexLateData(dataSourceExclusions);
    getLogger().info("********** Done...");
}

From source file:com.hortonworks.historian.nifi.reporter.HistorianDeanReporter.java

License:Apache License

private void indexLateData(List<String> dataSourceExclusions) {
    String nifiControllersUrl = nifiUrl + "/nifi-api/flow/process-groups/root/controller-services";

    try {
        JSONArray controllers = getJSONFromUrl(nifiControllersUrl, basicAuth)
                .getJSONArray("controllerServices");
        getLogger().info("********** Getting List of Druid Tranquility Controllers...");
        for (int i = 0; i < controllers.length(); i++) {
            JSONObject currentController = controllers.getJSONObject(i).getJSONObject("component");
            String currentControllerType = currentController.getString("type");
            if (currentControllerType
                    .equalsIgnoreCase("com.hortonworks.nifi.controller.DruidTranquilityController")) {
                String lateDataPath = lateDataRoot + "/" + currentController.getJSONObject("properties")
                        .getString("query_granularity").toLowerCase() + "/";
                getLogger().info("********** Checking for Late Arriving Data at HDFS Path: " + lateDataPath);
                if (fs.exists(new Path(lateDataPath))) {
                    FileStatus[] fileStatus = fs.listStatus(new Path(lateDataPath));
                    List<Date> dates = new ArrayList<Date>();
                    List<String> sourceData = new ArrayList<String>();
                    for (FileStatus status : fileStatus) {
                        String[] address = status.getPath().toString().split("/");
                        String currentBin = address[address.length - 1];
                        Date binDate = new SimpleDateFormat("yyyy-MM-dd-HH-mm").parse(currentBin);
                        sourceData.add(lateDataPath + currentBin);
                        dates.add(binDate);
                    }
                    ((Collection<?>) sourceData).removeAll(dataSourceExclusions);
                    getLogger().info("********** Detected " + sourceData.size()
                            + " bins of relevant late data, initiating Delta Indexing task...");

                    if (fileStatus.length > 0 && sourceData.size() > 0) {
                        String intervalStart = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'")
                                .format(Collections.min(dates));
                        String intervalEnd = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'")
                                .format(Collections.max(dates));
                        String bins = String.join(",", sourceData);
                        JSONArray dimensionsList = new JSONArray(Arrays.asList(currentController
                                .getJSONObject("properties").getString("dimensions_list").split(",")));
                        String ingestSpec = "{" + "     \"type\" : \"index_hadoop\"," + "     \"spec\" : {"
                                + "      \"dataSchema\" : {" + "         \"dataSource\": \""
                                + currentController.getJSONObject("properties").getString("data_source") + "\","
                                + "         \"parser\" : {" + "            \"type\" : \"hadoopyString\","
                                + "            \"parseSpec\" : {" + "               \"format\" : \"json\","
                                + "               \"timestampSpec\" : {" + "                  \"column\" : \""
                                + currentController.getJSONObject("properties").getString("timestamp_field")
                                + "\"," + "                  \"format\" : \"auto\"" + "               },"
                                + "               \"dimensionsSpec\" : {" + "                  \"dimensions\": "
                                + dimensionsList + "               }" + "            }" + "         },"
                                + "         \"metricsSpec\" : "
                                + currentController.getJSONObject("properties")
                                        .getString("aggregators_descriptor")
                                + "," + "         \"granularitySpec\" : {"
                                + "            \"type\" : \"uniform\","
                                + "            \"segmentGranularity\" : \""
                                + currentController.getJSONObject("properties").getString("segment_granularity")
                                + "\"," + "            \"queryGranularity\" : \""
                                + currentController.getJSONObject("properties").getString("query_granularity")
                                + "\"," + "            \"intervals\": [\"" + intervalStart + "/" + intervalEnd
                                + "\"]" + "         }" + "      }," + "      \"ioConfig\" : {"
                                + "         \"type\" : \"hadoop\"," + "         \"inputSpec\" : {"
                                + "            \"type\" : \"multi\"," + "            \"children\": ["
                                + "               {" + "                  \"type\" : \"dataSource\","
                                + "                  \"ingestionSpec\" : {"
                                + "                     \"dataSource\": \""
                                + currentController.getJSONObject("properties").getString("data_source") + "\","
                                + "                     \"intervals\": [\"" + intervalStart + "/" + intervalEnd
                                + "\"]" + "                  }" + "               }," + "               {"
                                + "                  \"type\" : \"static\"," + "                  \"paths\": \""
                                + bins + "\"" + "               }" + "            ]" + "         }" + "      },"
                                + "      \"tuningConfig\" : {" + "         \"type\": \"hadoop\"" + "      }"
                                + "     }" + "}";
                        getLogger().info("********** Delta Ingestion Spec: " + ingestSpec);
                        String indexTaskId = createDruidIndexingTask(ingestSpec);
                        getLogger().info("********** Created Indexing Task " + indexTaskId);
                        Map<String, Object> currentTaskMetaData = new HashMap<String, Object>();
                        currentTaskMetaData.put("ingestSpec", ingestSpec);
                        currentTaskMetaData.put("sourceData", sourceData);
                        deltaIndexTasks.put(indexTaskId, currentTaskMetaData);
                        String currentTaskDirPath = lateDataTasksPath + "/" + indexTaskId.replace(":", "__");
                        getLogger().info("********** Persisting Record of Task: " + currentTaskDirPath);
                        currentTaskDirPath = createHDFSDirectory(currentTaskDirPath);
                        writeHDFSFile(currentTaskDirPath + "/ingestSpec", ingestSpec);
                        writeHDFSFile(currentTaskDirPath + "/sourceData", bins);
                    } else {
                        getLogger().info("********** " + lateDataPath + " does not contain any data...");
                    }
                } else {
                    getLogger().info("********** There is a Druid Controller mapped to " + lateDataPath
                            + ", however, the path does not yet exist...");
                }
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } catch (JSONException e) {
        e.printStackTrace();
    }
}

From source file:com.ibm.bi.dml.parser.DataExpression.java

License:Open Source License

/**
 * Reads the metadata (MTD) file associated with the given file name.
 * 
 * @param filename
 * @return
 * @throws LanguageException
 */
public JSONObject readMetadataFile(String filename, boolean conditional) throws LanguageException {
    JSONObject retVal = null;
    boolean exists = false;
    FileSystem fs = null;

    try {
        fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
    } catch (Exception e) {
        raiseValidateError("could not read the configuration file: " + e.getMessage(), false);
    }

    Path pt = new Path(filename);
    try {
        if (fs.exists(pt)) {
            exists = true;
        }
    } catch (Exception e) {
        exists = false;
    }

    boolean isDirBoolean = false;
    try {
        if (exists && fs.getFileStatus(pt).isDirectory())
            isDirBoolean = true;
        else
            isDirBoolean = false;
    } catch (Exception e) {
        raiseValidateError(
                "error validing whether path " + pt.toString() + " is directory or not: " + e.getMessage(),
                conditional);
    }

    // CASE: filename is a directory -- process as a directory
    if (exists && isDirBoolean) {

        // read directory contents
        retVal = new JSONObject();

        FileStatus[] stats = null;

        try {
            stats = fs.listStatus(pt);
        } catch (Exception e) {
            raiseValidateError("for MTD file in directory, error reading directory with MTD file "
                    + pt.toString() + ": " + e.getMessage(), conditional);
        }

        for (FileStatus stat : stats) {
            Path childPath = stat.getPath(); // gives directory name
            if (childPath.getName().startsWith("part")) {

                BufferedReader br = null;
                try {
                    br = new BufferedReader(new InputStreamReader(fs.open(childPath)));
                } catch (Exception e) {
                    raiseValidateError("for MTD file in directory, error reading part of MTD file with path "
                            + childPath.toString() + ": " + e.getMessage(), conditional);
                }

                JSONObject childObj = null;
                try {
                    childObj = JSONHelper.parse(br);
                } catch (Exception e) {
                    raiseValidateError("for MTD file in directory, error parsing part of MTD file with path "
                            + childPath.toString() + ": " + e.getMessage(), conditional);
                }

                for (Object obj : childObj.entrySet()) {
                    @SuppressWarnings("unchecked")
                    Entry<Object, Object> e = (Entry<Object, Object>) obj;
                    Object key = e.getKey();
                    Object val = e.getValue();
                    retVal.put(key, val);
                }
            }
        } // end for 
    }

    // CASE: filename points to a file
    else if (exists) {

        BufferedReader br = null;

        // try reading MTD file
        try {
            br = new BufferedReader(new InputStreamReader(fs.open(pt)));
        } catch (Exception e) {
            raiseValidateError("error reading MTD file with path " + pt.toString() + ": " + e.getMessage(),
                    conditional);
        }

        // try parsing MTD file
        try {
            retVal = JSONHelper.parse(br);
        } catch (Exception e) {
            raiseValidateError("error parsing MTD file with path " + pt.toString() + ": " + e.getMessage(),
                    conditional);
        }
    }

    return retVal;
}