Example usage for org.apache.hadoop.fs FileUtil stat2Paths

List of usage examples for org.apache.hadoop.fs FileUtil stat2Paths

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileUtil stat2Paths.

Prototype

public static Path[] stat2Paths(FileStatus[] stats) 

Document

Convert an array of FileStatus to an array of Path.
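
Before the full examples, here is a minimal, self-contained sketch of the typical pattern (the directory /tmp/input is a hypothetical placeholder, not taken from the examples below): list a directory with FileSystem.listStatus and pass the resulting FileStatus[] to FileUtil.stat2Paths to get plain Path objects.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class Stat2PathsExample {

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // list a (hypothetical) input directory; listStatus returns FileStatus[]
        FileStatus[] statuses = fs.listStatus(new Path("/tmp/input"));

        // convert the FileStatus[] to Path[] so the paths can be opened, filtered, etc.
        Path[] paths = FileUtil.stat2Paths(statuses);

        for (Path p : paths) {
            System.out.println(p);
        }
    }
}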

Usage

From source file:ph.fingra.hadoop.mapred.common.CopyWithinHdfsFile.java

License:Apache License

public void dirToFile(String srcdir, String dstfile) throws IOException {

    FileSystem shfs = FileSystem.get(URI.create(srcdir), getConf());
    FileSystem thfs = FileSystem.get(URI.create(dstfile), getConf());

    Path srcPath = new Path(srcdir);
    Path dstPath = new Path(dstfile);

    // delete the destination file if it already exists
    if (thfs.exists(dstPath)) {
        thfs.delete(dstPath, true);
    }

    // get hdfs file list
    PathFilter resultFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(ConstantVars.RESULT_FILE_PREFIX);
        }
    };

    FileStatus[] status = shfs.listStatus(srcPath, resultFileFilter);

    Path[] listedPaths = FileUtil.stat2Paths(status);

    if (listedPaths.length > 0) {
        // create hdfs output stream
        FSDataOutputStream out = thfs.create(dstPath);
        for (int i = 0; i < listedPaths.length; i++) {
            // create hdfs input stream
            FSDataInputStream in = shfs.open(listedPaths[i]);
            byte buffer[] = new byte[256];
            int bytesRead = 0;
            while ((bytesRead = in.read(buffer)) > 0) {
                out.write(buffer, 0, bytesRead);
            }
            in.close();
        }
        out.close();
    }

    return;
}

From source file:ph.fingra.hadoop.mapred.common.HdfsFileUtil.java

License:Apache License

public static int getDateMatchedFileCount(Path srcpath) throws IOException {

    int count = 0;
    Path parentPath = null;
    String date_ext = null;

    // directory path
    parentPath = srcpath.getParent();

    // date pattern
    Pattern p = Pattern.compile("([0-9]{4})\\-([0-9]{2})\\-([0-9]{2})");

    Matcher m = p.matcher(srcpath.getName());

    if (m.find()) {
        // suffix such as "yyyy-MM-dd.txt" at the end of the file name
        date_ext = srcpath.getName().substring(m.start()/*, m.end()*/);
    }

    Configuration conf = new Configuration();

    FileSystem hdfs = FileSystem.get(conf);

    // get matched file list
    final String suffix = date_ext;
    PathFilter resultFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().endsWith(suffix);
        }
    };

    try {
        FileStatus[] status = hdfs.listStatus(parentPath, resultFileFilter);

        if (status != null) {
            Path[] listedPaths = FileUtil.stat2Paths(status);

            if (listedPaths != null) {
                count = listedPaths.length;
            }
        }
    } catch (FileNotFoundException ignore) {
    } catch (InvalidInputException ignore) {
        // Hadoop 1.x throws InvalidInputException here instead of FileNotFoundException
    }

    return count;
}

From source file:ph.fingra.hadoop.mapred.common.HdfsFileUtil.java

License:Apache License

public static boolean deleteNBackupFile(String srcdir, String srcfile, int maxcount, String runday,
        final String dbfnameprefix) throws IOException {

    Configuration conf = new Configuration();

    FileSystem hdfs = FileSystem.get(conf);

    Path targetPath = null;
    Path rootPath = new Path(srcdir);
    Path sourcePath = new Path(srcfile);
    String target_day = "";
    String target_file = "";
    boolean success = false;

    // if the source file does not exist, skip the backup and return true
    if (!hdfs.exists(sourcePath)) {
        return true;
    }

    // build the backup file name from yesterday's date
    target_day = DateTimeUtil.addDays(runday, -1, "yyyyMMdd");
    target_file = srcfile + "-" + target_day;
    //System.out.println("target_file - " + target_file);
    targetPath = new Path(target_file);

    // delete any existing backup file with the same name, then rename the source file to it
    if (hdfs.exists(new Path(target_file))) {
        hdfs.delete(targetPath, true);
    }
    success = hdfs.rename(sourcePath, targetPath);

    // get backup file list
    PathFilter resultFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(dbfnameprefix + "-");
        }
    };

    try {
        FileStatus[] status = hdfs.listStatus(rootPath, resultFileFilter);

        Path[] listedPaths = FileUtil.stat2Paths(status);

        // delete more than maximum number of backup files
        if (listedPaths.length > maxcount) {

            Comparator<Path> c = new Comparator<Path>() {
                public int compare(Path o1, Path o2) {
                    int ret = 0;
                    ret = o1.getName().compareTo(o2.getName());
                    return -(ret); // reverse order, so newer dates come first
                }
            };

            Arrays.sort(listedPaths, c);

            for (int i = maxcount; i < listedPaths.length; i++) {
                Path path = listedPaths[i];
                hdfs.delete(path, true);
            }
        }
    } catch (FileNotFoundException ignore) {
    } catch (InvalidInputException ignore) {
        // Hadoop 1.x throws InvalidInputException here instead of FileNotFoundException
    }

    return success;
}

From source file:ph.fingra.hadoop.mapred.common.HdfsFileUtil.java

License:Apache License

public static boolean deleteOriginFiles(FingraphConfig config, String year, String month, String day)
        throws IOException {

    Configuration conf = new Configuration();

    FileSystem hdfs = FileSystem.get(conf);

    String root_uri = config.getHadoop_user_path() + (config.getHadoop_user_path().endsWith("/") ? "" : "/")
            + config.getSetting().getHfs_input_path()
            + (config.getSetting().getHfs_input_path().endsWith("/") ? "" : "/");
    root_uri = root_uri.replaceAll("\\{yyyy\\}", year);
    root_uri = root_uri.replaceAll("\\{MM\\}", month);
    root_uri = root_uri.replaceAll("\\{dd\\}", day);
    String file_uri = config.getSetting().getOrigin_input_file();
    file_uri = file_uri.replaceAll("\\{yyyy\\}", year);
    file_uri = file_uri.replaceAll("\\{MM\\}", month);
    file_uri = file_uri.replaceAll("\\{dd\\}", day);
    file_uri = file_uri.replace("*", "[\\w]*");
    final String patt = "^" + file_uri + "$";
    //System.out.println(patt);

    Path rootPath = new Path(root_uri);
    boolean success = false;

    // get matched file list
    PathFilter resultFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().matches(patt);
        }
    };

    try {
        FileStatus[] status = hdfs.listStatus(rootPath, resultFileFilter);

        if (status != null) {
            Path[] listedPaths = FileUtil.stat2Paths(status);

            if (listedPaths != null) {
                for (Path path : listedPaths) {
                    success = hdfs.delete(path, true);
                }
            }
        }
    } catch (FileNotFoundException ignore) {
    } catch (InvalidInputException ignore) {
        // Hadoop 1.x throws InvalidInputException here instead of FileNotFoundException
    }

    return success;
}

From source file:si.david.mapreduce.lda.InternalVectorDumper.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    /*
     Option seqOpt = obuilder.withLongName("seqFile").withRequired(false).withArgument(
     abuilder.withName("seqFile").withMinimum(1).withMaximum(1).create()).withDescription(
     "The Sequence File containing the Vectors").withShortName("s").create();
     Option dirOpt = obuilder.withLongName("seqDirectory").withRequired(false).withArgument(
     abuilder.withName("seqDirectory").withMinimum(1).withMaximum(1).create())
     .withDescription("The directory containing Sequence File of Vectors")
     .withShortName("d").create();
     */
    addInputOption();
    addOutputOption();
    addOption("useKey", "u", "If the Key is a vector than dump that instead");
    addOption("printKey", "p", "Print out the key as well, delimited by tab (or the value if useKey is true");
    addOption("dictionary", "d", "The dictionary file.", false);
    addOption("dictionaryType", "dt", "The dictionary file type (text|seqfile)", false);
    addOption("csv", "c",
            "Output the Vector as CSV.  Otherwise it substitutes in the terms for vector cell entries");
    addOption("namesAsComments", "n", "If using CSV output, optionally add a comment line for each NamedVector "
            + "(if the vector is one) printing out the name");
    addOption("nameOnly", "N", "Use the name as the value for each NamedVector (skip other vectors)");
    addOption("sortVectors", "sort",
            "Sort output key/value pairs of the vector entries in abs magnitude " + "descending order");
    addOption("quiet", "q", "Print only file contents");
    addOption("sizeOnly", "sz", "Dump only the size of the vector");
    addOption("numItems", "ni", "Output at most <n> vecors", false);
    addOption("vectorSize", "vs",
            "Truncate vectors to <vs> length when dumping (most useful when in" + " conjunction with -sort",
            false);
    addOption(buildOption("filter", "fi",
            "Only dump out those vectors whose name matches the filter."
                    + "  Multiple items may be specified by repeating the argument.",
            true, 1, Integer.MAX_VALUE, false, null));

    if (parseArguments(args, false, true) == null) {
        return -1;
    }

    Path[] pathArr;
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path input = getInputPath();
    FileStatus fileStatus = fs.getFileStatus(input);
    if (fileStatus.isDir()) {
        pathArr = FileUtil.stat2Paths(fs.listStatus(input, PathFilters.logsCRCFilter()));
    } else {
        FileStatus[] inputPaths = fs.globStatus(input);
        pathArr = new Path[inputPaths.length];
        int i = 0;
        for (FileStatus fstatus : inputPaths) {
            pathArr[i++] = fstatus.getPath();
        }
    }

    String dictionaryType = getOption("dictionaryType", "text");

    boolean sortVectors = hasOption("sortVectors");
    boolean quiet = hasOption("quiet");
    if (!quiet) {
        log.info("Sort? {}", sortVectors);
    }

    String[] dictionary = null;
    if (hasOption("dictionary")) {
        String dictFile = getOption("dictionary");
        switch (dictionaryType) {
        case "text":
            dictionary = VectorHelper.loadTermDictionary(new File(dictFile));
            break;
        case "sequencefile":
            dictionary = VectorHelper.loadTermDictionary(conf, dictFile);
            break;
        default:
            //TODO: support Lucene's FST as a dictionary type
            throw new IOException("Invalid dictionary type: " + dictionaryType);
        }
    }

    Set<String> filters;
    if (hasOption("filter")) {
        filters = Sets.newHashSet(getOptions("filter"));
    } else {
        filters = null;
    }

    boolean useCSV = hasOption("csv");

    boolean sizeOnly = hasOption("sizeOnly");
    boolean nameOnly = hasOption("nameOnly");
    boolean namesAsComments = hasOption("namesAsComments");
    boolean transposeKeyValue = hasOption("vectorAsKey");
    Writer writer;
    boolean shouldClose;
    File output = getOutputFile();
    if (output != null) {
        shouldClose = true;
        log.info("Output file: {}", output);
        Files.createParentDirs(output);
        writer = Files.newWriter(output, Charsets.UTF_8);
    } else {
        shouldClose = false;
        writer = new OutputStreamWriter(System.out, Charsets.UTF_8);
    }
    try {
        boolean printKey = hasOption("printKey");
        if (useCSV && dictionary != null) {
            writer.write("#");
            for (int j = 0; j < dictionary.length; j++) {
                writer.write(dictionary[j]);
                if (j < dictionary.length - 1) {
                    writer.write(',');
                }
            }
            writer.write('\n');
        }
        Long numItems = null;
        if (hasOption("numItems")) {
            numItems = Long.parseLong(getOption("numItems"));
            if (quiet) {
                writer.append("#Max Items to dump: ").append(String.valueOf(numItems)).append('\n');
            }
        }
        int maxIndexesPerVector = hasOption("vectorSize") ? Integer.parseInt(getOption("vectorSize"))
                : Integer.MAX_VALUE;
        long itemCount = 0;
        int fileCount = 0;
        for (Path path : pathArr) {
            if (numItems != null && numItems <= itemCount) {
                break;
            }
            if (quiet) {
                log.info("Processing file '{}' ({}/{})", path, ++fileCount, pathArr.length);
            }
            SequenceFileIterable<Writable, Writable> iterable = new SequenceFileIterable<>(path, true, conf);
            Iterator<Pair<Writable, Writable>> iterator = iterable.iterator();
            long i = 0;
            while (iterator.hasNext() && (numItems == null || itemCount < numItems)) {
                Pair<Writable, Writable> record = iterator.next();
                Writable keyWritable = record.getFirst();
                Writable valueWritable = record.getSecond();
                if (printKey) {
                    Writable notTheVectorWritable = transposeKeyValue ? valueWritable : keyWritable;
                    writer.write(notTheVectorWritable.toString());
                    writer.write('\t');
                }
                Vector vector;
                try {
                    vector = ((VectorWritable) (transposeKeyValue ? keyWritable : valueWritable)).get();
                } catch (ClassCastException e) {
                    if ((transposeKeyValue ? keyWritable
                            : valueWritable) instanceof WeightedPropertyVectorWritable) {
                        vector = ((WeightedPropertyVectorWritable) (transposeKeyValue ? keyWritable
                                : valueWritable)).getVector();
                    } else {
                        throw e;
                    }
                }
                if (filters == null || !(vector instanceof NamedVector)
                        || filters.contains(((NamedVector) vector).getName())) {
                    if (sizeOnly) {
                        if (vector instanceof NamedVector) {
                            writer.write(((NamedVector) vector).getName());
                            writer.write(":");
                        } else {
                            writer.write(String.valueOf(i++));
                            writer.write(":");
                        }
                        writer.write(String.valueOf(vector.size()));
                        writer.write('\n');
                    } else if (nameOnly) {
                        if (vector instanceof NamedVector) {
                            writer.write(((NamedVector) vector).getName());
                            writer.write('\n');
                        }
                    } else {
                        String fmtStr;
                        if (useCSV) {
                            fmtStr = VectorHelper.vectorToCSVString(vector, namesAsComments);
                        } else {
                            fmtStr = VectorHelper.vectorToJson(vector, dictionary, maxIndexesPerVector,
                                    sortVectors);
                        }
                        writer.write(fmtStr);
                        writer.write('\n');
                    }
                    itemCount++;
                }
            }
        }
        writer.flush();
    } finally {
        if (shouldClose) {
            Closeables.close(writer, false);
        }
    }

    return 0;
}

From source file:StorageEngineClient.CombineColumnStorageFileInputFormat.java

License:Open Source License

@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {

    long minSizeNode = 0;
    long minSizeRack = 0;
    long maxSize = 0;

    if (minSplitSizeNode != 0) {
        minSizeNode = minSplitSizeNode;
    } else {
        minSizeNode = job.getLong("mapred.min.split.size.per.node", 0);
    }
    if (minSplitSizeRack != 0) {
        minSizeRack = minSplitSizeRack;
    } else {
        minSizeRack = job.getLong("mapred.min.split.size.per.rack", 0);
    }
    if (maxSplitSize != 0) {
        maxSize = maxSplitSize;
    } else {
        maxSize = job.getLong("mapred.max.split.size", 0);
    }
    if (maxSize == 0) {
        maxSize = (long) (job.getLong("dfs.block.size", 512 * 1024 * 1024) * 0.8);
    }
    if (minSizeNode == 0) {
        minSizeNode = maxSize / 2;
    }
    if (minSizeRack == 0) {
        minSizeRack = maxSize / 2;
    }
    if (minSizeNode != 0 && maxSize != 0 && minSizeNode > maxSize) {
        throw new IOException("Minimum split size pernode " + minSizeNode
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && maxSize != 0 && minSizeRack > maxSize) {
        throw new IOException("Minimum split size per rack" + minSizeRack
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && minSizeNode > minSizeRack) {
        throw new IOException("Minimum split size per node" + minSizeNode
                + " cannot be smaller than minimum split size per rack " + minSizeRack);
    }

    Path[] paths = FileUtil.stat2Paths(listStatus(job));
    List<CombineFileSplit> splits = new ArrayList<CombineFileSplit>();
    if (paths.length == 0) {
        return splits.toArray(new CombineFileSplit[splits.size()]);
    }

    for (MultiPathFilter onepool : pools) {
        ArrayList<Path> myPaths = new ArrayList<Path>();

        for (int i = 0; i < paths.length; i++) {
            if (paths[i] == null) {
                continue;
            }
            Path p = new Path(paths[i].toUri().getPath());
            if (onepool.accept(p)) {
                myPaths.add(paths[i]);
                paths[i] = null;
            }
        }
        getMoreSplits(job, myPaths.toArray(new Path[myPaths.size()]), maxSize, minSizeNode, minSizeRack,
                splits);
    }

    ArrayList<Path> myPaths = new ArrayList<Path>();
    for (int i = 0; i < paths.length; i++) {
        if (paths[i] == null) {
            continue;
        }
        myPaths.add(paths[i]);
    }
    LOG.info("myPaths size:\t" + myPaths.size());
    getMoreSplits(job, myPaths.toArray(new Path[myPaths.size()]), maxSize, minSizeNode, minSizeRack, splits);
    if (splits.size() == 0)
        return super.getSplits(job, numSplits);
    LOG.info("splits #:\t" + splits.size());
    return splits.toArray(new CombineFileSplit[splits.size()]);
}

From source file:StorageEngineClient.CombineFileInputFormat.java

License:Open Source License

@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {

    long minSizeNode = 0;
    long minSizeRack = 0;
    long maxSize = 0;

    if (minSplitSizeNode != 0) {
        minSizeNode = minSplitSizeNode;
    } else {
        minSizeNode = job.getLong("mapred.min.split.size.per.node", 0);
    }
    if (minSplitSizeRack != 0) {
        minSizeRack = minSplitSizeRack;
    } else {
        minSizeRack = job.getLong("mapred.min.split.size.per.rack", 0);
    }
    if (maxSplitSize != 0) {
        maxSize = maxSplitSize;
    } else {
        maxSize = job.getLong("mapred.max.split.size", 0);
    }
    if (maxSize == 0) {
        maxSize = (long) (job.getLong("dfs.block.size", 512 * 1024 * 1024) * 0.8);
    }
    if (minSizeNode == 0) {
        minSizeNode = maxSize / 2;
    }
    if (minSizeRack == 0) {
        minSizeRack = maxSize / 2;
    }
    if (minSizeNode != 0 && maxSize != 0 && minSizeNode > maxSize) {
        throw new IOException("Minimum split size pernode " + minSizeNode
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && maxSize != 0 && minSizeRack > maxSize) {
        throw new IOException("Minimum split size per rack" + minSizeRack
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && minSizeNode > minSizeRack) {
        throw new IOException("Minimum split size per node" + minSizeNode
                + " cannot be smaller than minimum split size per rack " + minSizeRack);
    }

    FileStatus[] fsStatus = listStatus(job);
    Path[] paths = FileUtil.stat2Paths(fsStatus);
    Map<String, FileStatus> fileNameToStatus = new HashMap<String, FileStatus>();
    int arraySize = fsStatus.length;
    for (int i = 0; i < arraySize; i++) {
        fileNameToStatus.put(getFileName(paths[i]), fsStatus[i]);
    }

    List<CombineFileSplit> splits = new ArrayList<CombineFileSplit>();
    if (paths.length == 0) {
        return splits.toArray(new CombineFileSplit[splits.size()]);
    }

    for (MultiPathFilter onepool : pools) {
        ArrayList<Path> myPaths = new ArrayList<Path>();

        for (int i = 0; i < paths.length; i++) {
            if (paths[i] == null) {
                continue;
            }
            Path p = new Path(paths[i].toUri().getPath());
            if (onepool.accept(p)) {
                myPaths.add(paths[i]);
                paths[i] = null;
            }
        }
        getMoreSplitsWithStatus(job, myPaths.toArray(new Path[myPaths.size()]), fileNameToStatus, maxSize,
                minSizeNode, minSizeRack, splits);
    }

    ArrayList<Path> myPaths = new ArrayList<Path>();
    for (int i = 0; i < paths.length; i++) {
        if (paths[i] == null) {
            continue;
        }
        myPaths.add(paths[i]);
    }
    LOG.debug("myPaths size:\t" + myPaths.size());
    try {
        getMoreSplitsWithStatus(job, myPaths.toArray(new Path[myPaths.size()]), fileNameToStatus, maxSize,
                minSizeNode, minSizeRack, splits);
    } catch (NullGzFileException e) {
        throw new IOException(e);
    }
    LOG.debug("splits #:\t" + splits.size());

    return splits.toArray(new CombineFileSplit[splits.size()]);
}

From source file:StorageEngineClient.CombineFileInputFormat_bak.java

License:Open Source License

@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {

    long minSizeNode = 0;
    long minSizeRack = 0;
    long maxSize = 0;

    if (minSplitSizeNode != 0) {
        minSizeNode = minSplitSizeNode;
    } else {
        minSizeNode = job.getLong("mapred.min.split.size.per.node", 0);
    }
    if (minSplitSizeRack != 0) {
        minSizeRack = minSplitSizeRack;
    } else {
        minSizeRack = job.getLong("mapred.min.split.size.per.rack", 0);
    }
    if (maxSplitSize != 0) {
        maxSize = maxSplitSize;
    } else {
        maxSize = job.getLong("mapred.max.split.size", 0);
    }
    if (maxSize == 0) {
        maxSize = (long) (job.getLong("dfs.block.size", 512 * 1024 * 1024) * 0.8);
    }
    if (minSizeNode == 0) {
        minSizeNode = maxSize / 2;
    }
    if (minSizeRack == 0) {
        minSizeRack = maxSize / 2;
    }
    if (minSizeNode != 0 && maxSize != 0 && minSizeNode > maxSize) {
        throw new IOException("Minimum split size pernode " + minSizeNode
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && maxSize != 0 && minSizeRack > maxSize) {
        throw new IOException("Minimum split size per rack" + minSizeRack
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && minSizeNode > minSizeRack) {
        throw new IOException("Minimum split size per node" + minSizeNode
                + " cannot be smaller than minimum split size per rack " + minSizeRack);
    }

    Path[] paths = FileUtil.stat2Paths(listStatus(job));
    List<CombineFileSplit> splits = new ArrayList<CombineFileSplit>();
    if (paths.length == 0) {
        return splits.toArray(new CombineFileSplit[splits.size()]);
    }

    for (MultiPathFilter onepool : pools) {
        ArrayList<Path> myPaths = new ArrayList<Path>();

        for (int i = 0; i < paths.length; i++) {
            if (paths[i] == null) {
                continue;
            }
            Path p = new Path(paths[i].toUri().getPath());
            if (onepool.accept(p)) {
                myPaths.add(paths[i]);
                paths[i] = null;
            }
        }
        getMoreSplits(job, myPaths.toArray(new Path[myPaths.size()]), maxSize, minSizeNode, minSizeRack,
                splits);
    }

    ArrayList<Path> myPaths = new ArrayList<Path>();
    for (int i = 0; i < paths.length; i++) {
        if (paths[i] == null) {
            continue;
        }
        myPaths.add(paths[i]);
    }
    LOG.info("myPaths size:\t" + myPaths.size());
    getMoreSplits(job, myPaths.toArray(new Path[myPaths.size()]), maxSize, minSizeNode, minSizeRack, splits);
    LOG.info("splits #:\t" + splits.size());

    return splits.toArray(new CombineFileSplit[splits.size()]);
}

From source file:translator.MRTranslate1.java

License:Open Source License

public int run(String[] args) throws Exception {
    Job job = new Job(getConf(), "Translate1");
    job.setJarByClass(MRTranslate1.class);
    job.setMapperClass(MRTranslateMapper1.class);
    job.setReducerClass(MRTranslateReducer1.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    //job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    //job.setMapOutputValueClass(Text.class);

    //job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    //FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    TableMapReduceUtil.newJob();
    MyFileInputFormat.addInputPath(job, new Path(args[0]));
    byte[] startRow = new byte[1];
    byte[] stopRow = new byte[1];
    startRow[0] = (byte) 1;
    stopRow[0] = (byte) 2;
    TableMapReduceUtil.addCol("", "T", "H2RDF", startRow, stopRow, "A:", job);
    job.setInputFormatClass(FileTableInputFormat.class);

    FileSystem fs = FileSystem.get(conf);

    Path inputDir = new Path(args[0]);
    System.out.println(args[0]);
    int reducer_num = FileUtil.stat2Paths(fs.listStatus(inputDir)).length;

    job.getConfiguration().set("nikos.inputfile", "translate/trans_hash_" + JoinPlaner.id);

    //job.getConfiguration().setInt("mapred.map.tasks", 18);
    job.getConfiguration().setInt("mapred.reduce.tasks", reducer_num);
    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    job.getConfiguration().setInt("io.sort.mb", 100);
    job.getConfiguration().setInt("io.file.buffer.size", 131072);
    job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);

    job.waitForCompletion(true);
    return 0;
}

From source file:tv.icntv.log.stb.commons.HadoopUtils.java

License:Apache License

public static Path[] createFile(Path from, Path to, PathFilter filter, String fromSuffix, String toSuffix,
        String parsed) throws IOException {
    FileSystem fileSystem = null;
    try {
        fileSystem = FileSystem.get(configuration);
        Path[] paths = FileUtil.stat2Paths(fileSystem.listStatus(from, filter));
        List<Path> inputs = Lists.newArrayList();
        for (Path path : paths) {
            // file name without the fromSuffix
            String name = path.getName().replace(fromSuffix, "");

            if (isExist(new Path(to, name.concat(parsed)))) {
                continue;
            }

            if (createFile(new Path(to, name.concat(toSuffix)))) {
                inputs.add(new Path(from, name));
            }
        }
        return inputs.toArray(new Path[inputs.size()]);
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    } finally {
        if (null != fileSystem) {
            fileSystem.close();
        }
    }
}