Example usage for org.apache.hadoop.fs FileSystem getLocal

Introduction

This page collects usage examples for org.apache.hadoop.fs.FileSystem.getLocal.

Prototype

public static LocalFileSystem getLocal(Configuration conf) throws IOException 

Document

Get the local FileSystem.
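
For orientation, here is a minimal, self-contained sketch of the call before the real-world examples below. The class name and file path are illustrative and not taken from any of the listed sources.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;

public class GetLocalSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // getLocal() returns a FileSystem backed by the local disk (file://),
        // regardless of what fs.default.name / fs.defaultFS points to.
        LocalFileSystem localFs = FileSystem.getLocal(conf);

        Path p = new Path("/tmp/getlocal-example.txt"); // illustrative path
        try (FSDataOutputStream out = localFs.create(p, true)) {
            out.writeUTF("written through the local FileSystem");
        }
        System.out.println("exists on local fs: " + localFs.exists(p));
    }
}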

Usage

From source file:common.DataNode.java

License:Apache License

/**
 * Make an instance of DataNode after ensuring that at least one of the
 * given data directories (and their parent directories, if necessary)
 * can be created.
 * @param dataDirs List of directories, where the new DataNode instance should
 * keep its files.
 * @param conf Configuration instance to use.
 * @return DataNode instance for given list of data dirs and conf, or null if
 * no directory from this directory list can be created.
 * @throws IOException
 */
static DataNode makeInstance(Collection<URI> dataDirs, Configuration conf) throws IOException {
    LocalFileSystem localFS = FileSystem.getLocal(conf);
    FsPermission permission = new FsPermission(conf.get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_PERMISSION_KEY,
            DFSConfigKeys.DFS_DATANODE_DATA_DIR_PERMISSION_DEFAULT));
    ArrayList<File> dirs = getDataDirsFromURIs(dataDirs, localFS, permission);

    if (dirs.size() > 0) {
        return new DataNode(conf, dirs);
    }
    LOG.error("All directories in " + DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY + " are invalid.");
    return null;
}

From source file:corr.util.DummyDataToSeqFile.java

License:Apache License

/**
 * Export a CSV file to a SequenceFile.
 * @param file Name of CSV file.
 * @throws Exception
 */
private static void export(String file) throws Exception {
    String delim = ",";
    BufferedReader reader = null;
    SequenceFile.Writer writer = null;

    try {
        Path path = toPath(file);
        Configuration conf = new Configuration();
        LocalFileSystem fs = FileSystem.getLocal(conf);
        writer = SequenceFile.createWriter(fs, conf, path, LongWritable.class, VectorWritable.class);

        reader = new BufferedReader(new FileReader(file));
        String line = null;
        long counter = 0;
        while (null != (line = reader.readLine())) {
            if ("".equals(line))
                continue;
            String[] tokens = line.split(delim);

            LongWritable key = new LongWritable(counter);
            VectorWritable val = toVector(tokens);
            writer.append(key, val);

            counter++;
        }
    } catch (Exception ex) {
        throw ex;
    } finally {
        if (null != reader) {
            try {
                reader.close();
            } catch (Exception ex) {
            }
        }

        if (null != writer) {
            try {
                writer.close();
            } catch (Exception ex) {
            }
        }
    }
}

From source file:crunch.MaxTemperature.java

License:Apache License

@Test
    public void test() throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "file:///");
        conf.set("mapred.job.tracker", "local");

        Path input = new Path("input/ncdc/micro");
        Path output = new Path("output");

        FileSystem fs = FileSystem.getLocal(conf);
        fs.delete(output, true); // delete old output

        MaxTemperatureDriver driver = new MaxTemperatureDriver();
        driver.setConf(conf);

        int exitCode = driver.run(new String[] { input.toString(), output.toString() });
        assertThat(exitCode, is(0));

        checkOutput(conf, output);
    }

From source file:crunch.MaxTemperature.java

License:Apache License

private void checkOutput(Configuration conf, Path output) throws IOException {
        FileSystem fs = FileSystem.getLocal(conf);
        Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(output, new OutputLogFilter()));
        assertThat(outputFiles.length, is(1));

        BufferedReader actual = asBufferedReader(fs.open(outputFiles[0]));
        BufferedReader expected = asBufferedReader(getClass().getResourceAsStream("/expected.txt"));
        String expectedLine;
        while ((expectedLine = expected.readLine()) != null) {
            assertThat(actual.readLine(), is(expectedLine));
        }
        assertThat(actual.readLine(), nullValue());
        actual.close();
        expected.close();
    }

From source file:csc555.ebratt.depaul.edu.PutMerge.java

License:Open Source License

/**
 * @param args
 *            [0] the fs.default.name of the target HDFS
 * @param args
 *            [1] the input directory on the local filesystem
 * @param args
 *            [2] the fully-qualified output filename on HDFS
 * @throws IOException
 *             in the event there is an issue with input/output
 */
public static void main(String[] args) throws IOException {

    if (args.length != 3) {
        System.err.println("Usage: PutMerge.jar <fs.default.name> <in> <out>");
        System.exit(2);
    }

    Configuration conf = new Configuration();
    conf.set("fs.default.name", args[0]);

    FileSystem hdfs = FileSystem.get(conf);
    FileSystem localFS = FileSystem.getLocal(conf);

    Path localDir = new Path(args[1]);
    Path hdfsFile = new Path(args[2]);

    try {
        if (hdfs.exists(hdfsFile)) {
            System.out.println("deleting target file: " + hdfsFile.toString());
            hdfs.delete(hdfsFile, true);
        }
        System.out.println("copying/merging files from: local:/" + localDir.toString() + " to hdfs:/"
                + hdfsFile.toString());
        FileUtil.copyMerge(localFS, localDir, hdfs, hdfsFile, false, conf, null);
    } catch (IOException e) {
        e.printStackTrace();
    }

}

From source file:de.tuberlin.dima.aim3.assignment1.BookAndAuthorBroadcastJoin.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {

    Map<String, String> parsedArgs = parseArgs(args);

    Path authors = new Path(parsedArgs.get("--authors"));
    Path books = new Path(parsedArgs.get("--books"));
    Path outputPath = new Path(parsedArgs.get("--output"));

    //IMPLEMENT ME

    Job broadCastJoin = prepareJob(books, outputPath, TextInputFormat.class, BroadCastMapper.class, Text.class,
            Text.class, TextOutputFormat.class);
    DistributedCache.addArchiveToClassPath(authors, broadCastJoin.getConfiguration(),
            FileSystem.getLocal(broadCastJoin.getConfiguration()));
    broadCastJoin.waitForCompletion(true);

    return 0;
}

From source file:de.tuberlin.dima.cuttlefish.preprocessing.vectorization.Vectorizer.java

License:Open Source License

public void vectorize(File luceneIndexDir, File outputDir) throws Exception {

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    SequenceFile.Writer writer = null;

    FeatureDictionary dict = new FeatureDictionary();

    DirectoryReader reader = null;
    try {
        reader = DirectoryReader.open(new SimpleFSDirectory(luceneIndexDir));

        writer = SequenceFile.createWriter(fs, conf, new Path(outputDir.toString(), "documentVectors.seq"),
                IDAndCodes.class, VectorWritable.class);
        IDAndCodes idAndCodes = new IDAndCodes();
        VectorWritable vectorWritable = new VectorWritable();

        Fields fields = MultiFields.getFields(reader);
        if (fields != null) {
            Iterator<String> fieldNames = fields.iterator();
            while (fieldNames.hasNext()) {
                String field = fieldNames.next();
                if (!field.startsWith("bip:") && !"itemID".equals(field)) {

                    Terms terms = fields.terms(field);
                    TermsEnum termsEnum = terms.iterator(null);
                    BytesRef text;
                    while ((text = termsEnum.next()) != null) {
                        dict.addTextFeature(field, text.utf8ToString());
                    }
                }
            }
        }

        int numDocsVectorized = 0;

        for (int docID = 0; docID < reader.maxDoc(); docID++) {
            Document doc = reader.document(docID);

            int itemID = doc.getField("itemID").numericValue().intValue();

            RandomAccessSparseVector documentVector = new RandomAccessSparseVector(dict.numFeatures());
            Multimap<String, String> codes = HashMultimap.create();

            for (IndexableField field : doc.getFields()) {

                String fieldName = field.name();

                if (!fieldName.startsWith("bip:") && !"itemID".equals(fieldName)) {

                    Terms termFreqVector = reader.getTermVector(docID, fieldName);

                    if (termFreqVector != null) {

                        int maxTermFrequency = maxTermFrequency(termFreqVector);

                        TermsEnum te = termFreqVector.iterator(null);
                        BytesRef term;

                        while ((term = te.next()) != null) {

                            String termStr = term.utf8ToString();
                            int termFrequency = (int) te.totalTermFreq();

                            int documentFrequency = reader.docFreq(new Term(fieldName, term));
                            int numDocs = reader.numDocs();

                            double weight = weighting.weight(fieldName, termStr, termFrequency,
                                    documentFrequency, maxTermFrequency, numDocs);

                            int featureIndex = dict.index(fieldName, term.utf8ToString());
                            documentVector.setQuick(featureIndex, weight);
                        }
                    }

                } else if (fieldName.startsWith("bip:")) {
                    for (String value : doc.getValues(fieldName)) {
                        codes.put(fieldName, value);
                    }
                }
            }

            Vector featureVector = new SequentialAccessSparseVector(documentVector);

            weighting.normalize(featureVector);

            idAndCodes.set(itemID, codes);
            vectorWritable.set(featureVector);
            writer.append(idAndCodes, vectorWritable);

            numDocsVectorized++;
            if (numDocsVectorized % 100 == 0) {
                log.info("Vectorized {} documents", numDocsVectorized);
            }
        }

        log.info("Vectorized {} documents", numDocsVectorized);

        dict.writeToFile(new File(outputDir, "features.txt"));

        log.info("Wrote feature dictionary");

    } finally {
        Closeables.close(reader, true);
        Closeables.close(writer, true);
    }

}

From source file:de.tudarmstadt.ukp.dkpro.bigdata.hadoop.UIMAMapReduceBase.java

License:Open Source License

@Override
public void configure(JobConf job) {
    try {
        this.job = job;
        this.mapOutputValueClass = job.getMapOutputValueClass();
        this.outputValueClass = job.getOutputValueClass();
        this.samplingPropability = job.getInt("dkpro.map.samplingratio", 100);
        final EngineFactory engineFactory = (EngineFactory) Class
                .forName(job.get("dkpro.uima.factory", DkproHadoopDriver.class.getName())).newInstance();
        engineFactory.configure(job);

        final AnalysisEngineDescription engineDescription = getEngineDescription(engineFactory, job);

        // replace the $dir variable within the configuration.
        this.fs = FileSystem.get(job);
        this.localFS = FileSystem.getLocal(job);
        this.working_dir = new Path("uima_output_" + job.get("mapred.task.id"));
        final Path outputPath = FileOutputFormat.getOutputPath(job);
        this.results_dir = this.fs.startLocalOutput(outputPath, job.getLocalPath(this.working_dir.getName()));
        this.localFS.mkdirs(this.results_dir);
        final String[] resources = job.get("dkpro.resources", "").split(",");
        sLogger.info("Writing local data to: " + this.results_dir);
        this.resourceURIs = new TreeMap<String, URL>();
        for (final String resource : resources) {
            final URL r = job.getResource(resource);
            if (r != null && !resource.isEmpty()) {
                this.resourceURIs.put(resource, r);
            }

        }
        replaceRecursively(engineDescription);
        this.engine = createEngine(engineDescription);

    } catch (final Exception e) {
        sLogger.fatal("Error while configuring pipeline", e);
        e.printStackTrace();
        throw new RuntimeException(e);
    }

}

From source file:edu.indiana.soic.ts.mapreduce.pwd.SWGMap.java

License:Open Source License

public void map(LongWritable blockIndex, Text value, Context context) throws IOException, InterruptedException {
    long startTime = System.nanoTime();
    Configuration conf = context.getConfiguration();
    Counter alignmentCounter = context.getCounter(Constants.RecordCounters.ALIGNMENTS);
    String valString = value.toString();
    String valArgs[] = valString.split(Constants.BREAK);

    long rowBlock = Long.parseLong(valArgs[0]);
    long columnBlock = Long.parseLong(valArgs[1]);
    boolean isDiagonal = Boolean.parseBoolean(valArgs[2]);
    LOG.info("row column" + rowBlock + "  " + columnBlock + "  " + isDiagonal + "  " + valArgs[2]);

    long row = rowBlock * blockSize;
    long column = columnBlock * blockSize;

    long parseStartTime = System.nanoTime();
    FileSystem fs = FileSystem.getLocal(conf);
    // parse the inputFilePart for row
    Path rowPath = new Path(Constants.HDFS_SEQ_FILENAME + "_" + rowBlock);
    FSDataInputStream rowInStream = fs.open(rowPath);
    List<VectorPoint> rowSequences = SequenceParser.ParseFile(rowInStream);
    // parse the inputFilePart for column if this is not a diagonal block
    List<VectorPoint> colSequences;
    if (isDiagonal) {
        colSequences = rowSequences;
    } else {
        // parse the inputFilePart for column
        Path colPath = new Path(Constants.HDFS_SEQ_FILENAME + "_" + columnBlock);
        FSDataInputStream colInStream = fs.open(colPath);
        colSequences = SequenceParser.ParseFile(colInStream);
    }
    LOG.info("Parsing time : " + ((System.nanoTime() - parseStartTime) / 1000000) + "ms");

    short[][] alignments = new short[(int) blockSize][(int) blockSize];
    double[][] doubleDistances = new double[(int) blockSize][(int) blockSize];
    double max = Double.MIN_VALUE;
    for (int rowIndex = 0; ((rowIndex < blockSize) & ((row + rowIndex) < noOfSequences)); rowIndex++) {
        int columnIndex = 0;
        for (; ((columnIndex < blockSize) & ((column + columnIndex) < noOfSequences)); columnIndex++) {
            double alignment;
            alignment = distFunc.calc(rowSequences.get(rowIndex), colSequences.get(columnIndex));
            if (alignment > max) {
                max = alignment;
            }
            // Get the identity and make it percent identity
            doubleDistances[rowIndex][columnIndex] = alignment;
        }
        alignmentCounter.increment(columnIndex);
    }

    // divide by max to get the range to 0 to 1 and then convert to short and output
    for (int rowIndex = 0; ((rowIndex < blockSize) & ((row + rowIndex) < noOfSequences)); rowIndex++) {
        int columnIndex = 0;
        for (; ((columnIndex < blockSize) & ((column + columnIndex) < noOfSequences)); columnIndex++) {
            double alignment = doubleDistances[rowIndex][columnIndex] / max;
            short scaledScore = (short) (alignment * Short.MAX_VALUE);
            alignments[rowIndex][columnIndex] = scaledScore;
        }
    }

    SWGWritable dataWritable = new SWGWritable(rowBlock, columnBlock, blockSize, false);
    dataWritable.setMax(max);
    dataWritable.setAlignments(alignments);
    context.write(new LongWritable(rowBlock), dataWritable);

    if (!isDiagonal) {
        // Create the transpose matrix of (rowBlock, colBlock) block to fill the
        // (colBlock, rowBlock) block.
        SWGWritable inverseDataWritable = new SWGWritable(columnBlock, rowBlock, blockSize, true);
        inverseDataWritable.setAlignments(alignments);
        context.write(new LongWritable(columnBlock), inverseDataWritable);
    }
    LOG.info("Map time : " + ((System.nanoTime() - startTime) / 1000000) + "ms");
}

From source file:edu.isi.mavuno.util.MavunoUtils.java

License:Apache License

public static void readParameters(String[] args, String prefix, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.getLocal(conf);
    for (String arg : args) {
        if (arg.startsWith("-")) {
            int equalsIndex = arg.indexOf('=');
            if (equalsIndex == -1) {
                sLogger.warn("Ignoring malformed parameter -- " + arg);
            }/*from w  w  w . j  ava2  s.  com*/
            String paramName = arg.substring(1, equalsIndex);
            String paramValue = arg.substring(equalsIndex + 1, arg.length());
            conf.set(prefix + "." + paramName, paramValue);
        } else {
            // open parameter file
            BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(new Path(arg))));

            // read/set parameters
            String input;
            while ((input = reader.readLine()) != null) {
                String[] cols = input.split("\t");
                if (cols.length != 2) {
                    sLogger.warn("Skipping malformed parameter file line -- " + input);
                } else {
                    conf.set(prefix + "." + cols[0], cols[1]);
                }
            }

            reader.close();
        }
    }
}