Example usage for org.apache.hadoop.fs FileSystem getConf

List of usage examples for org.apache.hadoop.fs FileSystem getConf

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem getConf.

Prototype

@Override
public Configuration getConf()
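
getConf() returns the Configuration that the FileSystem instance was created with, so per-filesystem settings can be read back from it. A minimal, self-contained sketch (the class name GetConfExample is illustrative and not taken from the examples below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class GetConfExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // getConf() hands back the Configuration this FileSystem was created with,
        // so settings such as io.file.buffer.size can be read from it.
        int bufferSize = fs.getConf().getInt("io.file.buffer.size", 4096);
        System.out.println("io.file.buffer.size = " + bufferSize);
        fs.close();
    }
}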


Usage

From source file:edu.bigdata.training.fileformats.compress.SequenceFileWriter.java

public static void main(String[] args) throws IOException {
    String uri = "output";
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(uri);
    IntWritable key = new IntWritable();
    Text value = new Text();
    File infile = new File("src/main/resources/input.txt");
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(conf, Writer.file(path), Writer.keyClass(key.getClass()),
                Writer.valueClass(value.getClass()),
                Writer.bufferSize(fs.getConf().getInt("io.file.buffer.size", 4096)),
                Writer.replication(fs.getDefaultReplication()), Writer.blockSize(1073741824),
                Writer.compression(SequenceFile.CompressionType.BLOCK, new DefaultCodec()),
                Writer.progressable(null), Writer.metadata(new Metadata()));
        int ctr = 100;
        List<String> lines = FileUtils.readLines(infile);
        for (String line : lines) {
            key.set(ctr++);
            value.set(line);
            if (ctr < 150) {
                System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
            }
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}

From source file:edu.umd.cloud9.collection.clue.ClueWarcForwardIndex.java

License:Apache License

@Override
public void loadIndex(Path index, Path mapping, FileSystem fs) throws IOException {
    this.conf = fs.getConf();

    LOG.info("Loading forward index: " + index);
    docnoMapping.loadMapping(mapping, fs);

    FSDataInputStream in = fs.open(index);

    // Class name; throw away.
    in.readUTF();
    collectionPath = in.readUTF();

    int blocks = in.readInt();

    LOG.info(blocks + " blocks expected");
    docnos = new int[blocks];
    offsets = new int[blocks];
    fileno = new short[blocks];

    for (int i = 0; i < blocks; i++) {
        docnos[i] = in.readInt();
        offsets[i] = in.readInt();
        fileno[i] = in.readShort();

        if (i > 0 && i % 100000 == 0)
            LOG.info(i + " blocks read");
    }

    in.close();
}

From source file:edu.umd.cloud9.io.ReadSequenceFile.java

License:Apache License

private static int readSequenceFile(Path path, FileSystem fs, int max) throws IOException {
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());

    System.out.println("Reading " + path + "...\n");
    try {
        System.out.println("Key type: " + reader.getKeyClass().toString());
        System.out.println("Value type: " + reader.getValueClass().toString() + "\n");
    } catch (Exception e) {
        throw new RuntimeException("Error: loading key/value class");
    }

    Writable key, value;
    int n = 0;
    try {
        if (Tuple.class.isAssignableFrom(reader.getKeyClass())) {
            key = TUPLE_FACTORY.newTuple();
        } else {
            key = (Writable) reader.getKeyClass().newInstance();
        }

        if (Tuple.class.isAssignableFrom(reader.getValueClass())) {
            value = TUPLE_FACTORY.newTuple();
        } else {
            value = (Writable) reader.getValueClass().newInstance();
        }

        while (reader.next(key, value)) {
            System.out.println("Record " + n);
            System.out.println("Key: " + key + "\nValue: " + value);
            System.out.println("----------------------------------------");
            n++;

            if (n >= max)
                break;
        }
        reader.close();
        System.out.println(n + " records read.\n");
    } catch (Exception e) {
        e.printStackTrace();
    }

    return n;
}

From source file:edu.umd.cloud9.io.SequenceFileUtils.java

License:Apache License

/**
 * Reads key-value pairs from a SequenceFile, up to a maximum number.
 *
 * @param path path to file
 * @param fs FileSystem containing the file
 * @param max maximum number of key-value pairs to read
 * @return list of key-value pairs
 */
@SuppressWarnings("unchecked")
public static <K extends Writable, V extends Writable> List<PairOfWritables<K, V>> readFile(Path path,
        FileSystem fs, int max) throws IOException {
    List<PairOfWritables<K, V>> list = new ArrayList<PairOfWritables<K, V>>();

    try {
        int k = 0;
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());

        K key;
        V value;

        if (Tuple.class.isAssignableFrom(reader.getKeyClass())) {
            key = (K) TUPLE_FACTORY.newTuple();
        } else {
            key = (K) reader.getKeyClass().newInstance();
        }

        if (Tuple.class.isAssignableFrom(reader.getValueClass())) {
            value = (V) TUPLE_FACTORY.newTuple();
        } else {
            value = (V) reader.getValueClass().newInstance();
        }

        while (reader.next(key, value)) {
            k++;
            list.add(new PairOfWritables<K, V>(key, value));
            if (k >= max) {
                break;
            }

            // Create new objects, because the key and value objects get reused
            if (Tuple.class.isAssignableFrom(reader.getKeyClass())) {
                key = (K) TUPLE_FACTORY.newTuple();
            } else {
                key = (K) reader.getKeyClass().newInstance();
            }

            if (Tuple.class.isAssignableFrom(reader.getValueClass())) {
                value = (V) TUPLE_FACTORY.newTuple();
            } else {
                value = (V) reader.getValueClass().newInstance();
            }
        }
        reader.close();
    } catch (IllegalAccessException e) {
        throw new RuntimeException("Error reading SequenceFile: " + e);
    } catch (InstantiationException e) {
        throw new RuntimeException("Error reading SequenceFile: " + e);
    }

    return list;
}

From source file:edu.umd.cloud9.io.SequenceFileUtils.java

License:Apache License

@SuppressWarnings("unchecked")
public static <K extends Writable> List<K> readKeys(Path path, FileSystem fs, int max) {
    List<K> list = new ArrayList<K>();

    try {
        int k = 0;
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());

        K key = (K) reader.getKeyClass().newInstance();
        Writable value = (Writable) reader.getValueClass().newInstance();
        while (reader.next(key, value)) {
            k++;
            list.add(key);
            if (k >= max) {
                break;
            }

            key = (K) reader.getKeyClass().newInstance();
        }
        reader.close();
    } catch (Exception e) {
        throw new RuntimeException("Error reading SequenceFile " + path);
    }

    return list;
}

From source file:edu.umd.cloud9.io.SequenceFileUtils.java

License:Apache License

@SuppressWarnings("unchecked")
public static <V extends Writable> List<V> readValues(Path path, FileSystem fs, int max) {
    List<V> list = new ArrayList<V>();

    try {
        int k = 0;
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());

        Writable key = (Writable) reader.getKeyClass().newInstance();
        V value = (V) reader.getValueClass().newInstance();

        while (reader.next(key, value)) {
            k++;
            list.add(value);
            if (k >= max) {
                break;
            }

            value = (V) reader.getValueClass().newInstance();
        }
        reader.close();
    } catch (Exception e) {
        throw new RuntimeException("Error reading SequenceFile " + path);
    }

    return list;
}

From source file:edu.umd.cloud9.util.SequenceFileUtils.java

License:Apache License

public static <K extends WritableComparable, V extends Writable> SortedMap<K, V> readFileIntoMap(FileSystem fs,
        String s, int max) {
    Path path = new Path(s);
    SortedMap<K, V> list = new TreeMap<K, V>();

    try {
        int k = 0;

        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());

        K key = (K) reader.getKeyClass().newInstance();
        V value = (V) reader.getValueClass().newInstance();

        while (reader.next(key, value)) {
            k++;
            list.put(key, value);
            if (max != -1 && k >= max)
                break;

            key = (K) reader.getKeyClass().newInstance();
            value = (V) reader.getValueClass().newInstance();
        }
        reader.close();
    } catch (Exception e) {
        throw new RuntimeException("Exception reading file " + s);
        // e.printStackTrace();
    }

    return list;
}

From source file:edu.umd.honghongie.BooleanRetrievalCompressed.java

License:Apache License

private void initialize(String indexPath, String collectionPath, FileSystem fs) throws IOException {
    index = new MapFile.Reader(new Path(indexPath + "/part-r-00000"), fs.getConf()); // where the index is stored
    collection = fs.open(new Path(collectionPath)); // where the collection text is
    stack = new Stack<Set<Integer>>();
}

From source file:edu.umn.cs.spatialHadoop.operations.Sampler.java

License:Open Source License

private static <T extends TextSerializable> int sampleLocalByCount(Path[] files, ResultCollector<T> output,
        OperationsParams params) throws IOException {

    ArrayList<Path> data_files = new ArrayList<Path>();
    for (Path file : files) {
        FileSystem fs = file.getFileSystem(params);
        if (fs.getFileStatus(file).isDir()) {
            // Directory, process all data files in this directory (visible files)
            FileStatus[] fileStatus = fs.listStatus(file, hiddenFileFilter);
            for (FileStatus f : fileStatus) {
                data_files.add(f.getPath());
            }
        } else {
            // File, process this file
            data_files.add(file);
        }
    }

    files = data_files.toArray(new Path[data_files.size()]);

    TextSerializable inObj1, outObj1;
    inObj1 = OperationsParams.getTextSerializable(params, "shape", new Text2());
    outObj1 = OperationsParams.getTextSerializable(params, "outshape", new Text2());

    // Make the objects final to be able to use in the anonymous inner class
    final TextSerializable inObj = inObj1;
    final T outObj = (T) outObj1;

    ResultCollector<TextSerializable> converter = createConverter(output, inObj, outObj);
    long[] files_start_offset = new long[files.length + 1]; // Prefix sum of files sizes
    long total_length = 0;
    for (int i_file = 0; i_file < files.length; i_file++) {
        FileSystem fs = files[i_file].getFileSystem(params);
        files_start_offset[i_file] = total_length;
        total_length += fs.getFileStatus(files[i_file]).getLen();
    }
    files_start_offset[files.length] = total_length;

    // Generate offsets to read from and make sure they are ordered to minimize
    // seeks between different HDFS blocks
    Random random = new Random(params.getLong("seed", System.currentTimeMillis()));
    long[] offsets = new long[params.getInt("count", 0)];
    for (int i = 0; i < offsets.length; i++) {
        if (total_length == 0)
            offsets[i] = 0;
        else
            offsets[i] = Math.abs(random.nextLong()) % total_length;
    }
    Arrays.sort(offsets);

    int record_i = 0; // Number of records read so far
    int records_returned = 0;

    int file_i = 0; // Index of the current file being sampled
    while (record_i < offsets.length) {
        // Skip to the file that contains the next sample
        while (offsets[record_i] > files_start_offset[file_i + 1])
            file_i++;

        long current_file_size = files_start_offset[file_i + 1] - files_start_offset[file_i];
        FileSystem fs = files[file_i].getFileSystem(params);
        ShapeLineRecordReader reader = new ShapeLineRecordReader(fs.getConf(),
                new FileSplit(files[file_i], 0, current_file_size, new String[] {}));
        Rectangle key = reader.createKey();
        Text line = reader.createValue();
        long pos = files_start_offset[file_i];

        while (record_i < offsets.length && offsets[record_i] <= files_start_offset[file_i + 1]
                && reader.next(key, line)) {
            pos += line.getLength();
            if (pos > offsets[record_i]) {
                // Passed the offset of record_i
                // Report this element to output
                if (converter != null) {
                    inObj.fromText(line);
                    converter.collect(inObj);
                }
                record_i++;
                records_returned++;
            }
        }
        reader.close();

        // Skip any remaining records that were supposed to be read from this file
        // This case might happen if a generated random position was in the middle
        // of the last line.
        while (record_i < offsets.length && offsets[record_i] <= files_start_offset[file_i + 1])
            record_i++;
    }
    return records_returned;
}

From source file:etl.cmd.test.XTestCase.java

License:Apache License

private void setUpEmbeddedHadoop2() throws Exception {
    if (dfsCluster != null && dfsCluster2 == null) {
        // Trick dfs location for MiniDFSCluster since it doesn't accept location as input
        String testBuildDataSaved = System.getProperty("test.build.data", "build/test/data");
        try {
            System.setProperty("test.build.data", FilenameUtils.concat(testBuildDataSaved, "2"));
            // Only DFS cluster is created based upon current need
            MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(createDFSConfig());
            dfsCluster2 = builder.build();
            FileSystem fileSystem = dfsCluster2.getFileSystem();
            fileSystem.mkdirs(new Path("target/test-data"));
            fileSystem.mkdirs(new Path("/user"));
            fileSystem.mkdirs(new Path("/tmp"));
            fileSystem.setPermission(new Path("target/test-data"), FsPermission.valueOf("-rwxrwxrwx"));
            fileSystem.setPermission(new Path("/user"), FsPermission.valueOf("-rwxrwxrwx"));
            fileSystem.setPermission(new Path("/tmp"), FsPermission.valueOf("-rwxrwxrwx"));
            System.setProperty(OOZIE_TEST_NAME_NODE2, fileSystem.getConf().get("fs.defaultFS"));
        } catch (Exception ex) {
            shutdownMiniCluster2();
            throw ex;
        } finally {
            // Restore previous value
            System.setProperty("test.build.data", testBuildDataSaved);
        }
    }
}