Example usage of the org.apache.hadoop.io.SequenceFile.Reader constructor SequenceFile.Reader(FileSystem, Path, Configuration)

List of usage examples for the org.apache.hadoop.io.SequenceFile.Reader constructor SequenceFile.Reader(FileSystem, Path, Configuration)

Introduction

On this page you can find example usages of the org.apache.hadoop.io.SequenceFile.Reader constructor SequenceFile.Reader(FileSystem, Path, Configuration).

Prototype

@Deprecated
public Reader(FileSystem fs, Path file, Configuration conf) throws IOException 

Source Link

Document

Construct a reader by opening a file from the given file system.

Usage

From source file:com.hadoopilluminated.examples.PiEstimator.java

License:Apache License

/**
 * Run a map/reduce job for estimating Pi.
 *
 * <p>One single-record input file is written per map task under
 * {@code TMP_DIR/in}, the job is run, and the first record of
 * {@code TMP_DIR/out/reduce-out} is read to obtain the number of points
 * counted as inside. {@code TMP_DIR} is always deleted before returning.
 *
 * @param numMaps   number of map tasks; one input file is generated for each
 * @param numPoints number of sample points assigned to each map task
 * @param jobConf   job configuration; it is modified by this method
 * @return the estimated value of Pi, rounded half-up at scale 20
 * @throws IOException if {@code TMP_DIR} already exists, the input directory
 *         cannot be created, or a file system or job operation fails
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    //setup job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());

    jobConf.setInputFormat(SequenceFileInputFormat.class);

    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);

    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    //setup input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        //generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            // each record is (offset, size): map #i covers points [i * numPoints, (i + 1) * numPoints)
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file, LongWritable.class,
                    LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        //start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        //read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        //compute estimated value: 4 * inside / (numMaps * numPoints).
        //An explicit rounding mode is required: BigDecimal.divide(BigDecimal)
        //throws ArithmeticException when the exact quotient has a
        //non-terminating decimal expansion (e.g. numMaps == 3).
        final BigDecimal numTotal = BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints));
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(numTotal, java.math.RoundingMode.HALF_UP);
    } finally {
        // always clean up the temporary input/output directories
        fs.delete(TMP_DIR, true);
    }
}

From source file:com.kadwa.hadoop.DistExec.java

License:Open Source License

/**
 * Check whether the file list has duplicated entries.
 *
 * <p>The list in {@code file} is sorted into {@code sorted} and then scanned
 * record by record; two consecutive records with equal keys are reported as
 * a duplication.
 *
 * @throws DuplicationException if two consecutive sorted records share a key
 * @throws IOException if sorting or reading fails
 */
static private void checkDuplication(FileSystem fs, Path file, Path sorted, Configuration conf)
        throws IOException {
    SequenceFile.Reader reader = null;
    try {
        new SequenceFile.Sorter(fs, new Text.Comparator(), Text.class, Text.class, conf).sort(file, sorted);
        reader = new SequenceFile.Reader(fs, sorted, conf);

        // key/value of the previously read record (null before the first one)
        Text lastDst = null;
        Text lastSrc = null;
        // fresh holders for the record being read
        Text dst = new Text();
        Text src = new Text();
        while (reader.next(dst, src)) {
            final boolean sameAsPrevious = lastDst != null && dst.equals(lastDst);
            if (sameAsPrevious) {
                throw new DuplicationException(
                        "Invalid input, there are duplicated files in the sources: " + lastSrc + ", " + src);
            }
            // keep the record just read and allocate new holders for the next one
            lastDst = dst;
            lastSrc = src;
            dst = new Text();
            src = new Text();
        }
    } finally {
        checkAndClose(reader);
    }
}

From source file:com.kylinolap.job.hadoop.cube.RowKeyDistributionCheckerMapper.java

License:Apache License

/**
 * Read all keys of the sequence file at {@code path} and return the distinct
 * key byte sequences in first-seen order.
 *
 * <p>Failures are handled on a best-effort basis: the exception is printed and
 * whatever keys were collected before the failure are returned.
 *
 * @param conf configuration used to resolve the file system and instantiate key/value objects
 * @param path location of the sequence file; its keys are cast to {@code Text}
 * @return one {@code byte[]} per distinct key; empty if nothing could be read
 */
@SuppressWarnings("deprecation")
public byte[][] getSplits(Configuration conf, Path path) {
    List<byte[]> rowkeyList = new ArrayList<byte[]>();
    // byte[] inherits identity-based equals(), so List.contains(byte[]) can never
    // recognise a repeated key; ByteBuffer compares by content instead.
    java.util.Set<java.nio.ByteBuffer> seenKeys = new java.util.HashSet<java.nio.ByteBuffer>();
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(path.getFileSystem(conf), path, conf);
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
        while (reader.next(key, value)) {
            // copyBytes() yields a private copy, so wrapping it is safe even though key is reused
            byte[] tmp = ((Text) key).copyBytes();
            if (seenKeys.add(java.nio.ByteBuffer.wrap(tmp))) {
                rowkeyList.add(tmp);
            }
        }
    } catch (Exception e) {
        // deliberate best effort: report and fall through with the keys read so far
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(reader);
    }

    return rowkeyList.toArray(new byte[rowkeyList.size()][]);
}

From source file:com.pinterest.hdfsbackup.distcp.DistCp.java

License:Apache License

/**
 * Apply preserved attributes to the destination entries listed in the file
 * named by {@code DST_DIR_LIST_LABEL} in the job configuration.
 *
 * <p>Does nothing when {@code presevedAttributes} is {@code null} or when it
 * selects none of USER, GROUP and PERMISSION.
 *
 * @throws IOException if the list cannot be read or a status lookup/update fails
 */
static private void finalize(Configuration conf, JobConf jobconf, final Path destPath,
        String presevedAttributes) throws IOException {
    if (presevedAttributes == null) {
        return;
    }
    final EnumSet<FileAttribute> attrs = FileAttribute.parse(presevedAttributes);
    final boolean anythingToApply = attrs.contains(FileAttribute.USER) || attrs.contains(FileAttribute.GROUP)
            || attrs.contains(FileAttribute.PERMISSION);
    if (!anythingToApply) {
        return;
    }

    final FileSystem dstfs = destPath.getFileSystem(conf);
    final Path dirList = new Path(jobconf.get(DST_DIR_LIST_LABEL));
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(dirList.getFileSystem(jobconf), dirList, jobconf);
        final Text recordKey = new Text();
        final FilePair pair = new FilePair();
        while (reader.next(recordKey, pair)) {
            // pair.output is resolved against the destination root
            final Path target = new Path(destPath, pair.output);
            updatePermissions(pair.input, dstfs.getFileStatus(target), attrs, dstfs);
        }
    } finally {
        checkAndClose(reader);
    }
}

From source file:com.pinterest.hdfsbackup.distcp.DistCp.java

License:Apache License

/**
 * Delete the dst files/dirs which do not exist in src.
 *
 * <p>Steps: (1) recursively list everything under {@code dstroot} into a
 * sequence file in {@code jobdir}, keyed by path relative to {@code dstroot};
 * (2) sort that listing; (3) walk the sorted listing alongside
 * {@code dstsorted} and remove, via {@code FsShell -rmr}, every listed path
 * whose key has no equal key in {@code dstsorted}.
 *
 * @param dstfs     file system that holds the destination tree
 * @param dstroot   status of the destination root; must be a directory
 * @param dstsorted sequence file of Text/Text records compared against the
 *                  listing (the merge below assumes it is sorted by key the
 *                  same way as the listing — confirm against the caller)
 * @param jobfs     file system used for the intermediate listing files
 * @param jobdir    directory in which the intermediate listing files are created
 * @param jobconf   configuration used for the sequence file writer/sorter/readers
 * @param conf      configuration handed to the {@code FsShell}
 * @throws IOException if {@code dstroot} is not a directory, if the shell
 *         throws or returns a non-zero code, or on file system errors
 */
static private void deleteNonexisting(FileSystem dstfs, FileStatus dstroot, Path dstsorted, FileSystem jobfs,
        Path jobdir, JobConf jobconf, Configuration conf) throws IOException {
    if (!dstroot.isDir()) {
        throw new IOException("dst must be a directory when option " + Options.DELETE.cmd
                + " is set, but dst (= " + dstroot.getPath() + ") is not a directory.");
    }

    //write dst lsr results
    final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr");
    final SequenceFile.Writer writer = SequenceFile.createWriter(jobfs, jobconf, dstlsr, Text.class,
            FileStatus.class, SequenceFile.CompressionType.NONE);
    try {
        //do lsr to get all file statuses in dstroot
        //(iterative traversal with an explicit stack; dstroot itself is not written)
        final Stack<FileStatus> lsrstack = new Stack<FileStatus>();
        for (lsrstack.push(dstroot); !lsrstack.isEmpty();) {
            final FileStatus status = lsrstack.pop();
            if (status.isDir()) {
                for (FileStatus child : dstfs.listStatus(status.getPath())) {
                    //key: child path relative to dstroot, value: the child's status
                    String relative = makeRelative(dstroot.getPath(), child.getPath());
                    writer.append(new Text(relative), child);
                    lsrstack.push(child);
                }
            }
        }
    } finally {
        checkAndClose(writer);
    }

    //sort lsr results
    final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted");
    SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs, new Text.Comparator(), Text.class,
            FileStatus.class, jobconf);
    sorter.sort(dstlsr, sortedlsr);

    //compare lsr list and dst list
    SequenceFile.Reader lsrin = null;
    SequenceFile.Reader dstin = null;
    try {
        lsrin = new SequenceFile.Reader(jobfs, sortedlsr, jobconf);
        dstin = new SequenceFile.Reader(jobfs, dstsorted, jobconf);

        //compare sorted lsr list and sorted dst list
        final Text lsrpath = new Text();
        final FileStatus lsrstatus = new FileStatus();
        final Text dstpath = new Text();
        final Text dstfrom = new Text();
        final FsShell shell = new FsShell(conf);
        //shellargs[1] holds the most recently deleted path (null until the first delete)
        final String[] shellargs = { "-rmr", null };

        //hasnext tracks whether the last read from dstin produced a record
        boolean hasnext = dstin.next(dstpath, dstfrom);
        for (; lsrin.next(lsrpath, lsrstatus);) {
            int dst_cmp_lsr = dstpath.compareTo(lsrpath);
            //advance dstin while its current key sorts before the current lsr key
            for (; hasnext && dst_cmp_lsr < 0;) {
                hasnext = dstin.next(dstpath, dstfrom);
                dst_cmp_lsr = dstpath.compareTo(lsrpath);
            }

            if (dst_cmp_lsr == 0) {
                //lsrpath exists in dst, skip it
                hasnext = dstin.next(dstpath, dstfrom);
            } else {
                //lsrpath does not exist, delete it
                String s = new Path(dstroot.getPath(), lsrpath.toString()).toString();
                //skip when the previously deleted path is an ancestor of s
                //(presumably already removed by that recursive delete — see isAncestorPath)
                if (shellargs[1] == null || !isAncestorPath(shellargs[1], s)) {
                    shellargs[1] = s;
                    int r = 0;
                    try {
                        r = shell.run(shellargs);
                    } catch (Exception e) {
                        throw new IOException("Exception from shell.", e);
                    }
                    if (r != 0) {
                        throw new IOException(
                                "\"" + shellargs[0] + " " + shellargs[1] + "\" returns non-zero value " + r);
                    }
                }
            }
        }
    } finally {
        checkAndClose(lsrin);
        checkAndClose(dstin);
    }
}

From source file:com.pinterest.secor.uploader.Uploader.java

License:Apache License

/**
 * Open a {@link SequenceFile.Reader} for the given path.
 *
 * <p>Kept as a separate protected method so that tests can override it.
 *
 * @throws IOException if the reader cannot be constructed
 */
protected SequenceFile.Reader createReader(FileSystem fileSystem, Path path, Configuration configuration)
        throws IOException {
    final SequenceFile.Reader reader = new SequenceFile.Reader(fileSystem, path, configuration);
    return reader;
}

From source file:crunch.MaxTemperature.java

License:Apache License

/**
 * Re-create the index of the MapFile directory named by {@code args[0]}.
 *
 * <p>The key and value classes are taken from the MapFile's data file and
 * passed to {@code MapFile.fix}; the resulting entry count is printed.
 */
public static void main(String[] args) throws Exception {
        final String mapUri = args[0];

        final Configuration conf = new Configuration();

        final FileSystem fs = FileSystem.get(URI.create(mapUri), conf);
        final Path mapDir = new Path(mapUri);

        // Get key and value types from the data sequence file, then release it
        final SequenceFile.Reader dataReader =
                new SequenceFile.Reader(fs, new Path(mapDir, MapFile.DATA_FILE_NAME), conf);
        final Class keyType = dataReader.getKeyClass();
        final Class valueType = dataReader.getValueClass();
        dataReader.close();

        // Create the map file index file (dry-run flag is false)
        final long entryCount = MapFile.fix(fs, mapDir, keyType, valueType, false, conf);
        System.out.printf("Created MapFile %s with %d entries\n", mapDir, entryCount);
    }

From source file:crunch.MaxTemperature.java

License:Apache License

/**
 * Print every record of the sequence file named by {@code args[0]}.
 *
 * <p>Each output line shows the reader position taken before the record was
 * read, a {@code *} when {@code syncSeen()} reports true, and the key and value.
 */
public static void main(String[] args) throws IOException {
        final String uri = args[0];
        final Configuration conf = new Configuration();
        final FileSystem fs = FileSystem.get(URI.create(uri), conf);
        final Path path = new Path(uri);

        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(fs, path, conf);
            final Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            final Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
            // recordStart is refreshed after each record, i.e. it marks the beginning of the next one
            for (long recordStart = reader.getPosition(); reader.next(key, value); recordStart = reader
                    .getPosition()) {
                final String syncMarker;
                if (reader.syncSeen()) {
                    syncMarker = "*";
                } else {
                    syncMarker = "";
                }
                System.out.printf("[%s%s]\t%s\t%s\n", recordStart, syncMarker, key, value);
            }
        } finally {
            IOUtils.closeStream(reader);
        }
    }

From source file:crunch.MaxTemperature.java

License:Apache License

@Before
    public void setUp() throws IOException {
        // run the write demo against SF_URI first (presumably to create the file read below)
        final String[] demoArgs = { SF_URI };
        SequenceFileWriteDemo.main(demoArgs);

        final Configuration conf = new Configuration();
        fs = FileSystem.get(URI.create(SF_URI), conf);

        // open the file and prepare reusable key/value holders of the recorded types
        final Path sequenceFile = new Path(SF_URI);
        reader = new SequenceFile.Reader(fs, sequenceFile, conf);
        key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
    }

From source file:dk.aau.cs.cloudetl.io.SequenceIndexFileReader.java

License:Apache License

/**
 * Open the sequence file behind {@code split} and record the byte range to read.
 *
 * <p>After this call {@code start} is the reader position at which reading
 * begins, {@code end} is the split's start plus its length, and {@code more}
 * tells whether {@code start} lies before {@code end}.
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    final FileSplit fileSplit = (FileSplit) split;
    final long splitStart = fileSplit.getStart();

    conf = context.getConfiguration();
    final Path path = fileSplit.getPath();
    in = new SequenceFile.Reader(path.getFileSystem(conf), path, conf);
    end = splitStart + fileSplit.getLength();

    // sync to start when the split begins beyond the reader's current position
    if (splitStart > in.getPosition()) {
        in.sync(splitStart);
    }

    start = in.getPosition();
    more = start < end;
}