List of usage examples for org.apache.hadoop.fs.Path.suffix
public Path suffix(String suffix)
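Path.suffix returns a new Path whose last component is the original name with the given string appended; it does not touch the file system. Before the real-world examples below, here is a minimal sketch of the typical pattern (the directory names, configuration, and companion-file suffix are hypothetical, chosen only for illustration):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PathSuffixExample {
    public static void main(String[] args) throws Exception {
        // suffix() only builds a new Path by appending the string to the
        // final path component; the original Path is left unchanged.
        Path table = new Path("/user/example/output/data");
        Path compressed = table.suffix(".bz2");   // /user/example/output/data.bz2
        Path typeFile = table.suffix(".type");    // /user/example/output/data.type

        System.out.println(compressed);
        System.out.println(typeFile);

        // A common pattern in the examples below: check for a companion file
        // that sits next to the primary one (configuration is hypothetical).
        Configuration conf = new Configuration();
        FileSystem fs = table.getFileSystem(conf);
        System.out.println("companion exists: " + fs.exists(table.suffix(".type")));
    }
}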
From source file:org.apache.mahout.text.TestSequenceFilesFromDirectory.java
License:Apache License
private static void checkMRResultFilesRecursive(Configuration configuration, Path outputDir, String[][] data,
        String prefix) throws IOException {
    FileSystem fs = FileSystem.get(configuration);

    // output exists?
    FileStatus[] fileStatuses = fs.listStatus(outputDir.suffix("/part-m-00000"), PathFilters.logsCRCFilter());
    assertEquals(1, fileStatuses.length); // only one
    assertEquals("part-m-00000", fileStatuses[0].getPath().getName());

    Map<String, String> fileToData = Maps.newHashMap();
    String currentPath = prefix;
    for (String[] aData : data) {
        currentPath += Path.SEPARATOR + aData[0];
        fileToData.put(currentPath + Path.SEPARATOR + "file.txt", aData[1]);
    }

    // read a chunk to check content
    SequenceFileIterator<Text, Text> iterator = new SequenceFileIterator<Text, Text>(fileStatuses[0].getPath(),
            true, configuration);
    try {
        while (iterator.hasNext()) {
            Pair<Text, Text> record = iterator.next();
            System.out.printf("MR-Recur > Trying to check: %s\n", record.getFirst().toString().trim());
            String retrievedData = fileToData.get(record.getFirst().toString().trim());
            assertNotNull(retrievedData);
            assertEquals(retrievedData, record.getSecond().toString().trim());
        }
    } finally {
        Closeables.close(iterator, true);
    }
}
From source file:org.apache.mrql.Evaluator.java
License:Apache License
/** dump MRQL data into a sequence file */
public void dump(String file, Tree type, MRData data) throws Exception {
    Path path = new Path(file);
    FileSystem fs = path.getFileSystem(Plan.conf);
    PrintStream ftp = new PrintStream(fs.create(path.suffix(".type")));
    ftp.print("2@" + type.toString() + "\n");
    ftp.close();
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, Plan.conf, path, MRContainer.class,
            MRContainer.class);
    if (data instanceof MR_dataset)
        data = Plan.collect(((MR_dataset) data).dataset());
    if (data instanceof Bag) {
        Bag s = (Bag) data;
        long i = 0;
        for (MRData e : s) {
            counter_key.set(i++);
            value_container.set(e);
            writer.append(counter_container, value_container);
        }
    } else {
        counter_key.set(0);
        value_container.set(data);
        writer.append(counter_container, value_container);
    }
    writer.close();
}
From source file:org.apache.mrql.Evaluator.java
License:Apache License
/** for dumped data to a file, return the MRQL type of the data */
public Tree get_type(String file) {
    try {
        Path path = new Path(file);
        FileSystem fs = path.getFileSystem(Plan.conf);
        BufferedReader ftp = new BufferedReader(new InputStreamReader(fs.open(path.suffix(".type"))));
        String s[] = ftp.readLine().split("@");
        ftp.close();
        if (s.length != 2)
            return null;
        if (!s[0].equals("2"))
            throw new Error("The binary file has been created in java mode and cannot be read in hadoop mode");
        return Tree.parse(s[1]);
    } catch (Exception e) {
        return null;
    }
}
From source file:org.apache.tajo.storage.TestCompressionStorages.java
License:Apache License
private void storageCompressionTest(StoreType storeType, Class<? extends CompressionCodec> codec)
        throws IOException {
    Schema schema = new Schema();
    schema.addColumn("id", Type.INT4);
    schema.addColumn("age", Type.FLOAT4);
    schema.addColumn("name", Type.TEXT);

    TableMeta meta = CatalogUtil.newTableMeta(storeType);
    meta.putOption("compression.codec", codec.getCanonicalName());
    meta.putOption("compression.type", SequenceFile.CompressionType.BLOCK.name());
    meta.putOption("rcfile.serde", TextSerializerDeserializer.class.getName());
    meta.putOption("sequencefile.serde", TextSerializerDeserializer.class.getName());

    String fileName = "Compression_" + codec.getSimpleName();
    Path tablePath = new Path(testDir, fileName);
    Appender appender = ((FileStorageManager) StorageManager.getFileStorageManager(conf)).getAppender(meta,
            schema, tablePath);
    appender.enableStats();
    appender.init();

    String extension = "";
    if (appender instanceof CSVFile.CSVAppender) {
        extension = ((CSVFile.CSVAppender) appender).getExtension();
    } else if (appender instanceof DelimitedTextFile.DelimitedTextFileAppender) {
        extension = ((DelimitedTextFile.DelimitedTextFileAppender) appender).getExtension();
    }

    int tupleNum = 100000;
    VTuple vTuple;
    for (int i = 0; i < tupleNum; i++) {
        vTuple = new VTuple(3);
        vTuple.put(0, DatumFactory.createInt4(i + 1));
        vTuple.put(1, DatumFactory.createFloat4((float) i));
        vTuple.put(2, DatumFactory.createText(String.valueOf(i)));
        appender.addTuple(vTuple);
    }
    appender.close();

    TableStats stat = appender.getStats();
    assertEquals(tupleNum, stat.getNumRows().longValue());

    tablePath = tablePath.suffix(extension);
    FileStatus status = fs.getFileStatus(tablePath);
    long fileLen = status.getLen();
    FileFragment[] tablets = new FileFragment[1];
    tablets[0] = new FileFragment(fileName, tablePath, 0, fileLen);

    Scanner scanner = StorageManager.getFileStorageManager(conf).getScanner(meta, schema, tablets[0], schema);

    if (StoreType.CSV == storeType) {
        if (SplittableCompressionCodec.class.isAssignableFrom(codec)) {
            assertTrue(scanner.isSplittable());
        } else {
            assertFalse(scanner.isSplittable());
        }
    }
    scanner.init();

    if (storeType == StoreType.SEQUENCEFILE) {
        assertTrue(scanner instanceof SequenceFileScanner);
        Writable key = ((SequenceFileScanner) scanner).getKey();
        assertEquals(key.getClass().getCanonicalName(), LongWritable.class.getCanonicalName());
    }

    int tupleCnt = 0;
    Tuple tuple;
    while ((tuple = scanner.next()) != null) {
        tupleCnt++;
    }
    scanner.close();

    assertEquals(tupleNum, tupleCnt);
    assertNotSame(appender.getStats().getNumBytes().longValue(),
            scanner.getInputStats().getNumBytes().longValue());
    assertEquals(appender.getStats().getNumRows().longValue(),
            scanner.getInputStats().getNumRows().longValue());
}
From source file:org.apache.tajo.storage.TestLineReader.java
License:Apache License
@Test
public void testLineDelimitedReader() throws IOException {
    TajoConf conf = new TajoConf();
    Path testDir = CommonTestingUtil.getTestDir(TEST_PATH);
    FileSystem fs = testDir.getFileSystem(conf);

    Schema schema = new Schema();
    schema.addColumn("id", Type.INT4);
    schema.addColumn("age", Type.INT8);
    schema.addColumn("comment", Type.TEXT);
    schema.addColumn("comment2", Type.TEXT);

    TableMeta meta = CatalogUtil.newTableMeta(StoreType.TEXTFILE);
    meta.putOption("compression.codec", DeflateCodec.class.getCanonicalName());

    Path tablePath = new Path(testDir, "line1." + DeflateCodec.class.getSimpleName());
    FileAppender appender = (FileAppender) StorageManager.getFileStorageManager(conf).getAppender(null, null,
            meta, schema, tablePath);
    appender.enableStats();
    appender.init();

    int tupleNum = 10000;
    VTuple vTuple;
    long splitOffset = 0;
    for (int i = 0; i < tupleNum; i++) {
        vTuple = new VTuple(4);
        vTuple.put(0, DatumFactory.createInt4(i + 1));
        vTuple.put(1, DatumFactory.createInt8(25l));
        vTuple.put(2, DatumFactory.createText("emiya muljomdao"));
        vTuple.put(3, NullDatum.get());
        appender.addTuple(vTuple);

        if (i == (tupleNum / 2)) {
            splitOffset = appender.getOffset();
        }
    }
    String extension = ((DelimitedTextFile.DelimitedTextFileAppender) appender).getExtension();
    appender.close();

    tablePath = tablePath.suffix(extension);
    FileFragment fragment = new FileFragment("table", tablePath, 0, splitOffset);
    DelimitedLineReader reader = new DelimitedLineReader(conf, fragment);
    // if file is compressed, will read to EOF
    assertTrue(reader.isCompressed());
    assertFalse(reader.isReadable());
    reader.init();
    assertTrue(reader.isReadable());

    int i = 0;
    while (reader.isReadable()) {
        ByteBuf buf = reader.readLine();
        if (buf == null)
            break;
        i++;
    }

    IOUtils.cleanup(null, reader, fs);
    assertEquals(tupleNum, i);
}
From source file:org.apache.tajo.storage.v2.TestCSVCompression.java
License:Apache License
public void testSplitCompressionData() throws IOException {
    Schema schema = new Schema();
    schema.addColumn("id", TajoDataTypes.Type.INT4);
    schema.addColumn("age", TajoDataTypes.Type.INT8);

    TableMeta meta = CatalogUtil.newTableMeta(CatalogProtos.StoreType.CSV);
    meta.putOption("compression.codec", BZip2Codec.class.getCanonicalName());

    Path tablePath = new Path(testDir, "SplitCompression");
    Appender appender = StorageManagerFactory.getStorageManager(conf).getAppender(meta, schema, tablePath);
    appender.enableStats();
    appender.init();

    String extension = "";
    if (appender instanceof CSVFile.CSVAppender) {
        extension = ((CSVFile.CSVAppender) appender).getExtension();
    }

    int tupleNum = 100000;
    VTuple vTuple;
    for (int i = 0; i < tupleNum; i++) {
        vTuple = new VTuple(2);
        vTuple.put(0, DatumFactory.createInt4(i + 1));
        vTuple.put(1, DatumFactory.createInt8(25l));
        appender.addTuple(vTuple);
    }
    appender.close();

    TableStats stat = appender.getStats();
    assertEquals(tupleNum, stat.getNumRows().longValue());

    tablePath = tablePath.suffix(extension);
    FileStatus status = fs.getFileStatus(tablePath);
    long fileLen = status.getLen();
    long randomNum = (long) (Math.random() * fileLen) + 1;

    FileFragment[] tablets = new FileFragment[2];
    tablets[0] = new FileFragment("SplitCompression", tablePath, 0, randomNum);
    tablets[1] = new FileFragment("SplitCompression", tablePath, randomNum, (fileLen - randomNum));

    Scanner scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, tablets[0], schema);
    scanner.init();
    int tupleCnt = 0;
    Tuple tuple;
    while ((tuple = scanner.next()) != null) {
        tupleCnt++;
    }
    scanner.close();

    scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, tablets[1], schema);
    scanner.init();
    while ((tuple = scanner.next()) != null) {
        tupleCnt++;
    }
    scanner.close();

    assertEquals(tupleNum, tupleCnt);
}
From source file:org.apache.tajo.storage.v2.TestCSVCompression.java
License:Apache License
private void storageCompressionTest(CatalogProtos.StoreType storeType, Class<? extends CompressionCodec> codec)
        throws IOException {
    Schema schema = new Schema();
    schema.addColumn("id", TajoDataTypes.Type.INT4);
    schema.addColumn("age", TajoDataTypes.Type.INT8);

    TableMeta meta = CatalogUtil.newTableMeta(storeType);
    meta.putOption("compression.codec", codec.getCanonicalName());

    String fileName = "Compression_" + codec.getSimpleName();
    Path tablePath = new Path(testDir, fileName);
    Appender appender = StorageManagerFactory.getStorageManager(conf).getAppender(meta, schema, tablePath);
    appender.enableStats();
    appender.init();

    String extension = "";
    if (appender instanceof CSVFile.CSVAppender) {
        extension = ((CSVFile.CSVAppender) appender).getExtension();
    }

    int tupleNum = 10000;
    VTuple vTuple;
    for (int i = 0; i < tupleNum; i++) {
        vTuple = new VTuple(2);
        vTuple.put(0, DatumFactory.createInt4(i + 1));
        vTuple.put(1, DatumFactory.createInt8(25l));
        appender.addTuple(vTuple);
    }
    appender.close();

    TableStats stat = appender.getStats();
    assertEquals(tupleNum, stat.getNumRows().longValue());

    tablePath = tablePath.suffix(extension);
    FileStatus status = fs.getFileStatus(tablePath);
    long fileLen = status.getLen();
    FileFragment[] tablets = new FileFragment[1];
    tablets[0] = new FileFragment(fileName, tablePath, 0, fileLen);

    Scanner scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, tablets[0], schema);
    scanner.init();
    int tupleCnt = 0;
    while (scanner.next() != null) {
        tupleCnt++;
    }
    scanner.close();

    assertEquals(tupleCnt, tupleNum);
}
From source file:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MapOutput.java
License:Apache License
public static MapOutput createDiskMapOutput(InputAttemptIdentifier attemptIdentifier, MergeManager merger,
        long size, Configuration conf, int fetcher, boolean primaryMapOutput,
        TezTaskOutputFiles mapOutputFile) throws IOException {
    FileSystem fs = FileSystem.getLocal(conf);
    Path outputPath = mapOutputFile.getInputFileForWrite(
            attemptIdentifier.getInputIdentifier().getInputIndex(), size);

    // Files are not clobbered due to the id being appended to the outputPath in the tmpPath,
    // otherwise fetches for the same task but from different attempts would clobber each other.
    Path tmpOutputPath = outputPath.suffix(String.valueOf(fetcher));
    long offset = 0;

    MapOutput mapOutput = new MapOutput(Type.DISK, attemptIdentifier, merger, size, outputPath, offset,
            primaryMapOutput, fs, tmpOutputPath);
    mapOutput.disk = mapOutput.localFS.create(tmpOutputPath);
    return mapOutput;
}
From source file:org.seqdoop.hadoop_bam.cli.plugins.View.java
License:Open Source License
@Override
protected int run(CmdLineParser parser) {
    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("view :: PATH not given.");
        return 3;
    }

    Utils.toStringency(parser.getOptionValue(stringencyOpt,
            ValidationStringency.DEFAULT_STRINGENCY.toString()), "view");

    final String path = args.get(0);
    final List<String> regions = args.subList(1, args.size());

    final boolean headerOnly = parser.getBoolean(headerOnlyOpt);

    final SAMFileReader reader;
    try {
        final Path p = new Path(path);

        SeekableStream idx;
        try {
            idx = WrapSeekable.openPath(getConf(), p.suffix(".bai"));
        } catch (Exception e) {
            idx = null;
        }

        final SeekableStream sam = WrapSeekable.openPath(getConf(), p);

        reader = idx == null ? new SAMFileReader(sam, false) : new SAMFileReader(sam, idx, false);
    } catch (Exception e) {
        System.err.printf("view :: Could not open '%s': %s\n", path, e.getMessage());
        return 4;
    }

    reader.setValidationStringency(ValidationStringency.SILENT);

    final SAMFileHeader header;
    try {
        header = reader.getFileHeader();
    } catch (SAMFormatException e) {
        System.err.printf("view :: Could not parse '%s': %s\n", path, e.getMessage());
        return 4;
    }

    final String fmt = (String) parser.getOptionValue(formatOpt);
    final SAMFormat format = fmt == null ? SAMFormat.SAM : SAMFormat.valueOf(fmt.toUpperCase(Locale.ENGLISH));

    final SAMFileWriterImpl writer;
    switch (format) {
    case BAM:
        // BAM output inside view no longer supported since Picard made the class private
        System.err.println("BAM output inside view no longer supported");
        return 1;
    case SAM:
        writer = new SAMTextWriter(System.out);
        break;
    default:
        writer = null;
        assert false;
    }

    writer.setSortOrder(header.getSortOrder(), true);
    writer.setHeader(header);

    if (regions.isEmpty() || headerOnly) {
        if (!headerOnly)
            if (!writeIterator(writer, reader.iterator(), path))
                return 4;

        writer.close();
        return 0;
    }

    if (!reader.isBinary()) {
        System.err.println("view :: Cannot output regions from SAM file");
        return 4;
    }

    if (!reader.hasIndex()) {
        System.err.println("view :: Cannot output regions from BAM file lacking an index");
        return 4;
    }

    reader.enableIndexCaching(true);

    boolean errors = false;

    for (final String region : regions) {
        final StringTokenizer st = new StringTokenizer(region, ":-");
        final String refStr = st.nextToken();

        final int beg, end;
        if (st.hasMoreTokens()) {
            beg = parseCoordinate(st.nextToken());
            end = st.hasMoreTokens() ? parseCoordinate(st.nextToken()) : -1;

            if (beg < 0 || end < 0) {
                errors = true;
                continue;
            }
            if (end < beg) {
                System.err.printf("view :: Invalid range, cannot end before start: '%d-%d'\n", beg, end);
                errors = true;
                continue;
            }
        } else
            beg = end = 0;

        SAMSequenceRecord ref = header.getSequence(refStr);
        if (ref == null)
            try {
                ref = header.getSequence(Integer.parseInt(refStr));
            } catch (NumberFormatException e) {
            }

        if (ref == null) {
            System.err.printf("view :: Not a valid sequence name or index: '%s'\n", refStr);
            errors = true;
            continue;
        }

        final SAMRecordIterator it = reader.queryOverlapping(ref.getSequenceName(), beg, end);

        if (!writeIterator(writer, it, path))
            return 4;
    }

    writer.close();
    return errors ? 5 : 0;
}
From source file:org.springframework.data.hadoop.store.strategy.naming.CodecFileNamingStrategy.java
License:Apache License
@Override
public Path resolve(Path path) {
    CodecInfo c = getCodecInfo();
    String suffix = c != null ? "." + c.getDefaultSuffix() : "";
    if (path != null) {
        return path.suffix(suffix);
    } else if (StringUtils.hasText(suffix)) {
        return new Path(suffix);
    } else {
        return path;
    }
}