Example usage for org.apache.hadoop.io Text toString

Introduction

On this page you can find example usage for org.apache.hadoop.io.Text.toString().

Prototype

@Override
public String toString() 

Document

Convert text back to string
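The prototype above overrides Object.toString() and decodes the Text's UTF-8 bytes back into a java.lang.String. As a quick orientation before the project examples below, here is a minimal, self-contained sketch (hypothetical driver code, not taken from any of the projects listed on this page):

import org.apache.hadoop.io.Text;

public class TextToStringExample {
    public static void main(String[] args) {
        // Text stores its contents as UTF-8 bytes; toString() decodes them into a String.
        Text text = new Text("hello hadoop");
        String decoded = text.toString();
        System.out.println(decoded); // prints: hello hadoop

        // Text instances are often reused (e.g. by MapReduce record readers);
        // toString() always reflects the current contents after set().
        text.set("another value");
        System.out.println(text.toString()); // prints: another value
    }
}

Because many input formats reuse the same Text object across records, call toString() (or otherwise copy the bytes) before holding on to a value beyond the current record.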

Usage

From source file: com.ibm.bi.dml.udf.lib.RemoveEmptyRows.java

License: Open Source License

@Override
public void execute() {
    Matrix mat = (Matrix) this.getFunctionInput(0);
    String fnameOld = mat.getFilePath();

    HashMap<Long, Long> keyMap = new HashMap<Long, Long>(); //old,new rowID

    try {
        //prepare input
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fnameOld);
        FileSystem fs = FileSystem.get(job);
        if (!fs.exists(path))
            throw new IOException("File " + fnameOld + " does not exist on HDFS.");
        FileInputFormat.addInputPath(job, path);
        TextInputFormat informat = new TextInputFormat();
        informat.configure(job);

        //prepare output
        String fnameNew = createOutputFilePathAndName(OUTPUT_FILE);
        DataOutputStream ostream = MapReduceTool.getHDFSDataOutputStream(fnameNew, true);

        //read and write if necessary
        InputSplit[] splits = informat.getSplits(job, 1);

        LongWritable key = new LongWritable();
        Text value = new Text();
        long ID = 1;

        try {
            //for obj reuse and preventing repeated buffer re-allocations
            StringBuilder sb = new StringBuilder();

            for (InputSplit split : splits) {
                RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
                try {
                    while (reader.next(key, value)) {
                        String cellStr = value.toString().trim();
                        StringTokenizer st = new StringTokenizer(cellStr, " ");
                        long row = Integer.parseInt(st.nextToken());
                        long col = Integer.parseInt(st.nextToken());
                        double lvalue = Double.parseDouble(st.nextToken());

                        if (!keyMap.containsKey(row))
                            keyMap.put(row, ID++);
                        long rowNew = keyMap.get(row);

                        sb.append(rowNew);
                        sb.append(' ');
                        sb.append(col);
                        sb.append(' ');
                        sb.append(lvalue);
                        sb.append('\n');

                        ostream.writeBytes(sb.toString());
                        sb.setLength(0);
                    }
                } finally {
                    if (reader != null)
                        reader.close();
                }
            }

            _ret = new Matrix(fnameNew, keyMap.size(), mat.getNumCols(), ValueType.Double);
        } finally {
            if (ostream != null)
                ostream.close();
        }
    } catch (Exception ex) {
        throw new RuntimeException("Unable to execute external function.", ex);
    }
}

From source file: com.ibm.db2j.AccumuloVTI.java

License: Open Source License

/**
 * GaianDB extracts rows by calling this method repeatedly.
 * 'dvdRecord' contains the number of columns resolved in tableShapeRSMD.
 * However, we only need to populate the projected column indexes.
 */
@Override
public int nextRow(final DataValueDescriptor[] dvdRecord) throws StandardException, SQLException {

    //      logger.logDetail("Getting new relational record based on set of Accumulo rows. rowCount = " + rowCount +
    //            ", currenAccumuloRow: " + currentAccumuloRow );

    if (0 == rowCount) {
        numRowsReceivedFromAccumulo = 0;
        if (0 == projectedColumns.length || false == rowScanIterator.hasNext())
            return IFastPath.SCAN_COMPLETED; // empty table
        else
            currentAccumuloRow = rowScanIterator.next(); // kick-start row extraction
    }

    // Check if there are any Accumulo records left...
    if (null == currentAccumuloRow)
        return IFastPath.SCAN_COMPLETED;

    Key key = currentAccumuloRow.getKey(); // lots of info available off the Key: rowID, col name/family, col qualifier, visibility, timestamp
    Text rowID = key.getRow();

    // Look for a new record... until one is found that meets qualifiers, or until none are left
    do {
        // Check if there are any Accumulo records left...
        if (null == currentAccumuloRow)
            return IFastPath.SCAN_COMPLETED;

        numRowsReceivedFromAccumulo++;

        // Set rowID column before extracting others associated with it in the while loop
        if (1 == rowidColShift)
            dvdRecord[0].setValue(rowID.toString());

        // Initialise column cells to NULL value.
        for (int i = rowidColShift; i < projectedColumns.length; i++)
            dvdRecord[projectedColumns[i] - 1].setToNull();

        // Extract columns from Accumulo records for this rowID - note: Accumulo rows don't have to be complete
        Text previousRowID = rowID;
        while (rowID.equals(previousRowID)) {

            final String colName = key.getColumnFamily().toString();
            final Integer pColID = projectedColumnsNameToIndexMap.get(colName);
            if (null == pColID) {
                logger.logImportant(
                        "Encountered Accumulo column which was not requested as column family (skipped): "
                                + colName);
                continue; // this column was not requested - should not happen
            }

            // Log info about the newly found column
            final String cellStringValue = isExtractAccumuloColumnQualifiersInPlaceOfValues
                    ? currentAccumuloRow.getKey().getColumnQualifier().toString()
                    : currentAccumuloRow.getValue().toString();
            //            logger.logDetail("Setting ProjectedColID: " + pColID +
            //                  ", from record with Key: " + key + " ==> ColFamily: " + key.getColumnFamily()
            //                  + ( isExtractAccumuloColumnQualifiersInPlaceOfValues ? ", ColQualifier: " : ", Value: " ) + cellStringValue );

            // Set column value for the row - this also does type conversion.
            dvdRecord[pColID - 1].setValue(cellStringValue); // normalise to 0-based

            // Scroll to the next column - break if we run out of records (rows don't have to be complete)
            if (false == rowScanIterator.hasNext()) {
                currentAccumuloRow = null;
                break;
            }
            currentAccumuloRow = rowScanIterator.next();
            key = currentAccumuloRow.getKey();
            previousRowID = rowID;
            rowID = key.getRow();
        }

    } while (null != qualifiers && false == RowsFilter.testQualifiers(dvdRecord, qualifiers));

    rowCount++;
    return IFastPath.GOT_ROW;
}

From source file: com.ibm.jaql.io.hadoop.converter.FromJsonTextConverter.java

License: Apache License

@Override
protected ToJson<Text> createValueConverter() {
    return new ToJson<Text>() {
        JsonParser parser = new JsonParser();

        public JsonValue convert(Text src, JsonValue tgt) {
            if (src == null) {
                return null;
            }

            try {
                parser.ReInit(new StringReader(src.toString()));
                JsonValue value = parser.JsonVal();
                return value;
            } catch (ParseException pe) {
                throw new RuntimeException(pe);
            }
        }

        public JsonValue createTarget() {
            return null;
        }

        public Schema getSchema() {
            return SchemaFactory.anySchema();
        }
    };
}

From source file: com.ibm.jaql.io.hadoop.FromLinesConverter.java

License: Apache License

/** Converts the given line into a JSON value. */
@Override
public JsonValue convert(LongWritable key, Text value, JsonValue target) {
    String text = value.toString();
    if (text.equals(nullString)) {
        if (converter.isNullable()) {
            return null;
        } else {
            throw new RuntimeException("found null value, expected " + converter.getType());
        }
    }
    target = converter.convert(new JsonString(text), target);
    return target;
}

From source file: com.ibm.spss.hive.serde2.xml.XmlSerDe.java

License: Open Source License

/**
 * @see org.apache.hadoop.hive.serde2.Deserializer#deserialize(org.apache.hadoop.io.Writable)
 */
@Override
public Object deserialize(Writable writable) throws SerDeException {
    Text text = (Text) writable;
    if (text == null || text.getLength() == 0) {
        return (Object) null;
    }
    try {
        return this.xmlProcessor.parse(text.toString());
    } catch (Exception e) {
        throw new SerDeException(e);
    }
}

From source file: com.ifeng.hadoop.thinker.LogMapper.java

License: Apache License

@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
        throws IOException, InterruptedException {

    try {
        if (value != null && value.toString().trim().length() > 0) {
            String line = value.toString();
            String[] items = line.split("\\s+");
            if (items.length == 3) {
                if (items[0] != null && items[1] != null && items[2] != null) {
                    String name = items[0];
                    String swift = items[1].trim();
                    String origin = items[2].trim();

                    if (swift.split(":").length == 2 && origin.split(":").length == 2) {
                        swift = swift.split(":")[1];
                        origin = origin.split(":")[1];

                        long swiftSize = Long.parseLong(swift);
                        long originSize = Long.parseLong(origin);

                        LogModel model = new LogModel(name, swiftSize, originSize, (swiftSize - originSize));
                        context.write(new Text(name), new Text(model.toString()));
                    } else {
                        log.info("Invalid line: ", value.toString());
                    }
                }

            } else {

                log.info("Invalid line: ", value.toString());
            }

        }
    } catch (NumberFormatException e) {
        log.error(e.getMessage(), e);
    }
}

From source file: com.ifeng.ipserver.IPServerLogParseMapper.java

License: Apache License

@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
        throws IOException, InterruptedException {

    String line = value.toString();

    if (line.contains("The node 59.53.89.94") || line.contains("http://59.53.89.94")) {
        log.info("Matched: {}", line);
        context.write(new Text(key.toString()), value);
    }
}

From source file: com.ifeng.logparser.NginxLogMapper.java

License: Apache License

@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
        throws IOException, InterruptedException {

    //log.info("Key = {} -- value = {}", key, value);
    String line = value.toString();
    String[] logs = line.split(" - - ");

    if (logs.length == 2) {
        String seria = "\"NULL\"";
        String ip = logs[0];
        String factors = logs[1];

        //String[] msgs = factors.split("\"-\"");
        //String[] msgs = factors.split("\\\"[^\"]*\""); //\"[^"]*"

        Pattern pattern = Pattern.compile("\"[^\"]*\"");

        Matcher matcher = pattern.matcher(factors);

        // Skip the first two quoted fields; the third quoted field holds the value we want.
        matcher.find();
        matcher.find();

        if (matcher.find()) {
            seria = matcher.group().replaceAll("\"", "");
            log.info("Seria: {}", seria);
        }

        //log.info("length: {} == {}", msgs.length, msgs);

        /*if(msgs != null && msgs.length > 1){
           seria = msgs[2].trim();
        }*/

        seria = (seria.length() > 0 && (!seria.equals("-"))) ? seria : "NULL";
        for (int i = 0; i < (15 - ip.length()); i++) {
            ip += " ";
        }

        context.write(new Text(ip), new Text("\t" + seria));

        log.info("IP: {}, Mathine: {}", ip, seria);
    }

}

From source file: com.ifeng.sorter.LogSortMapper.java

License: Apache License

@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
        throws IOException, InterruptedException {
    log.info("Value: {}", value.toString());

}

From source file: com.ifeng.vdn.iparea.parser.IPAreaMapper.java

License: Apache License

@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
        throws IOException, InterruptedException {

    log.info("Key: {}, Value: {}", key, value);

    if (value != null) {
        String[] items = value.toString().split("\\|");
        String start = items[0];
        String end = items[1];

        List<String> ips = IPV4Handler.getAllFromRange(start, end);

        for (String ip : ips) {
            StringBuilder sb = new StringBuilder();
            //sb.append("\t");

            sb.append(items[2]);
            sb.append("\t");

            sb.append(items[3]);
            sb.append("\t");

            sb.append(items[4]);

            context.write(new Text(ip), new Text(sb.toString()));
        }
    }

}