Example usage for org.apache.hadoop.io MapWritable get

Introduction

This page collects example usages of the get method of org.apache.hadoop.io.MapWritable.

Prototype

@Override
    public Writable get(Object key)

Source Link

Usage

From source file:org.apache.mahout.classifier.sequencelearning.hmm.hadoop.BaumWelchReducer.java

License:Apache License

/**
 * Aggregates every stripe received for {@code key} into one map, normalizes it, and writes the
 * result to the job output.
 * <p>
 * How the stripes are combined depends on the {@code scaling} field:
 * <ul>
 * <li>{@code "logscaling"}: values are treated as log-domain numbers. They are accumulated with
 * log-sum-exp, the log of the grand total is subtracted, and the result is written back
 * exponentiated.</li>
 * <li>{@code "rescaling"}: when the key's first character is {@code 'I'} the values are plain
 * doubles that are summed and divided by the grand total; otherwise each value is a
 * (numerator, denominator) pair packed in a {@code BytesWritable}, summed component-wise and
 * written as numerator / denominator.</li>
 * <li>any other value: plain doubles are summed and divided by the grand total.</li>
 * </ul>
 *
 * @param key     key shared by all the stripes being aggregated
 * @param stripes partial maps; their keys are cast to {@code IntWritable}
 * @param context job context that receives the single aggregated map
 * @throws IOException          if writing the output fails
 * @throws InterruptedException if writing the output is interrupted
 */
@Override
protected void reduce(Text key, Iterable<MapWritable> stripes, Context context)
        throws IOException, InterruptedException {

    MapWritable sumOfStripes = new MapWritable();

    // Finish the Expectation Step by aggregating all posterior probabilities for one key
    if (scaling.equals("logscaling")) {
        double totalValSum = Double.NEGATIVE_INFINITY;
        for (MapWritable stripe : stripes) {
            for (Map.Entry<?, ?> e : stripe.entrySet()) {
                double val = ((DoubleWritable) e.getValue()).get();
                totalValSum = logAdd(totalValSum, val);
                if (sumOfStripes.containsKey(e.getKey())) {
                    double previous = ((DoubleWritable) sumOfStripes.get(e.getKey())).get();
                    val = logAdd(previous, val);
                }
                sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
            }
        }

        // normalize the aggregate; replacing the value of an existing key is not a structural
        // modification, so it is safe while iterating
        for (Map.Entry<?, ?> e : sumOfStripes.entrySet()) {
            double val = ((DoubleWritable) e.getValue()).get();
            if (totalValSum > Double.NEGATIVE_INFINITY) {
                val = val - totalValSum;
            }
            sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(Math.exp(val)));
        }
    } else if (scaling.equals("rescaling")) {
        double totalValSum = 0.0;

        for (MapWritable stripe : stripes) {
            for (Map.Entry<?, ?> e : stripe.entrySet()) {
                if (key.charAt(0) == (int) 'I') {
                    double val = ((DoubleWritable) e.getValue()).get();
                    totalValSum += val;
                    if (!sumOfStripes.containsKey(e.getKey())) {
                        sumOfStripes.put((IntWritable) e.getKey(), (DoubleWritable) e.getValue());
                    } else {
                        val += ((DoubleWritable) sumOfStripes.get(e.getKey())).get();
                        sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                    }
                } else {
                    // value is a packed (numerator, denominator) pair
                    double[] pr = BaumWelchUtils.toDoublePair(((BytesWritable) e.getValue()).getBytes());
                    double num = pr[0];
                    double denom = pr[1];
                    if (!sumOfStripes.containsKey(e.getKey())) {
                        sumOfStripes.put((IntWritable) e.getKey(), (BytesWritable) e.getValue());
                    } else {
                        double[] pr1 = BaumWelchUtils
                                .toDoublePair(((BytesWritable) sumOfStripes.get(e.getKey())).getBytes());
                        num += pr1[0];
                        denom += pr1[1];
                        byte[] doublePair1 = BaumWelchUtils.doublePairToByteArray(num, denom);
                        sumOfStripes.put((IntWritable) e.getKey(), new BytesWritable(doublePair1));
                    }
                }
            }
        }

        if (key.charAt(0) == (int) 'I') {
            // normalize the aggregate
            for (Map.Entry<?, ?> e : sumOfStripes.entrySet()) {
                double val = ((DoubleWritable) e.getValue()).get();
                if (totalValSum > 0) {
                    val /= totalValSum;
                }
                sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
            }

        } else {
            // compute the probabilities from the summed (numerator, denominator) pairs
            for (Map.Entry<?, ?> e : sumOfStripes.entrySet()) {
                double[] pr1 = BaumWelchUtils
                        .toDoublePair(((BytesWritable) sumOfStripes.get(e.getKey())).getBytes());
                sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(pr1[0] / pr1[1]));
            }
        }
    } else {
        double totalValSum = 0.0;

        for (MapWritable stripe : stripes) {
            for (Map.Entry<?, ?> e : stripe.entrySet()) {
                double val = ((DoubleWritable) e.getValue()).get();
                totalValSum += val;
                if (!sumOfStripes.containsKey(e.getKey())) {
                    sumOfStripes.put((IntWritable) e.getKey(), (DoubleWritable) e.getValue());
                } else {
                    val += ((DoubleWritable) sumOfStripes.get(e.getKey())).get();
                    sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
                }
            }
        }

        // normalize the aggregate
        for (Map.Entry<?, ?> e : sumOfStripes.entrySet()) {
            double val = ((DoubleWritable) e.getValue()).get();
            if (totalValSum > 0) {
                val /= totalValSum;
            }
            sumOfStripes.put((IntWritable) e.getKey(), new DoubleWritable(val));
        }
    }

    // Write the distribution parameter vector to HDFS for the next iteration
    context.write(key, sumOfStripes);

}

/**
 * Returns {@code log(exp(a) + exp(b))} without leaving the log domain.
 * <p>
 * {@code Double.NEGATIVE_INFINITY} (the log of zero) is handled explicitly: subtracting it from
 * itself would otherwise produce NaN. Factoring out the larger operand keeps both exponents
 * non-positive, so {@code Math.exp} cannot overflow.
 */
private static double logAdd(double a, double b) {
    if (a == Double.NEGATIVE_INFINITY) {
        return b;
    }
    if (b == Double.NEGATIVE_INFINITY) {
        return a;
    }
    double max = Math.max(a, b);
    return max + Math.log(Math.exp(a - max) + Math.exp(b - max));
}

From source file:org.apache.nutch.crawl.CrawlDatum.java

License:Apache License

/**
 * Restores this object's fields from {@code in}.
 * <p>
 * The first byte is a format version; which fields follow, and how they are encoded, depends on
 * it. Records newer than {@code CUR_VERSION} are rejected.
 *
 * @param in stream positioned at the start of a serialized record
 * @throws IOException if reading fails or the record version is newer than {@code CUR_VERSION}
 */
public void readFields(DataInput in) throws IOException {
    final byte version = in.readByte();
    if (version > CUR_VERSION) {
        throw new VersionMismatchException(CUR_VERSION, version);
    }

    status = in.readByte();
    fetchTime = in.readLong();
    retries = in.readByte();
    // up to version 5 the interval was stored as a float
    if (version > 5) {
        fetchInterval = in.readInt();
    } else {
        fetchInterval = Math.round(in.readFloat());
    }
    score = in.readFloat();

    if (version > 2) {
        modifiedTime = in.readLong();
        int signatureLength = in.readByte();
        if (signatureLength > 0) {
            signature = new byte[signatureLength];
            in.readFully(signature);
        } else {
            signature = null;
        }
    }

    if (version > 3) {
        // a boolean flag says whether a metadata map follows
        boolean metadataPresent = in.readBoolean();
        if (!metadataPresent) {
            metaData = null;
        } else if (version < 7) {
            // pre-7 records: read into a scratch map, then copy entry by entry
            org.apache.hadoop.io.MapWritable legacy = new org.apache.hadoop.io.MapWritable();
            metaData = new org.apache.hadoop.io.MapWritable();
            legacy.readFields(in);
            for (Writable legacyKey : legacy.keySet()) {
                metaData.put(legacyKey, legacy.get(legacyKey));
            }
        } else {
            metaData = new org.apache.hadoop.io.MapWritable();
            metaData.readFields(in);
        }
    }

    // translate status codes written by versions before 5
    if (version < 5) {
        if (oldToNew.containsKey(status)) {
            status = oldToNew.get(status);
        } else {
            status = STATUS_DB_UNFETCHED;
        }
    }
}

From source file:org.apache.nutch.tools.compat.ReprUrlFixer.java

License:Apache License

/**
 * Runs the new ReprUrl logic on all crawldatums grouped under one url.
 * <p>
 * Every datum is re-emitted under {@code key}. Before that, a datum's representative-url
 * metadata entry is removed when {@code URLUtil.chooseRepr} picks the key url itself.
 *
 * @param key      the url the datums are grouped under
 * @param values   the datums for that url
 * @param output   receives each (possibly modified) datum
 * @param reporter not used by this method
 * @throws IOException if collecting the output fails
 */
public void reduce(Text key, Iterator<CrawlDatum> values, OutputCollector<Text, CrawlDatum> output,
        Reporter reporter) throws IOException {

    final String url = key.toString();

    // get all crawl datums for a given url key, fetch for instance can have
    // more than one under a given key if there are multiple redirects to a
    // given url; each one is cloned before being kept
    List<CrawlDatum> copies = new ArrayList<CrawlDatum>();
    while (values.hasNext()) {
        copies.add((CrawlDatum) WritableUtils.clone(values.next(), conf));
    }

    // apply redirect repr url logic for each datum
    for (CrawlDatum current : copies) {

        MapWritable meta = current.getMetaData();
        Text repr = (Text) meta.get(Nutch.WRITABLE_REPR_URL_KEY);
        byte status = current.getStatus();

        // only if the crawl datum is from the crawldb or is a successfully
        // fetched page from the segments
        boolean eligible = CrawlDatum.hasDbStatus(current) || status == CrawlDatum.STATUS_FETCH_SUCCESS;

        if (eligible && repr != null) {
            String src = repr.toString();

            // both need to be well formed urls; a malformed one simply skips the check below
            boolean wellFormed;
            try {
                new URL(src);
                new URL(url);
                wellFormed = true;
            } catch (MalformedURLException ignored) {
                wellFormed = false;
            }

            // if the src and repr urls are the same after the new logic then
            // remove the repr url from the metadata as it is no longer needed
            if (wellFormed) {
                String chosen = URLUtil.chooseRepr(src, url, true);
                if (chosen.equals(url)) {
                    LOG.info("Removing " + chosen + " from " + url);
                    meta.remove(Nutch.WRITABLE_REPR_URL_KEY);
                }
            }
        }

        // collect each datum
        output.collect(key, current);
    }

}

From source file:org.apache.pirk.query.wideskies.QueryUtils.java

License:Apache License

/**
 * Converts the data element held in {@code dataMap} into BigInteger partitions, field by field,
 * as directed by the query schema.
 * <p>
 * When {@code embedSelector} is set, the selector's partitions come first. After that, every
 * element name listed by the query schema contributes its partitions; a field missing from the
 * map is partitioned as the empty string (or a one-element list holding it, for array fields).
 *
 * @param dataMap       the data element
 * @param qSchema       query schema naming the selector and the fields to extract
 * @param dSchema       data schema supplying element types and partitioners
 * @param embedSelector whether to prepend the selector's partitions
 * @return the partitions in the order described above
 * @throws PIRException declared by the method; presumably raised by the partitioning helpers
 */
public static List<BigInteger> partitionDataElement(MapWritable dataMap, QuerySchema qSchema,
        DataSchema dSchema, boolean embedSelector) throws PIRException {
    List<BigInteger> parts = new ArrayList<>();

    logger.debug("queryType = " + qSchema.getSchemaName());

    // Selector partitions, when requested, lead the list
    if (embedSelector) {
        String selectorField = qSchema.getSelectorName();
        String selectorType = dSchema.getElementType(selectorField);
        String selector = getSelectorByQueryType(dataMap, qSchema, dSchema);

        List<BigInteger> selectorParts = embeddedSelectorToPartitions(selector, selectorType,
                dSchema.getPartitionerForElement(selectorField));
        parts.addAll(selectorParts);

        logger.debug("Added embedded selector for selector = " + selector + " parts.size() = " + parts.size());
    }

    // Then every data field the query schema asks for
    for (String fieldName : qSchema.getElementNames()) {
        Object dataElement = dataMap.containsKey(dSchema.getTextName(fieldName))
                ? dataMap.get(dSchema.getTextName(fieldName))
                : null;

        if (!dSchema.isArrayElement(fieldName)) {
            if (dataElement == null) {
                dataElement = "";
            } else if (dataElement instanceof Text) {
                dataElement = dataElement.toString();
            }
            parts.addAll(dSchema.getPartitionerForElement(fieldName).toPartitions(dataElement,
                    dSchema.getElementType(fieldName)));
            continue;
        }

        // Array field: flatten to strings; stays null for a value of any other type
        List<String> elementArray = null;
        if (dataElement == null) {
            elementArray = Collections.singletonList("");
        } else if (dataElement instanceof WritableArrayWritable) {
            elementArray = Arrays.asList(((WritableArrayWritable) dataElement).toStrings());
        } else if (dataElement instanceof ArrayWritable) {
            elementArray = Arrays.asList(((ArrayWritable) dataElement).toStrings());
        }

        parts.addAll(dSchema.getPartitionerForElement(fieldName).arrayToPartitions(elementArray,
                dSchema.getElementType(fieldName)));
    }
    logger.debug("parts.size() = " + parts.size());

    return parts;
}

From source file:org.apache.pirk.query.wideskies.QueryUtils.java

License:Apache License

/**
 * Pulls the correct selector from the MapWritable data element given the queryType
 * <p>/*w w w  . j  av a2  s  .c  om*/
 * Pulls first element of array if element is an array type
 */
public static String getSelectorByQueryType(MapWritable dataMap, QuerySchema qSchema, DataSchema dSchema) {
    String selector;

    String fieldName = qSchema.getSelectorName();
    if (dSchema.isArrayElement(fieldName)) {
        if (dataMap.get(dSchema.getTextName(fieldName)) instanceof WritableArrayWritable) {
            String[] selectorArray = ((WritableArrayWritable) dataMap.get(dSchema.getTextName(fieldName)))
                    .toStrings();
            selector = selectorArray[0];
        } else {
            String[] elementArray = ((ArrayWritable) dataMap.get(dSchema.getTextName(fieldName))).toStrings();
            selector = elementArray[0];
        }
    } else {
        selector = dataMap.get(dSchema.getTextName(fieldName)).toString();
    }

    return selector;
}

From source file:org.apache.pirk.schema.query.filter.StopListFilter.java

License:Apache License

/**
 * Checks the data element against the stop list.
 * <p>
 * Each field named in {@code filterSet} is looked up in {@code dataElement}. Array-typed fields
 * are checked element by element, other fields by their string form. The element is rejected
 * as soon as {@code StopListUtils.checkElement} returns {@code false} for any value.
 * <p>
 * A field that is absent from the data element has nothing to check and therefore passes. This
 * now holds for scalar fields as well as array fields; previously an absent scalar field raised
 * a {@code NullPointerException}.
 *
 * @param dataElement the data element to test
 * @param dSchema     data schema used to resolve each field's map key and array-ness
 * @return {@code true} if every checked value passed, {@code false} otherwise
 */
@Override
public boolean filterDataElement(MapWritable dataElement, DataSchema dSchema) {
    // If the data element contains a value on the stoplist (corresponding to a key in the filterSet), do not use
    for (String filterName : filterSet) {
        Object value = dataElement.get(dSchema.getTextName(filterName));
        if (value == null) {
            continue; // field not present: nothing to check
        }

        if (dSchema.isArrayElement(filterName)) {
            String[] elements = null;
            if (value instanceof WritableArrayWritable) {
                elements = ((WritableArrayWritable) value).toStrings();
            } else if (value instanceof ArrayWritable) {
                elements = ((ArrayWritable) value).toStrings();
            }

            // a value of any other type is left unchecked, as before
            if (elements != null) {
                for (String element : elements) {
                    if (!StopListUtils.checkElement(element, stopList)) {
                        return false;
                    }
                }
            }
        } else if (!StopListUtils.checkElement(value.toString(), stopList)) {
            return false;
        }
    }
    return true;
}

From source file:org.apache.pirk.utils.QueryParserUtils.java

License:Apache License

/**
 * Given a URI query string, checks to see if the given document satisfies the query
 * <p>/*from   w w  w .  j a  v a 2 s.  c o  m*/
 * ...Very hacky...
 * <p>
 * NOTE: Assumes that MapWritable keys are Text objects and values are Text or TextArrayWritable objects
 * <p>
 * NOTE: Support for list fields (values) is provided for checkRecord with Map<String, Object> and checkRecord with MapWritable containing
 * WritableArrayWritable types for array values (vs. json string list representation)
 * <p>
 * NOTE: @ symbol represents flag ignore case sensitivity used after field (e.g. user_agent@:*searchparam*)
 * <p>
 * NOTE: Assumes that all AND booleans come before any OR booleans - ADD @ case sensitivity support for range queries
 * 
 */
public static boolean checkRecord(String uriQuery, MapWritable doc, DataSchema dataSchema) {
    boolean satisfiesQuery = true;

    logger.debug("uriQuery = " + uriQuery);
    uriQuery = uriQuery.substring(3); // strip the beginning query tag '?q='
    logger.debug("uriQuery = " + uriQuery);

    if (uriQuery.equals("*")) {
        return true;
    }

    String[] queryTokens = uriQuery.split("\\+(?=AND)|\\+(?=OR)|\\+(?=[a-z])"); // booleans of the form +AND+, +OR+, don't split on +T0+
    int index = 0;
    String item;
    while (index < queryTokens.length) {
        boolean ignoreCase = false;

        item = queryTokens[index];
        logger.debug("item = " + item);

        String[] itemTokens = item.split(":", 2); // There are two components <field>:<query>
        logger.debug("itemTokens[0] = " + itemTokens[0] + " itemTokens[1] = " + itemTokens[1]);

        // check for ignore case flag
        if (itemTokens[0].endsWith("@")) {
            ignoreCase = true;
            logger.debug("ignore case = true");
            itemTokens[0] = itemTokens[0].replaceAll("@", ""); // strip flag
            logger.debug("itemTokens[0]:" + itemTokens[0]);
        }

        Object value = doc.get(new Text(itemTokens[0]));
        if (value != null) // if the field is not present, a null Writable is returned
        {

            if (itemTokens[1].startsWith("[")) // Inclusive range query
            {
                if (value instanceof Text) {
                    if (!checkRangeQuery(true, itemTokens[0], itemTokens[1], value.toString(), dataSchema)) {
                        logger.debug("checkRangeQuery returned false");
                        satisfiesQuery = false;
                    }
                } else if (value instanceof TextArrayWritable) {
                    String[] elements = ((TextArrayWritable) value).toStrings();
                    boolean oneSatisfied = false;
                    for (String element : elements) {
                        if (checkRangeQuery(true, itemTokens[0], itemTokens[1], element, dataSchema)) {
                            logger.debug("checkRangeQuery returned true");
                            oneSatisfied = true;
                            break;
                        }
                    }
                    satisfiesQuery = oneSatisfied;
                }
            } else if (itemTokens[1].startsWith("{")) // Exclusive range query
            {
                if (value instanceof Text) {
                    if (!checkRangeQuery(false, itemTokens[0], itemTokens[1], value.toString(), dataSchema)) {
                        logger.debug("checkRangeQuery returned false");
                        satisfiesQuery = false;
                    }
                } else if (value instanceof TextArrayWritable) {
                    String[] elements = ((TextArrayWritable) value).toStrings();
                    boolean oneSatisfied = false;
                    for (String element : elements) {
                        if (checkRangeQuery(false, itemTokens[0], itemTokens[1], element, dataSchema)) {
                            logger.debug("checkRangeQuery returned true");
                            oneSatisfied = true;
                            break;
                        }
                    }
                    satisfiesQuery = oneSatisfied;
                }
            } else
            // Not a range query
            {
                if (value instanceof Text) {
                    String valueString = value.toString();
                    if (ignoreCase) { // Case insensitivity
                        logger.debug("not a range query; itemstoken1:" + itemTokens[1]);
                        itemTokens[1] = itemTokens[1].toLowerCase();
                        valueString = valueString.toLowerCase();
                        logger.debug("valuestring after:" + valueString);
                    }

                    if (itemTokens[1].contains("*") || itemTokens[1].contains("?")) // Wildcard match
                    {
                        logger.debug("itemTokens[1] = " + itemTokens[1] + " contains wildcard");
                        if (!Pattern.matches(wildcardToRegex(itemTokens[1]), valueString)) {
                            logger.debug("stringValue = " + valueString + " did not satisfy itemTokens[1] = "
                                    + itemTokens[1]);
                            satisfiesQuery = false;
                        }
                        logger.debug("stringValue = " + valueString + " did satisfy itemTokens[1] = "
                                + itemTokens[1]);
                    } else if (!(valueString).equals(itemTokens[1])) // Single value match
                    {
                        logger.debug("We do not have a single value match: stringValue " + valueString
                                + " != itemTokens[1] = " + itemTokens[1]);
                        satisfiesQuery = false;
                    }
                } else if (value instanceof TextArrayWritable) {
                    String[] elements = ((TextArrayWritable) value).toStrings();
                    logger.debug("elements.size() = " + elements.length);

                    boolean oneSatisfied = false;
                    for (String element : elements) {
                        if (ignoreCase) { // Case insensitivity
                            itemTokens[1] = itemTokens[1].toLowerCase();
                            logger.debug("waw: itemtoken1 after:" + itemTokens[1]);
                            element = element.toLowerCase();
                            logger.debug("element after:" + element);
                        }

                        logger.debug("element: " + element);
                        if (itemTokens[1].contains("*") || itemTokens[1].contains("?")) // Wildcard match
                        {
                            logger.debug("itemTokens[1] = " + itemTokens[1] + " contains wildcard");
                            if (Pattern.matches(wildcardToRegex(itemTokens[1]), element)) {
                                logger.debug("stringValue = " + element + " satisfied itemTokens[1] = "
                                        + itemTokens[1]);
                                oneSatisfied = true;
                                break;
                            }
                        } else if (element.equals(itemTokens[1])) // Single value match
                        {
                            logger.debug("We have a single value match: stringValue " + element
                                    + " = itemTokens[1] = " + itemTokens[1]);
                            oneSatisfied = true;
                            break;
                        }
                    }
                    satisfiesQuery = oneSatisfied;
                }
            }
        } else {
            satisfiesQuery = false; // add fix - account for case if query field does not exist
        }

        ++index; // Try to pick up the boolean operators
        if (index < (queryTokens.length - 1)) {
            if (queryTokens[index].equals("AND")) // Do nothing and keep going
            {
                if (!satisfiesQuery) {
                    break;
                }
                ++index;
                item = queryTokens[index];
            } else if (queryTokens[index].equals("OR")) // Assume all OR's occur after all AND's
            {
                if (satisfiesQuery) // if we passed the query and it is not the first term
                {
                    break;
                } else {
                    ++index;
                    item = queryTokens[index];
                    satisfiesQuery = true; // reset so that we pick up matches for the next term
                }
            } else if (!satisfiesQuery) {
                logger.debug("Does not satisfy the query and no boolean ops next...");
                break;
            }
        }
    }

    return satisfiesQuery;
}

From source file:org.apache.pirk.utils.QueryParserUtils.java

License:Apache License

/**
 * Given a URI query string, checks to see if the given document satisfies the query; array
 * values in the document are expected to be WritableArrayWritable objects.
 * <p>
 * The query must start with a three-character tag ('?q='), which is stripped. The query '*'
 * matches every document. Otherwise the query is split into {@code <field>:<query>} terms
 * separated by +AND+ / +OR+; each term is a range query (starting with '[' for inclusive or '{'
 * for exclusive), a wildcard match (containing '*' or '?') or an exact match. A trailing '@' on
 * the field name requests a case-insensitive match for non-range terms. For array values a term
 * is satisfied when at least one element satisfies it. A term whose field is absent from the
 * document is not satisfied.
 *
 * @param uriQuery   the query string, including its leading three-character tag
 * @param doc        the document to test; keys are looked up as Text
 * @param dataSchema data schema passed through to the range-query check
 * @return {@code true} if the document satisfies the query
 */
@SuppressWarnings("unchecked")
public static boolean checkRecordWritableArrayWritable(String uriQuery, MapWritable doc,
        DataSchema dataSchema) {
    boolean satisfiesQuery = true;

    logger.debug("uriQuery = " + uriQuery);
    uriQuery = uriQuery.substring(3); // strip the beginning query tag '?q='
    logger.debug("uriQuery = " + uriQuery);

    if (uriQuery.equals("*")) {
        return true;
    }

    String[] queryTokens = uriQuery.split("\\+(?=AND)|\\+(?=OR)|\\+(?=[a-z])"); // booleans of the form +AND+, +OR+, don't split on +T0+
    int index = 0;
    while (index < queryTokens.length) {
        boolean ignoreCase = false;

        String item = queryTokens[index];
        logger.debug("item = " + item);

        String[] itemTokens = item.split(":", 2); // There are two components <field>:<query>
        logger.debug("itemTokens[0] = " + itemTokens[0] + " itemTokens[1] = " + itemTokens[1]);

        // check for ignore case flag
        if (itemTokens[0].endsWith("@")) {
            ignoreCase = true;
            logger.debug("ignore case = true");
            itemTokens[0] = itemTokens[0].replaceAll("@", ""); // strip flag
            logger.debug("itemTokens[0]:" + itemTokens[0]);
        }

        Object value = doc.get(new Text(itemTokens[0]));
        if (value == null) // if the field is not present, a null Writable is returned
        {
            satisfiesQuery = false; // account for case if query field does not exist
        } else if (itemTokens[1].startsWith("[") || itemTokens[1].startsWith("{")) // Range query
        {
            boolean inclusive = itemTokens[1].startsWith("["); // '[' inclusive, '{' exclusive
            if (value instanceof Text) {
                if (!checkRangeQuery(inclusive, itemTokens[0], itemTokens[1], value.toString(), dataSchema)) {
                    logger.debug("checkRangeQuery returned false");
                    satisfiesQuery = false;
                }
            } else if (value instanceof WritableArrayWritable) {
                String[] elements = ((WritableArrayWritable) value).toStrings();
                boolean oneSatisfied = false;
                for (String element : elements) {
                    if (checkRangeQuery(inclusive, itemTokens[0], itemTokens[1], element, dataSchema)) {
                        logger.debug("checkRangeQuery returned true");
                        oneSatisfied = true;
                        break;
                    }
                }
                satisfiesQuery = oneSatisfied;
            }
        } else
        // Not a range query
        {
            if (value instanceof Text) {
                String valueString = value.toString();
                if (ignoreCase) { // Case insensitivity
                    logger.debug("not a range query; itemstoken1:" + itemTokens[1]);
                    itemTokens[1] = itemTokens[1].toLowerCase();
                    valueString = valueString.toLowerCase();
                    logger.debug("valuestring after:" + valueString);
                }

                if (itemTokens[1].contains("*") || itemTokens[1].contains("?")) // Wildcard match
                {
                    logger.debug("itemTokens[1] = " + itemTokens[1] + " contains wildcard");
                    if (!Pattern.matches(wildcardToRegex(itemTokens[1]), valueString)) {
                        logger.debug("stringValue = " + valueString + " did not satisfy itemTokens[1] = "
                                + itemTokens[1]);
                        satisfiesQuery = false;
                    } else {
                        // only report success when the pattern really matched
                        logger.debug("stringValue = " + valueString + " did satisfy itemTokens[1] = "
                                + itemTokens[1]);
                    }
                } else if (!(valueString).equals(itemTokens[1])) // Single value match
                {
                    logger.debug("We do not have a single value match: stringValue " + valueString
                            + " != itemTokens[1] = " + itemTokens[1]);
                    satisfiesQuery = false;
                }
            } else if (value instanceof WritableArrayWritable) {
                String[] elements = ((WritableArrayWritable) value).toStrings();
                logger.debug("elements.size() = " + elements.length);

                boolean oneSatisfied = false;
                for (String element : elements) {
                    if (ignoreCase) { // Case insensitivity
                        itemTokens[1] = itemTokens[1].toLowerCase();
                        logger.debug("waw: itemtoken1 after:" + itemTokens[1]);
                        element = element.toLowerCase();
                        logger.debug("element after:" + element);
                    }

                    logger.debug("element: " + element);
                    if (itemTokens[1].contains("*") || itemTokens[1].contains("?")) // Wildcard match
                    {
                        logger.debug("itemTokens[1] = " + itemTokens[1] + " contains wildcard");
                        if (Pattern.matches(wildcardToRegex(itemTokens[1]), element)) {
                            logger.debug("stringValue = " + element + " satisfied itemTokens[1] = "
                                    + itemTokens[1]);
                            oneSatisfied = true;
                            break;
                        }
                    } else if (element.equals(itemTokens[1])) // Single value match
                    {
                        logger.debug("We have a single value match: stringValue " + element
                                + " = itemTokens[1] = " + itemTokens[1]);
                        oneSatisfied = true;
                        break;
                    }
                }
                satisfiesQuery = oneSatisfied;
            }
        }

        ++index; // Try to pick up the boolean operators
        if (index < (queryTokens.length - 1)) {
            if (queryTokens[index].equals("AND")) // Keep going only while every term so far matched
            {
                if (!satisfiesQuery) {
                    break;
                }
                ++index; // step over the operator to the next term
            } else if (queryTokens[index].equals("OR")) // Assume all OR's occur after all AND's
            {
                if (satisfiesQuery) // if we passed the query and it is not the first term
                {
                    break;
                } else {
                    ++index; // step over the operator to the next term
                    satisfiesQuery = true; // reset so that we pick up matches for the next term
                }
            } else if (!satisfiesQuery) {
                logger.debug("Does not satisfy the query and no boolean ops next...");
                break;
            }
        }
    }

    return satisfiesQuery;
}

From source file:org.apache.pirk.utils.StringUtils.java

License:Apache License

/**
 * Renders a MapWritable as a JSON string.
 * <p>
 * Each entry becomes one JSON member whose name and value are the {@code toString()} forms of
 * the entry's key and value.
 *
 * @param map the map to render
 * @return the JSON text produced by {@code JSONObject.toJSONString()}
 */
@SuppressWarnings("unchecked")
public static String mapWritableToString(MapWritable map) {
    // Going through a JSONObject keeps the output readable by the JSON read-in path
    JSONObject json = new JSONObject();
    for (Writable field : map.keySet()) {
        String name = field.toString();
        String text = map.get(field).toString();
        json.put(name, text);
    }

    String rendered = json.toJSONString();
    return rendered;
}

From source file:org.bigsolr.hadoop.SolrRecordWriter.java

License:Apache License

/**
 * Adds one record to Solr.
 * <p>
 * The Solr server handle is obtained on first use. A {@code MapWritable} value is converted to a
 * document with one field per entry, using the {@code toString()} forms of each key and value.
 * A {@code org.bigsolr.hadoop.SolrInputRecord} value is added as-is. Any other value type is
 * rejected with an {@code IOException}; earlier this path called {@code System.exit(0)}, which
 * stopped the whole JVM while reporting success.
 *
 * @param key   ignored
 * @param value the record to index
 * @throws IOException if the value type is unsupported or Solr rejects the document
 */
@Override
public void write(NullWritable key, Writable value) throws IOException {

    log.info("SolrRecordWriter ->  write");

    if (solr == null) {
        solr = SolrOperations.getSolrServer(conf);
    }

    String valueClass = value.getClass().getName();
    SolrInputDocument doc;
    if (valueClass.equals("org.apache.hadoop.io.MapWritable")) {
        MapWritable valueMap = (MapWritable) value;
        doc = new SolrInputDocument();

        for (Writable keyWritable : valueMap.keySet()) {
            // Look up by the key object itself: re-wrapping its text in a Text only finds
            // the entry when the key happens to be a Text
            Object fieldValue = valueMap.get(keyWritable);
            // Need to add proper conversion of object to Schema field type
            doc.addField(keyWritable.toString(), fieldValue.toString());
        }
    } else if (valueClass.equals("org.bigsolr.hadoop.SolrInputRecord")) {
        doc = (SolrInputDocument) value;
    } else {
        String message = "SolrRecordWriter write() Class for Value is not Supported: " + valueClass;
        log.error(message);
        throw new IOException(message);
    }

    try {
        solr.add(doc);
        //solr.commit(true,true);
    } catch (SolrServerException e) {
        log.error("SolrRecordWriter-- solr.add(doc) failed");
        throw new IOException(e);
    }

}