Example usage for org.apache.hadoop.util.hash Hash JENKINS_HASH

List of usage examples for org.apache.hadoop.util.hash Hash JENKINS_HASH

Introduction

On this page you can find example usages of org.apache.hadoop.util.hash Hash JENKINS_HASH.

Prototype

int JENKINS_HASH

To view the source code for org.apache.hadoop.util.hash Hash JENKINS_HASH, click the Source Link.

Document

Constant to denote JenkinsHash.

Usage

From source file:edu.isi.karma.controller.command.publish.PublishRDFCommand.java

License:Apache License

/**
 * Generates RDF for the worksheet identified by {@code worksheetId}, writes it to a
 * {@code .ttl} file under the RDF publish directory, optionally maintains bloom filters
 * in the model repository, and finally saves the generated file to the triple store.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Save the command preferences and look up the worksheet and its alignment
 *       (an {@link ErrorUpdate} is returned when there is no alignment).</li>
 *   <li>Build the KR2RML mapping from the alignment; when both a model URL and a model
 *       context are set on the worksheet, try to replace that mapping with the one stored
 *       in the model repository (failures here are logged and the generated mapping is kept).</li>
 *   <li>Generate the RDF file and, if {@code saveToStore} is set, push it to the Jena store.</li>
 *   <li>If {@code generateBloomFilters} is set, upload the bloom filters and re-check them
 *       against what the repository returns, re-uploading on mismatch.</li>
 *   <li>Save the RDF file to {@code tripleStoreUrl} (defaulting to
 *       {@code TripleStoreUtil.defaultDataRepoUrl}) and mark the model as having data.</li>
 * </ol>
 *
 * @param workspace the workspace that owns the worksheet being published
 * @return an update carrying the published file URL and error report on success, or an
 *         {@link ErrorUpdate} describing the failure
 * @throws CommandException declared by the command contract; every failure visible in this
 *         method is reported through the returned {@link ErrorUpdate} instead
 */
@Override
public UpdateContainer doIt(Workspace workspace) throws CommandException {

    // Save the preferences
    savePreferences(workspace);

    Worksheet worksheet = workspace.getWorksheet(worksheetId);
    this.worksheetName = worksheet.getTitle();

    // Prepare the file path and names
    final String rdfFileName = workspace.getCommandPreferencesId() + worksheetId + ".ttl";
    final String rdfFileLocalPath = ServletContextParameterMap
            .getParameterValue(ContextParameter.RDF_PUBLISH_DIR) + rdfFileName;

    // Get the alignment for this worksheet
    Alignment alignment = AlignmentManager.Instance()
            .getAlignment(AlignmentManager.Instance().constructAlignmentId(workspace.getId(), worksheetId));

    if (alignment == null) {
        logger.info("Alignment is NULL for " + worksheetId);
        return new UpdateContainer(new ErrorUpdate("Please align the worksheet before generating RDF!"));
    }

    // Generate the KR2RML data structures for the RDF generation
    final ErrorReport errorReport = new ErrorReport();
    KR2RMLMappingGenerator mappingGen = null;
    String url = worksheet.getMetadataContainer().getWorksheetProperties().getPropertyValue(Property.modelUrl);
    String modelContext = worksheet.getMetadataContainer().getWorksheetProperties()
            .getPropertyValue(Property.modelContext);
    TripleStoreUtil utilObj = new TripleStoreUtil();
    String modelRepoUrl = worksheet.getMetadataContainer().getWorksheetProperties()
            .getPropertyValue(Property.modelRepository);
    // Fall back to the default models repository when none is configured on the worksheet
    modelRepoUrl = modelRepoUrl == null || modelRepoUrl.isEmpty() ? TripleStoreUtil.defaultModelsRepoUrl
            : modelRepoUrl;
    Map<String, String> bloomfilterMapping = new HashMap<String, String>();
    boolean result = true;
    try {
        mappingGen = new KR2RMLMappingGenerator(workspace, worksheet,
                alignment, worksheet.getSemanticTypes(), rdfSourcePrefix, rdfSourceNamespace,
                Boolean.valueOf(addInverseProperties), errorReport);
    } catch (KarmaException e) {
        logger.error("Error occured while generating RDF!", e);
        return new UpdateContainer(new ErrorUpdate("Error occured while generating RDF: " + e.getMessage()));
    }

    KR2RMLMapping mapping = mappingGen.getKR2RMLMapping();
    if (url != null && !url.trim().isEmpty() && modelContext != null && !modelContext.trim().isEmpty()) {
        // Best effort: prefer the mapping stored in the model repository. Any failure is
        // logged and the mapping generated above is used instead.
        try {
            // NOTE(review): fixed relative file name; concurrent publishes would share it - confirm.
            File tmp = new File("tmp");
            PrintWriter pw = new PrintWriter(tmp);
            Model model;
            try {
                try {
                    pw.println(utilObj.getMappingFromTripleStore(modelRepoUrl, modelContext, url));
                } finally {
                    // Close even when fetching the mapping fails so the file handle is not leaked
                    pw.close();
                }
                model = WorksheetR2RMLJenaModelParser.loadSourceModelIntoJenaModel(tmp.toURI().toURL());
            } finally {
                // Remove the scratch file whether or not the load succeeded
                tmp.delete();
            }
            R2RMLMappingIdentifier identifier = new R2RMLMappingIdentifier(mapping.getId().getName(),
                    new URL(url));
            WorksheetR2RMLJenaModelParser parser = new WorksheetR2RMLJenaModelParser(model, identifier);
            mapping = parser.parse();
        } catch (Exception e) {
            logger.error("Error occured while loading the mapping from the model repository!", e);
        }
    }
    logger.debug(mapping.toString());

    // Receives the bloom filter writer's output, parsed as JSON further down
    StringWriter sw = new StringWriter();
    // Generate the RDF using KR2RML data structures
    long start = 0;
    BufferedWriter bw = null;
    try {
        List<KR2RMLRDFWriter> writers = new ArrayList<KR2RMLRDFWriter>();
        File f = new File(rdfFileLocalPath);
        File parentDir = f.getParentFile();
        parentDir.mkdirs();
        bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), "UTF-8"));
        N3KR2RMLRDFWriter writer = new N3KR2RMLRDFWriter(
                new URIFormatter(workspace.getOntologyManager(), errorReport), new PrintWriter(bw));
        writer.setBaseURI(rdfSourceNamespace);
        writers.add(writer);
        if (generateBloomFilters) {
            writers.add(new BloomFilterKR2RMLRDFWriter(new PrintWriter(sw), mapping.getId(), false,
                    this.rdfSourceNamespace));
        }
        KR2RMLWorksheetRDFGenerator rdfGen = new KR2RMLWorksheetRDFGenerator(worksheet, workspace.getFactory(),
                workspace.getOntologyManager(), writers, false, mapping, errorReport);

        rdfGen.generateRDF(true);
        logger.info("RDF written to file: " + rdfFileLocalPath);
        if (saveToStore) {
            // Take the contents of the RDF file and save them to the store
            logger.info("Using Jena DB:" + hostName + "/" + dbName + " user=" + userName);
            saveToStore(rdfFileLocalPath);
        }
        start = System.currentTimeMillis();
        if (generateBloomFilters) {
            JSONObject obj = new JSONObject(sw.toString());
            result &= updateTripleStore(obj, bloomfilterMapping, modelRepoUrl, modelContext, utilObj);
            // Read the filters back from the repository and compare them with the generated ones
            Map<String, String> verification = new HashMap<String, String>();
            Set<String> triplemaps = new HashSet<String>(Arrays.asList(obj.getString("ids").split(",")));
            verification.putAll(utilObj.getBloomFiltersForMaps(modelRepoUrl, modelContext, triplemaps));
            boolean verify = true;
            for (Entry<String, String> entry : verification.entrySet()) {
                String key = entry.getKey();
                String value = entry.getValue();
                KR2RMLBloomFilter bf2 = new KR2RMLBloomFilter(KR2RMLBloomFilter.defaultVectorSize,
                        KR2RMLBloomFilter.defaultnbHash, Hash.JENKINS_HASH);
                KR2RMLBloomFilter bf = new KR2RMLBloomFilter(KR2RMLBloomFilter.defaultVectorSize,
                        KR2RMLBloomFilter.defaultnbHash, Hash.JENKINS_HASH);
                bf2.populateFromCompressedAndBase64EncodedString(value);
                bf.populateFromCompressedAndBase64EncodedString(obj.getString(key));
                // (stored AND generated) XOR generated: presumably leaves only the bits of the
                // generated filter that the stored filter lacks - TODO confirm and/xor are bitwise
                bf2.and(bf);
                bf2.xor(bf);
                try {
                    // The bit vector is read reflectively from BloomFilter's "bits" field
                    Field f1 = BloomFilter.class.getDeclaredField("bits");
                    f1.setAccessible(true);
                    BitSet bits = (BitSet) f1.get(bf2);
                    if (bits.cardinality() != 0) {
                        verify = false;
                        break;
                    }
                } catch (Exception e) {
                    // Best effort: an uninspectable filter is still treated as verified,
                    // but the failure is no longer silent.
                    logger.error("Unable to inspect bloom filter bits for " + key, e);
                }
            }
            if (!verify) {
                result &= updateTripleStore(obj, verification, modelRepoUrl, modelContext, utilObj);
            }
            long end = System.currentTimeMillis();
            logger.info("execution time: " + (end - start) + " node total: " + bloomfilterMapping.size());
        }
    } catch (Exception e1) {
        logger.error("Error occured while generating RDF!", e1);
        return new UpdateContainer(new ErrorUpdate("Error occured while generating RDF: " + e1.getMessage()));
    } finally {
        // Release the RDF file handle on every path; closing an already-closed writer is a no-op.
        if (bw != null) {
            try {
                bw.close();
            } catch (Exception closeFailure) {
                logger.error("Error occured while closing RDF file: " + rdfFileLocalPath, closeFailure);
            }
        }
    }
    try {
        if (tripleStoreUrl == null || tripleStoreUrl.isEmpty()) {
            // Set to default
            tripleStoreUrl = TripleStoreUtil.defaultDataRepoUrl;
        }
        logger.info("tripleStoreURl : " + tripleStoreUrl);

        result &= utilObj.saveToStore(rdfFileLocalPath, tripleStoreUrl, this.graphUri, this.replaceContext,
                this.rdfSourceNamespace);
        if (url != null && !url.isEmpty() && utilObj.testURIExists(modelRepoUrl, "", url)) {
            // Record in the model repository that this model now has published data:
            // builds the triple  <url> <MODEL_HAS_DATA_URI> "true" .
            StringBuilder sb = new StringBuilder();
            url = url.trim();
            if (!url.startsWith("<")) {
                sb.append("<");
            }
            sb.append(url);
            if (!url.endsWith(">")) {
                sb.append(">");
            }
            sb.append(" <");
            sb.append(Uris.MODEL_HAS_DATA_URI);
            sb.append("> \"true\" .\n");
            String input = sb.toString();
            result &= utilObj.saveToStore(input, modelRepoUrl, modelContext, Boolean.FALSE,
                    this.rdfSourceNamespace);
        }
        if (result) {
            logger.info("Saved rdf to store");
        } else {
            logger.error("Falied to store rdf to karma_data store");
            return new UpdateContainer(new ErrorUpdate("Error: Failed to store RDF to the triple store"));
        }
    } catch (Exception e) {
        logger.error("Error occured while saving RDF to the triple store!", e);
        return new UpdateContainer(new ErrorUpdate("Error occured while generating RDF: " + e.getMessage()));
    }

    try {
        return new UpdateContainer(new AbstractUpdate() {
            public void generateJson(String prefix, PrintWriter pw, VWorkspace vWorkspace) {
                JSONObject outputObject = new JSONObject();
                try {
                    outputObject.put(PublishRDFCommandJsonKeys.updateType.name(), "PublishRDFUpdate");
                    outputObject.put(PublishRDFCommandJsonKeys.fileUrl.name(), ServletContextParameterMap
                            .getParameterValue(ContextParameter.RDF_PUBLISH_RELATIVE_DIR) + rdfFileName);
                    outputObject.put(PublishRDFCommandJsonKeys.worksheetId.name(), worksheetId);
                    outputObject.put(PublishRDFCommandJsonKeys.errorReport.name(), errorReport.toJSONString());
                    pw.println(outputObject.toString(4));
                } catch (JSONException e) {
                    logger.error("Error occured while generating JSON!", e);
                }
            }
        });
    } catch (Exception e) {
        return new UpdateContainer(new ErrorUpdate(e.getMessage()));
    }
}

From source file:edu.isi.karma.rdf.bloom.TestJSONRDFGeneratorWithBloomFilters.java

License:Apache License

/**
 * Test method for
 * {@link edu.isi.karma.rdf.JSONRDFGenerator#generateRDF(java.lang.String, java.lang.String, boolean, java.io.PrintWriter)}
 * .
 *
 * <p>Builds bloom filter managers for the "people.json" and "schedule.csv" sources, then
 * checks per-person membership, the estimated number of hashed values, and the estimates
 * obtained after OR/AND-combining filters into a fresh one.
 */
@Test
public void testGenerateRDF1() {
    final String personUriStart = "<http://lod.isi.edu/cs548/person/";
    try {
        KR2RMLBloomFilterManager peopleManager = getBloomFilterManagerForSource("people.json",
                InputType.JSON, "people-model");
        KR2RMLBloomFilterManager scheduleManager = getBloomFilterManagerForSource("schedule.csv",
                InputType.CSV, "schedule-model");

        // People that carry a twitter id, checked in a fixed order
        KR2RMLBloomFilter twitterIdFilter = peopleManager.getBloomFilter(
                "http://isi.edu/integration/karma/dev#PredicateObjectMap_5fcf2d39-f62b-4cdd-863e-bde21493e1bd");
        String[] candidates = { "Slepicka", "Taheriyan", "Kozareva", "Ambite", "Szekely", "Knoblock", "Wu" };
        boolean[] expectedMember = { false, true, false, false, true, true, false };
        for (int i = 0; i < candidates.length; i++) {
            Key probe = new Key((personUriStart + candidates[i] + ">").getBytes());
            if (expectedMember[i]) {
                assertTrue(twitterIdFilter.membershipTest(probe));
            } else {
                assertFalse(twitterIdFilter.membershipTest(probe));
            }
        }
        assertEquals(3, twitterIdFilter.estimateNumberOfHashedValues());

        // Every person listed below must be present in the schedule's person filter
        KR2RMLBloomFilter scheduleFilter = scheduleManager.getBloomFilter(
                "http://isi.edu/integration/karma/dev#TriplesMap_413a6176-d893-45aa-b1c2-6661b5c491ab");
        String[] scheduled = { "Slepicka", "Taheriyan", "Ambite", "Szekely", "Knoblock" };
        for (int i = 0; i < scheduled.length; i++) {
            Key probe = new Key((personUriStart + scheduled[i] + ">").getBytes());
            assertTrue(scheduleFilter.membershipTest(probe));
        }
        assertEquals(5, scheduleFilter.estimateNumberOfHashedValues());

        // Empty filter OR twitter filter, then AND schedule filter
        KR2RMLBloomFilter peopleAndSchedule = new KR2RMLBloomFilter(KR2RMLBloomFilter.defaultVectorSize,
                KR2RMLBloomFilter.defaultnbHash, Hash.JENKINS_HASH);
        peopleAndSchedule.or(twitterIdFilter);
        peopleAndSchedule.and(scheduleFilter);
        assertEquals(3, peopleAndSchedule.estimateNumberOfHashedValues());

        KR2RMLBloomFilter instructorFilter = scheduleManager.getBloomFilter(
                "http://isi.edu/integration/karma/dev#RefObjectMap_bb82f923-2953-4bd4-bc7b-d1196e05dbf6");
        Key szekely = new Key((personUriStart + "Szekely" + ">").getBytes());
        assertTrue(instructorFilter.membershipTest(szekely));

        // Empty filter OR instructor filter, then AND twitter filter
        KR2RMLBloomFilter instructorAndPeople = new KR2RMLBloomFilter(KR2RMLBloomFilter.defaultVectorSize,
                KR2RMLBloomFilter.defaultnbHash, Hash.JENKINS_HASH);
        instructorAndPeople.or(instructorFilter);
        instructorAndPeople.and(twitterIdFilter);
        assertEquals(3, instructorAndPeople.estimateNumberOfHashedValues());

    } catch (Exception e) {
        logger.error("testGenerateRDF1 failed:", e);
        fail("Exception: " + e.getMessage());
    }
}

From source file:edu.isi.karma.rdf.TestJSONRDFGeneratorWithBloomFilters.java

License:Apache License

/**
 * Test method for/*  ww w  .j  ava  2  s.  com*/
 * {@link edu.isi.karma.rdf.JSONRDFGenerator#generateRDF(java.lang.String, java.lang.String, boolean, java.io.PrintWriter)}
 * .
 */
@Test
public void testGenerateRDF1() {
    try {

        KR2RMLBloomFilterManager peopleBloomFilterManager = getBloomFilterManagerForSource("people.json",
                InputType.JSON, "people-model");
        KR2RMLBloomFilterManager scheduleBloomFilterManager = getBloomFilterManagerForSource("schedule.csv",
                InputType.CSV, "schedule-model");
        KR2RMLBloomFilter peoplePersonWithTwitterIdBF = peopleBloomFilterManager.getBloomFilter(
                "http://isi.edu/integration/karma/dev#PredicateObjectMap_1941470a-1dfb-4716-803b-5f07a4af90fd");
        Key k = new Key(("<http://lod.isi.edu/cs548/person/Slepicka>").getBytes());
        assertFalse(peoplePersonWithTwitterIdBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Taheriyan>").getBytes());
        assertTrue(peoplePersonWithTwitterIdBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Kozareva>").getBytes());
        assertFalse(peoplePersonWithTwitterIdBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Ambite>").getBytes());
        assertFalse(peoplePersonWithTwitterIdBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Szekely>").getBytes());
        assertTrue(peoplePersonWithTwitterIdBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Knoblock>").getBytes());
        assertTrue(peoplePersonWithTwitterIdBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Wu>").getBytes());
        assertFalse(peoplePersonWithTwitterIdBF.membershipTest(k));
        assertEquals(3, peoplePersonWithTwitterIdBF.estimateNumberOfHashedValues());
        KR2RMLBloomFilter schedulePersonBF = scheduleBloomFilterManager.getBloomFilter(
                "http://isi.edu/integration/karma/dev#TriplesMap_413a6176-d893-45aa-b1c2-6661b5c491ab");
        k = new Key(("<http://lod.isi.edu/cs548/person/Slepicka>").getBytes());
        assertTrue(schedulePersonBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Taheriyan>").getBytes());
        assertTrue(schedulePersonBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Ambite>").getBytes());
        assertTrue(schedulePersonBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Szekely>").getBytes());
        assertTrue(schedulePersonBF.membershipTest(k));
        k = new Key(("<http://lod.isi.edu/cs548/person/Knoblock>").getBytes());
        assertTrue(schedulePersonBF.membershipTest(k));
        assertEquals(5, schedulePersonBF.estimateNumberOfHashedValues());

        KR2RMLBloomFilter intersectionBF = new KR2RMLBloomFilter(KR2RMLBloomFilter.defaultVectorSize,
                KR2RMLBloomFilter.defaultnbHash, Hash.JENKINS_HASH);
        intersectionBF.or(peoplePersonWithTwitterIdBF);
        intersectionBF.and(schedulePersonBF);
        assertEquals(3, intersectionBF.estimateNumberOfHashedValues());

        KR2RMLBloomFilter hasInstructorBF = scheduleBloomFilterManager.getBloomFilter(
                "http://isi.edu/integration/karma/dev#RefObjectMap_bb82f923-2953-4bd4-bc7b-d1196e05dbf6");

        k = new Key(("<http://lod.isi.edu/cs548/person/Szekely>").getBytes());
        assertTrue(hasInstructorBF.membershipTest(k));
        intersectionBF = new KR2RMLBloomFilter(KR2RMLBloomFilter.defaultVectorSize,
                KR2RMLBloomFilter.defaultnbHash, Hash.JENKINS_HASH);
        intersectionBF.or(hasInstructorBF);
        intersectionBF.and(peoplePersonWithTwitterIdBF);
        assertEquals(3, intersectionBF.estimateNumberOfHashedValues());

    } catch (Exception e) {
        logger.error("testGenerateRDF1 failed:", e);
        fail("Exception: " + e.getMessage());
    }
}

From source file:org.apache.accumulo.core.bloomfilter.Filter.java

License:Open Source License

/**
 * Reads the filter header from {@code in} and rebuilds the hash function.
 *
 * <p>The first int is remembered in {@code rVersion} and interpreted as follows:
 * <ul>
 *   <li>positive: old unversioned format; the value itself is the number of hash
 *       functions and the hash type is {@link Hash#JENKINS_HASH};</li>
 *   <li>{@code VERSION} or {@code VERSION + 1}: the number of hash functions (int) and
 *       the hash type (byte) follow;</li>
 *   <li>anything else: rejected.</li>
 * </ul>
 * The vector size (int) is read last in both accepted formats.
 *
 * @param in the input to read the serialized header from
 * @throws IOException if the version marker is not supported, or if reading fails
 */
@Override
public void readFields(final DataInput in) throws IOException {
    final int ver = in.readInt();
    rVersion = ver;
    if (ver > 0) { // old unversioned format
        this.nbHash = ver;
        this.hashType = Hash.JENKINS_HASH;
    } else if (ver == VERSION || ver == VERSION + 1) { // Support for directly serializing the bitset
        this.nbHash = in.readInt();
        this.hashType = in.readByte();
    } else {
        throw new IOException("Unsupported version: " + ver);
    }
    this.vectorSize = in.readInt();
    this.hash = new HashFunction(this.vectorSize, this.nbHash, this.hashType);
}

From source file:org.apache.giraph.utils.bloom.Filter.java

License:Open Source License

/**
 * Reads the filter header from {@code in} and rebuilds the hash function.
 *
 * <p>A positive leading int denotes the old unversioned format, where that int is the
 * number of hash functions and the hash type is {@link Hash#JENKINS_HASH}. A leading int
 * equal to {@code VERSION} is followed by the number of hash functions (int) and the
 * hash type (byte). The vector size (int) is read last in both cases.
 *
 * @param in the input to read the serialized header from
 * @throws IOException if the leading int is neither positive nor {@code VERSION}, or if
 *         reading fails
 */
public void readFields(DataInput in) throws IOException {
    final int marker = in.readInt();
    final boolean unversioned = marker > 0;

    // Reject unknown markers up front, before any field is touched
    if (!unversioned && marker != VERSION) {
        throw new IOException("Unsupported version: " + marker);
    }

    if (unversioned) {
        nbHash = marker;
        hashType = Hash.JENKINS_HASH;
    } else {
        nbHash = in.readInt();
        hashType = in.readByte();
    }

    vectorSize = in.readInt();
    hash = new HashFunction(vectorSize, nbHash, hashType);
}

From source file:org.apache.mahout.utils.nlp.collocations.llr.BloomTokenFilterTest.java

License:Apache License

/**
 * Creates a {@code BloomFilter} constructed with {@code (100, 50, Hash.JENKINS_HASH)} and
 * adds every entry of {@code tokens} to it.
 *
 * @param tokens the tokens to add, in order
 * @return the populated filter
 * @throws IOException declared for callers; presumably raised by {@code setKey} - confirm
 */
private static Filter getFilter(String[] tokens) throws IOException {
    final Filter bloom = new BloomFilter(100, 50, Hash.JENKINS_HASH);
    // A single Key instance is refilled for each token rather than allocating one per token
    final Key reusableKey = new Key();
    for (int i = 0; i < tokens.length; i++) {
        setKey(reusableKey, tokens[i]);
        bloom.add(reusableKey);
    }
    return bloom;
}

From source file:org.apache.pig.builtin.BuildBloomBase.java

License:Apache License

/**
 * Maps a hash function name to the matching {@link Hash} constant.
 *
 * <p>The match is case-insensitive and by substring: any name containing "jenkins" yields
 * {@link Hash#JENKINS_HASH}, otherwise any name containing "murmur" yields
 * {@link Hash#MURMUR_HASH}.
 *
 * @param hashType the hash function name; must not be {@code null}
 * @return {@link Hash#JENKINS_HASH} or {@link Hash#MURMUR_HASH}
 * @throws RuntimeException if the name contains neither "jenkins" nor "murmur"
 */
private int convertHashType(String hashType) {
    // Lower-case with a fixed locale: under a Turkish default locale "JENKINS" would
    // otherwise become "jenk\u0131ns" (dotless i) and fail the match below.
    final String normalized = hashType.toLowerCase(java.util.Locale.ROOT);
    if (normalized.contains("jenkins")) {
        return Hash.JENKINS_HASH;
    } else if (normalized.contains("murmur")) {
        return Hash.MURMUR_HASH;
    } else {
        throw new RuntimeException("Unknown hash type " + hashType + ".  Valid values are jenkins and murmur.");
    }
}