bulkload.UploadIndexes.java Source code

Java tutorial

Introduction

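Here is the source code for bulkload.UploadIndexes.java, a command-line tool that converts an inverted-index text file into Cassandra SSTables that can then be bulk-loaded with sstableloader.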

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package bulkload;

import java.io.*;
import java.util.*;

import com.carrotsearch.hppc.IntArrayList;
import org.apache.cassandra.config.Config;
import org.apache.cassandra.dht.Murmur3Partitioner;
import org.apache.cassandra.exceptions.InvalidRequestException;
import org.apache.cassandra.io.sstable.CQLSSTableWriter;

/**
 * Converts an inverted-index text file into Cassandra SSTables that can be bulk-loaded with
 * {@code sstableloader}.
 *
 * <p>Usage: {@code java bulkload.UploadIndexes /path/to/inverted_indexes.txt}
 *
 * <p>Each input line is a comma-separated list of integers: {@code key,tuple[,tuple...]}.
 * {@code key} is parsed as a 64-bit value; its upper 32 bits become the {@code word} column and
 * its lower 32 bits the {@code predicate} column. The remaining fields are sorted ascending and
 * stored in the {@code tuples} column. Lines that carry no tuple field are ignored.
 *
 * <p>The SSTables are written to {@code <Constants.KEYSPACE>/<Constants.CF_INDEX>} relative to the
 * working directory. The process exits with status 1 if the input could not be read, if any line
 * had to be skipped, or if the writer could not be closed.
 */
public class UploadIndexes {
    //////////////////////////////////////////////////////////////////////////////////////////////////////////////

    /** CQL definition of the target table; one row per (predicate, word) pair. */
    private static final String SCHEMA_2 = String.format("CREATE TABLE %s.%s (" + "predicate int, " + "word int, "
            + "tuples list<int>, " + "PRIMARY KEY ((predicate, word)) " + ") ", Constants.KEYSPACE,
            Constants.CF_INDEX);

    /** Insert statement bound once per input line. */
    private static final String INSERT_STMT_2 = String.format(
            "INSERT INTO %s.%s (predicate, word, tuples) VALUES (?, ?, ?)", Constants.KEYSPACE, Constants.CF_INDEX);

    /** A progress line is printed every time this many input lines have been processed. */
    private static final int PROGRESS_INTERVAL = 1_000_000;

    //////////////////////////////////////////////////////////////////////////////////////////////////////////////

    /**
     * Parses the tuple ids of one split input line.
     *
     * @param parts the comma-separated fields of the line; index 0 is the key and is not parsed here
     * @return the values of {@code parts[1..]} as integers, sorted ascending
     * @throws NumberFormatException if a field is not a valid 32-bit integer
     */
    private static List<Integer> parseSortedTuples(String[] parts) {
        List<Integer> tuples = new ArrayList<>(parts.length - 1);
        for (int i = 1; i < parts.length; i++) {
            tuples.add(Integer.parseInt(parts[i].trim()));
        }
        Collections.sort(tuples);
        return tuples;
    }

    /**
     * Reads the inverted-index file line by line and adds one row per line to {@code writer}.
     *
     * <p>A line that cannot be parsed or that the writer rejects is reported on standard error and
     * skipped, so a single bad line does not abort the whole run.
     *
     * @param inputFile path of the inverted-index file
     * @param writer    destination SSTable writer; it is not closed by this method
     * @return the number of lines that were skipped because of an error
     * @throws IOException if the input file cannot be opened or read
     */
    private static long writeRows(String inputFile, CQLSSTableWriter writer) throws IOException {
        long skipped = 0;

        // Decode explicitly as UTF-8 so the result does not depend on the platform default charset.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                new FileInputStream(inputFile), java.nio.charset.StandardCharsets.UTF_8))) {

            long lineNumber = 0;
            long counter = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                lineNumber++;

                String[] parts = line.split(",");
                if (parts.length < 2) {
                    // A key without any tuple id: nothing to store.
                    continue;
                }

                try {
                    // The key packs two 32-bit ids: word in the upper half, predicate in the lower half.
                    long id = Long.parseLong(parts[0].trim());
                    int wordId = (int) (id >> 32);
                    int predicate = (int) id;

                    writer.addRow(predicate, wordId, parseSortedTuples(parts));
                } catch (NumberFormatException | InvalidRequestException | IOException e) {
                    skipped++;
                    System.err.println("skipping line " + lineNumber + ": " + e);
                }

                counter++;
                if ((counter % PROGRESS_INTERVAL) == 0) {
                    System.out.println(counter);
                }
            }
        }

        return skipped;
    }

    //////////////////////////////////////////////////////////////////////////////////////////////////////////////

    /**
     * Entry point.
     *
     * @param args exactly one element: the path of the inverted-index file (see
     *             {@code bulkload.CreateInvertedIndices})
     * @throws RuntimeException if the output directory cannot be created
     */
    public static void main(String[] args) {
        if (args.length != 1) {
            System.out.println(
                    "usage: java bulkload.UploadIndexes /path/to/inverted_indexes.txt (see bulkload.CreateInvertedIndices)");
            return;
        }

        String invertedIndexesFile = args[0];

        // Put the Cassandra libraries into client mode before the SSTable writer is built; this
        // program runs as a standalone tool, not inside a Cassandra node.
        Config.setClientMode(true);

        String outputPath = Constants.KEYSPACE + File.separator + Constants.CF_INDEX;
        File outputDir = new File(outputPath);
        if (!outputDir.exists() && !outputDir.mkdirs()) {
            throw new RuntimeException("Cannot create output directory: " + outputDir);
        }

        // Prepare SSTable writer
        CQLSSTableWriter.Builder builder = CQLSSTableWriter.builder();
        // set output directory
        builder.inDirectory(outputDir)
                // set target schema
                .forTable(SCHEMA_2)
                // set CQL statement to put data
                .using(INSERT_STMT_2)
                // set partitioner if needed
                // default is Murmur3Partitioner so set if you use different one.
                .withPartitioner(new Murmur3Partitioner());

        CQLSSTableWriter writer = builder.build();

        boolean success = true;
        try {
            long skipped = writeRows(invertedIndexesFile, writer);
            if (skipped > 0) {
                System.err.println(skipped + " line(s) were skipped; the generated SSTables are incomplete");
                success = false;
            }
        } catch (IOException | RuntimeException e) {
            System.err.println("failed while processing " + invertedIndexesFile);
            e.printStackTrace();
            success = false;
        } finally {
            // Always close the writer, and never hide a close failure: if close() fails the
            // files on disk must not be reported as ready for upload.
            try {
                writer.close();
            } catch (IOException e) {
                System.err.println("failed to close the SSTable writer; the generated SSTables may be unusable");
                e.printStackTrace();
                success = false;
            }
        }

        if (!success) {
            System.exit(1);
        }

        System.out.println("done");
        System.out.println("you can upload these files to Cassandra: sstableloader -d host " + outputPath);
        // Explicit exit kept from the original; presumably the Cassandra libraries leave
        // background threads running that would otherwise keep the JVM alive - TODO confirm.
        System.exit(0);

    } // main

}