part1.IndexBusiness.java Source code

Java tutorial

Introduction

Here is the source code for part1.IndexBusiness.java

Source

package part1;

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

import com.google.gson.Gson;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;

/**
 * Command-line tool that reads a file containing one JSON business record per
 * line and writes one Lucene document per record into a freshly created index.
 *
 * @author tejashree
 */
public class IndexBusiness {

    /** Input file used when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT_FILE =
            "/Users/arpitkhandelwal/Downloads/yelp_dataset_challenge_academic_dataset/business.json";

    /** Index directory used when none is supplied on the command line. */
    private static final String DEFAULT_INDEX_DIR = "index_business";

    /**
     * Indexes every record of the input file.
     *
     * @param args
     *            optional command line arguments: {@code args[0]} is the path
     *            of the JSON-lines input file and {@code args[1]} is the index
     *            directory; either falls back to its built-in default when
     *            absent
     * @throws FileNotFoundException
     *             if the input file cannot be opened
     * @throws IOException
     *             if reading the input or writing the index fails
     */
    public static void main(String[] args) throws FileNotFoundException, IOException {
        String inputFile = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_FILE;
        String indexDir = (args != null && args.length > 1) ? args[1] : DEFAULT_INDEX_DIR;

        Gson gson = new Gson();
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE);

        // The reader is opened first so that a missing input file fails before
        // the index directory is touched. try-with-resources closes the writer,
        // the directory and the reader (in that order) on every exit path.
        try (BufferedReader reader = new BufferedReader(
                     new InputStreamReader(new FileInputStream(inputFile), StandardCharsets.UTF_8));
             Directory dir = FSDirectory.open(Paths.get(indexDir));
             IndexWriter writer = new IndexWriter(dir, iwc)) {

            String line;
            long lineNumber = 0;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                if (line.trim().isEmpty()) {
                    continue; // nothing to parse on a blank line
                }
                try {
                    writer.addDocument(toDocument(gson, line));
                } catch (RuntimeException e) {
                    // A malformed or incomplete record must not abort the whole
                    // run: report it and carry on with the next line. I/O
                    // failures are not caught here and propagate to the caller.
                    System.err.println("Skipping record at line " + lineNumber + ": " + line);
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Converts one JSON record into a Lucene document.
     *
     * @param gson
     *            parser used to read the record
     * @param json
     *            a single JSON object in text form
     * @return the document holding the record's fields
     * @throws RuntimeException
     *             if the text is not a JSON object or a mandatory string field
     *             is missing
     */
    private static Document toDocument(Gson gson, String json) {
        // Parse the text once and derive the typed view from the parsed tree.
        JsonObject record = gson.fromJson(json, JsonObject.class);
        Business business = gson.fromJson(record, Business.class);

        Document doc = new Document();
        doc.add(new TextField("fulladdress", business.full_address, Field.Store.YES));
        doc.add(new StringField("businessid", business.business_id, Field.Store.YES));
        doc.add(new StringField("city", business.city, Field.Store.YES));
        doc.add(new StringField("open", String.valueOf(business.open), Field.Store.YES));
        doc.add(new StringField("name", business.name, Field.Store.YES));
        doc.add(new StoredField("longitude", business.longitude));
        doc.add(new StoredField("latitude", business.latitude));
        doc.add(new StoredField("reviewcount", business.review_count));
        doc.add(new StringField("state", business.state, Field.Store.YES));
        doc.add(new StoredField("stars", business.stars));
        doc.add(new StringField("type", business.type, Field.Store.YES));

        if (business.categories != null) {
            for (String category : business.categories) {
                doc.add(new SortedSetDocValuesField("category", new BytesRef(category)));
            }
        }
        if (business.neighborhoods != null) {
            for (String neighborhood : business.neighborhoods) {
                doc.add(new SortedSetDocValuesField("neighborhoods", new BytesRef(neighborhood)));
            }
        }

        // Each attribute becomes its own field. Key and value are read from the
        // map entry directly, so a value containing '=' is no longer truncated.
        // The value keeps its JSON text form (string values stay quoted), which
        // is what the previous string-splitting implementation stored.
        JsonElement attributes = record.get("attributes");
        if (attributes != null && attributes.isJsonObject()) {
            for (Map.Entry<String, JsonElement> attribute : attributes.getAsJsonObject().entrySet()) {
                String key = attribute.getKey().trim();
                String value = attribute.getValue().toString();
                doc.add(new TextField(key, value, Field.Store.YES));
            }
        }
        return doc;
    }

}