com.browseengine.bobo.tools.CarDataDigest.java Source code

Java tutorial

Introduction

Here is the source code for com.browseengine.bobo.tools.CarDataDigest.java

Source

/**
 * Bobo Browse Engine - High performance faceted/parametric search implementation 
 * that handles various types of semi-structured data.  Written in Java.
 * 
 * Copyright (C) 2005-2006  John Wang
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 * 
 * To contact the project administrators for the bobo-browse project, 
 * please go to https://sourceforge.net/projects/bobo-browse/, or 
 * send mail to owner@browseengine.com.
 */

package com.browseengine.bobo.tools;

import java.io.File;
import java.io.IOException;
import java.util.Random;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;

import com.browseengine.bobo.index.digest.FileDigester;

/**
 * Test-data generator that emits car-listing documents.
 *
 * <p>Each emitted document carries the seven string fields listed in
 * {@link #FIELD_NAMES}. Roughly one document in ten is synthesized from the
 * built-in {@link #cars} and {@link #cities} tables; the rest copy their field
 * values from a randomly chosen document of the seed index located at
 * {@code getDataFile()}. When the seed index holds no usable documents, every
 * car is synthesized.
 */
public class CarDataDigest extends FileDigester {

    /** Names of the fields written to every car document, in the order they are added. */
    private static final String[] FIELD_NAMES = { "color", "category", "city", "makemodel", "price", "year",
            "mileage" };

    /**
     * Car templates. Each entry is a comma-separated line:
     * {@code makemodel-path,category,color1[,color2...]}.
     *
     * NOTE(review): "Cold" in the Jetta and Passat entries looks like a typo
     * for "Gold"; left unchanged because the intended value cannot be
     * confirmed from this file.
     */
    private static final String[] cars = new String[] { "Asian/Toyota/Prius,Hybrid,Silver,Gold,Blue,White,Magenta",
            "Asian/Toyota/Avalon,Compact,Black,White,Gold,Silver",
            "Asian/Honda/Insight,Hybrid,Black,White,Blue,Silver",
            "Asian/Honda/Civic Hybrid,Hybrid,Black,White,Blue,Silver,Green",
            "European/Volkswagen/Jetta,Compact,Cold,Silver,White,Blue",
            "European/Volkswagen/Passat,Compact,Cold,Silver,White,Blue",
            "European/Volkswagen/Beetle,Compact,Red,Green,Yellow,Blue,Black",
            "European/Volkswagen/Santana,Compact,Red,Blue,Black",
            "North American/Plymouth/Horizon,Compact,Blue,White,Green,Silver",
            "North American/Chevrolet/Corvette,Exotic,Red,Yellow,Black" };

    /**
     * City paths, written with '/' separating the levels.
     *
     * The Texas entries previously used two spellings ("Taxas" and "Texas"),
     * which split one state across two path prefixes; they now agree.
     * NOTE(review): if the seed index already stores the old spellings,
     * copied documents will still carry them - confirm against the index.
     */
    private static final String[] cities = new String[] { "Australia/Gold Coast", "Australia/Melbourne",
            "Australia/Perth", "Australia/Sydney", "Australia/Wollongong", "Canada/Calgary", "Canada/Montreal",
            "Canada/Toronto", "Canada/Vancouver", "U.S.A./California/Los Angeles", "U.S.A./California/Sacramento",
            "U.S.A./California/San Diego", "U.S.A./California/San Francisco", "U.S.A./California/San Jose",
            "U.S.A./California/Sunnyvale", "U.S.A./Florida/Miami", "U.S.A./Florida/Orlando",
            "U.S.A./Florida/Palm Beach", "U.S.A./Florida/Tampa", "U.S.A./New York/Albany",
            "U.S.A./New York/Binghamton", "U.S.A./New York/Buffalo", "U.S.A./New York/New York",
            "U.S.A./New York/Rochester", "U.S.A./New York/Syracuse", "U.S.A./Texas/Austin", "U.S.A./Texas/Dallas",
            "U.S.A./Texas/Houston", "U.S.A./Texas/San Antonio", "U.S.A./Utah/Provo", "U.S.A./Utah/Salt Lake City",
            "U.S.A./Washington D.C." };

    /**
     * Adds one stored, non-analyzed field to {@code car}.
     *
     * @param car   document being built
     * @param name  field name
     * @param value field value
     */
    private static void addField(Document car, String name, String value) {
        car.add(new Field(name, value, Store.YES, Index.NOT_ANALYZED));
    }

    /**
     * Copies every field named in {@link #FIELD_NAMES} from {@code doc} into {@code car}.
     *
     * @param car document being built
     * @param doc source document whose field values are copied
     */
    private static void makeCar(Document car, Document doc) {
        for (String name : FIELD_NAMES) {
            addField(car, name, doc.get(name));
        }
    }

    /**
     * Fills {@code car} from a template line, choosing the colour, city, year,
     * price and mileage at random.
     *
     * @param car     document being built
     * @param carLine template in the format described on {@link #cars}
     * @param rand    shared random source (avoids allocating a new generator per car)
     */
    private static void makeCar(Document car, String carLine, Random rand) {
        String[] parts = carLine.split(",");
        // parts[0] is the make/model path, parts[1] the category; the rest are colours.
        int numColors = parts.length - 2;
        String make = parts[0];
        String category = parts[1];

        String color = parts[rand.nextInt(numColors) + 2];
        String city = cities[rand.nextInt(cities.length)];

        // Year in 1993..2002.
        String year = String.valueOf(rand.nextInt(10) + 1993);
        // Price in 2100..19400, in steps of 100.
        String price = String.valueOf((rand.nextInt(174) + 21) * 100);
        // Mileage in 10100..18000, in steps of 100.
        String mileage = String.valueOf((rand.nextInt(80) + 101) * 100);

        addField(car, "color", color);
        addField(car, "category", category);
        addField(car, "city", city);
        addField(car, "makemodel", make);
        addField(car, "price", price);
        addField(car, "year", year);
        addField(car, "mileage", mileage);
    }

    /**
     * Loads every non-deleted document of {@code reader} into memory.
     *
     * @param reader open reader over the seed index
     * @return the live documents; empty when the index has none
     * @throws IOException if a document cannot be read
     */
    private static Document[] loadLiveDocuments(IndexReader reader) throws IOException {
        int maxDoc = reader.maxDoc();
        Document[] buffer = new Document[maxDoc];
        int live = 0;
        for (int i = 0; i < maxDoc; ++i) {
            // maxDoc() also counts deleted slots; skip them before loading.
            if (!reader.isDeleted(i)) {
                buffer[live++] = reader.document(i);
            }
        }
        if (live == maxDoc) {
            return buffer;
        }
        Document[] trimmed = new Document[live];
        System.arraycopy(buffer, 0, trimmed, 0, live);
        return trimmed;
    }

    /**
     * Creates a digester over the given data file.
     *
     * @param file passed to the superclass; {@link #digest} opens the value
     *             returned by {@code getDataFile()} as a Lucene index directory
     */
    public CarDataDigest(File file) {
        super(file);
    }

    /**
     * Generates {@code getMaxDocs()} car documents and passes each one to
     * {@code handler}.
     *
     * <p>For each car, a one-in-ten random draw selects synthesis from the
     * built-in tables; otherwise the car is copied from a random seed
     * document. If the seed index contains no live documents, all cars are
     * synthesized instead of failing.
     *
     * @param handler receives every generated document via {@code handleDocument}
     * @throws IOException if the seed index cannot be opened or read, or if
     *                     it is propagated from the handler
     */
    public void digest(DataHandler handler) throws IOException {
        int numcars = getMaxDocs();
        Random rand = new Random();

        IndexReader reader = null;
        try {
            reader = IndexReader.open(FSDirectory.open(getDataFile()), true);
            Document[] docCache = loadLiveDocuments(reader);
            int carcount = docCache.length;

            for (int i = 0; i < numcars; ++i) {
                if (i != 0 && i % 1000 == 0) {
                    System.out.println(i + " cars indexed.");
                }
                Document doc = new Document();
                // Random.nextInt(0) would throw, so with no seed documents
                // every car must be synthesized.
                boolean synthesize = carcount == 0 || rand.nextInt(10) == 0;
                if (synthesize) {
                    makeCar(doc, cars[rand.nextInt(cars.length)], rand);
                } else {
                    makeCar(doc, docCache[rand.nextInt(carcount)]);
                }

                // Inherited hook; its behavior is defined by the superclass.
                populateDocument(doc, null);
                handler.handleDocument(doc);
            }
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }

}