// SegmentIndexWriter.java
// Listing path: Java Open Source » Search » bobo-browse » com » browseengine » local » service » tiger » SegmentIndexWriter.java
/**
 * Bobo Browse Engine - High performance faceted/parametric search implementation 
 * that handles various types of semi-structured data.  Written in Java.
 * 
 * Copyright (C) 2006  Spackle
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 * 
 * To contact the project administrators for the bobo-browse project, 
 * please go to https://sourceforge.net/projects/bobo-browse/.
 */

package com.browseengine.local.service.tiger;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

import com.browseengine.local.service.index.IndexConstants;
import com.browseengine.local.service.index.NamesFields;
import com.browseengine.local.service.index.RangesFields;
import com.browseengine.local.service.index.SegmentsFields;

/**
 * @author spackle
 *
 */
public class SegmentIndexWriter {
  private IndexWriter _segmentsWriter;
  private IndexWriter _namesWriter;
  private IndexWriter _rangesWriter;

  /**
   * assumes we can either create, or are in append mode
   * @param path
   * @throws TigerDataException
   * @throws IOException
   */
  public SegmentIndexWriter(File path) throws TigerDataException, IOException {
    if (path.exists()) {
      if (!path.isDirectory()) {
        throw new TigerDataException("path: "+path.getAbsolutePath()+" is not a valid directory");
      }
    } else if (!path.mkdirs()) {
      throw new TigerDataException("unable to create directory at path: "+path.getAbsolutePath());
    }
    Analyzer a = new AddressAnalyzer();
    // they should either all exist, or none exist
    File sdir = new File(path, IndexConstants.SEGMENTS_INDEX);
    File ndir = new File(path, IndexConstants.NAMES_INDEX);
    File rdir = new File(path, IndexConstants.RANGES_INDEX);
    boolean create;
    if (sdir.exists()) {
      if (!(sdir.exists() && sdir.isDirectory() &&
        ndir.exists() && ndir.isDirectory() &&
        rdir.exists() && rdir.isDirectory())) {
        throw new TigerDataException("one of sdir, ndir, rdir exists without the others, or one is not a directory, at path: "+path.getAbsolutePath());
      }
      // we have a valid existing set of directories
      create = false;
    } else {
      if (ndir.exists() || rdir.exists()) {
        throw new TigerDataException("one of sdir, ndir, rdir exists without the others, at path: "+path.getAbsolutePath());
      }
      // we have a valid set of directories to create
      create = true;
    }
    _segmentsWriter = new IndexWriter(sdir, a, create);
    _namesWriter = new IndexWriter(ndir, a, create);
    _rangesWriter = new IndexWriter(rdir, a, create);
  }
  
  public void addSegment(StorableSegment segment) throws IOException {
    Document doc = new Document();
    doc.add(new Field(SegmentsFields.TLID.getField(), ""+segment.getTLID(), 
        Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
    // NO_NORMS will not tokenize the city or state abbrev, so it will need to be an exact match
    if (segment.getPlaceL() != null) {
      doc.add(new Field(SegmentsFields.PLACEL.getField(), segment.getPlaceL(),
        Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
      doc.add(new Field(SegmentsFields.TOKENIZED_PLACEL.getField(), segment.getPlaceL(),
          Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));      
    }
    if (segment.getPlaceR() != null) {
      doc.add(new Field(SegmentsFields.PLACER.getField(), segment.getPlaceR(),
          Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
      doc.add(new Field(SegmentsFields.TOKENIZED_PLACER.getField(), segment.getPlaceR(),
          Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
    }
    if (segment.getStateL() != null) {
      doc.add(new Field(SegmentsFields.STATEL.getField(), segment.getStateL(),
        Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
    }
    if (segment.getStateR() != null) {
      doc.add(new Field(SegmentsFields.STATER.getField(), segment.getStateR(),
        Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
    }
    doc.add(new Field(SegmentsFields.START_LON.getField(), 
        ""+((long)segment.getStartLon()),
        Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
    doc.add(new Field(SegmentsFields.START_LAT.getField(), 
        ""+((long)segment.getStartLat()),
        Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
    doc.add(new Field(SegmentsFields.END_LON.getField(), 
        ""+((long)segment.getEndLon()),
        Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
    doc.add(new Field(SegmentsFields.END_LAT.getField(), 
        ""+((long)segment.getEndLat()),
        Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
    this._segmentsWriter.addDocument(doc);
    
    // store the names and address ranges.
    StorableSegment.Name[] names = segment.getNames();
    if (names != null) {
      for (StorableSegment.Name name : names) {
        doc = new Document();
        doc.add(new Field(NamesFields.TLID.getField(), ""+segment.getTLID(), 
            Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
        if (name.getPrefix() != null) {
          doc.add(new Field(NamesFields.PREFIX.getField(), name.getPrefix(), 
            Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
        }
        // TODO: FOR NOW, WE SKIP OVER MISSING TYPE IN A NAME, THOSE WITH 
        // NO NAMES OR NO ADDRESS RANGES
        doc.add(new Field(NamesFields.NAME.getField(), name.getName(),
            Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
        doc.add(new Field(NamesFields.TOKENIZED_NAME.getField(), name.getName(),
            Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO));
        doc.add(new Field(NamesFields.TYPE.getField(), name.getType(),
            Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
        if (name.getSuffix() != null) {
          doc.add(new Field(NamesFields.SUFFIX.getField(), name.getSuffix(), 
              Field.Store.YES, Field.Index.TOKENIZED));
        }
        this._namesWriter.addDocument(doc);
      }
    }
    
    StorableSegment.NumberAndZip[] ranges;
    boolean isLeft;
    for (int i = 0; i < 2; i++) {
      isLeft = (i%2 == 0 ? true : false);
      ranges = (isLeft ? segment.getLefts() : segment.getRights());
      if (ranges != null) {
        for (StorableSegment.NumberAndZip range : ranges) {
          doc = new Document();
          doc.add(new Field(RangesFields.TLID.getField(), ""+segment.getTLID(), 
              Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
          doc.add(new Field(RangesFields.LEFT.getField(), (isLeft ? "1" : "0"), 
              Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
          doc.add(new Field(RangesFields.IS_NUMERIC.getField(), (range.isNumeric() ? "1" : "0"), 
              Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
          if (range.getFrAdd() != null) {
            doc.add(new Field(RangesFields.FROM.getField(), range.getFrAdd(), 
                Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
          }
          if (range.getToAdd() != null) {
            doc.add(new Field(RangesFields.TO.getField(), range.getToAdd(), 
                Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
          }
          if (range.getZip5() > 0) {
            doc.add(new Field(RangesFields.ZIP5.getField(), pad(range.getZip5(),5), 
                Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
          }
          this._rangesWriter.addDocument(doc);
        }
      }
    }
  }

  public synchronized void optimize() throws IOException {
    if (_segmentsWriter != null) {
      _segmentsWriter.optimize();
    }
    if (_namesWriter != null) {
      _namesWriter.optimize();
    }
    if (_rangesWriter != null) {
      _rangesWriter.optimize();
    }
  }
  
  public synchronized void close() throws IOException {
    try {
      if (_segmentsWriter != null) {
        _segmentsWriter.close();
      }
    } finally {
      try {
        if (_namesWriter != null) {
          _namesWriter.close();
        }
      } finally {
        try {
          if (_rangesWriter != null) {
            _rangesWriter.close();
          }
        } finally {
          _segmentsWriter = _namesWriter = _rangesWriter = null;
        }
      }
    }
  }
  
  /**
   * assumes: i > 0 && ndigits > 0 && ndigits < log(Integer.MAX_INT)
   * 
   * @param i
   * @param ndigits
   * @return
   */
  private static String pad(int i, int ndigits) {
    int val = 1;
    while (--ndigits > 0) {
      val *= 10;
    }
    StringBuilder buf = new StringBuilder();
    while (i < val) {
      // we are in a pad situation
      buf.append('0');
      val /= 10;
    }
    return buf.toString()+i;
  }
  
  public static void main(String[] argv) {
    int i;
    int ndigits = 5;
    
    i = 2215;
    System.out.println("for zip: "+i+", pad is: "+pad(i, ndigits));

    i = 94108;
    System.out.println("for zip: "+i+", pad is: "+pad(i, ndigits));

    i = 156;
    System.out.println("for zip: "+i+", pad is: "+pad(i, ndigits));
    i = 10;
    System.out.println("for zip: "+i+", pad is: "+pad(i, ndigits));
    i = 9;
    System.out.println("for zip: "+i+", pad is: "+pad(i, ndigits));
    i = 1;
    System.out.println("for zip: "+i+", pad is: "+pad(i, ndigits));
  }
}
// java2s.com  | Contact Us | Privacy Policy
// Copyright 2009 - 12 Demo Source and Support. All rights reserved.
// All other trademarks are property of their respective owners.