com.dasasian.chok.lucene.DocumentFrequencyWritable.java Source code

Java tutorial

Introduction

Here is the source code for com.dasasian.chok.lucene.DocumentFrequencyWritable.java

Source

/**
 * Copyright (C) 2014 Dasasian (damith@dasasian.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.dasasian.chok.lucene;

import com.google.common.base.MoreObjects;
import org.apache.hadoop.io.Writable;
import org.mortbay.log.Log;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

public class DocumentFrequencyWritable implements Writable {
    private ReadWriteLock _frequenciesLock = new ReentrantReadWriteLock(true);
    private Map<TermWritable, Integer> _frequencies = new HashMap<>();

    private AtomicLong _numDocs = new AtomicLong();

    public void put(final String field, final String term, final int frequency) {
        _frequenciesLock.writeLock().lock();
        try {
            add(new TermWritable(field, term), frequency);
        } finally {
            _frequenciesLock.writeLock().unlock();
        }
    }

    /**
     * Assumes a write lock is already in place.
     *
     * @param key       The item that has a frequency.
     * @param frequency The frequency of the key.
     */
    private void add(final TermWritable key, final int frequency) {
        int result = frequency;
        final Integer frequencyObject = _frequencies.get(key);
        if (frequencyObject != null) {
            result += frequencyObject;
        }
        _frequencies.put(key, result);
    }

    public void putAll(final Map<TermWritable, Integer> frequencyMap) {
        _frequenciesLock.writeLock().lock();
        try {
            final Set<TermWritable> keySet = frequencyMap.keySet();
            for (final TermWritable key : keySet) {
                add(key, frequencyMap.get(key));
            }
        } finally {
            _frequenciesLock.writeLock().unlock();
        }
    }

    public Integer get(final String field, final String term) {
        return get(new TermWritable(field, term));
    }

    public void addNumDocs(long numDocs) {
        if (Long.MAX_VALUE - numDocs - _numDocs.get() < 0) {
            Log.warn("max number of documents exceeded " + _numDocs.get() + " + " + numDocs);
            numDocs = Long.MAX_VALUE;
        }
        _numDocs.addAndGet(numDocs);
    }

    public Integer get(final TermWritable key) {
        _frequenciesLock.readLock().lock();
        try {
            return _frequencies.get(key);
        } finally {
            _frequenciesLock.readLock().unlock();
        }
    }

    public Map<TermWritable, Integer> getAll() {
        return Collections.unmodifiableMap(_frequencies);
    }

    public void readFields(final DataInput in) throws IOException {
        _frequenciesLock.writeLock().lock();
        try {
            final int size = in.readInt();
            for (int i = 0; i < size; i++) {
                final TermWritable term = new TermWritable();
                term.readFields(in);
                final int frequency = in.readInt();
                _frequencies.put(term, frequency);
            }
            _numDocs.set(in.readLong());
        } finally {
            _frequenciesLock.writeLock().unlock();
        }
    }

    public void write(final DataOutput out) throws IOException {
        _frequenciesLock.readLock().lock();
        try {
            out.writeInt(_frequencies.size());
            for (final TermWritable key : _frequencies.keySet()) {
                key.write(out);
                final Integer frequency = _frequencies.get(key);
                out.writeInt(frequency);
            }
            out.writeLong(_numDocs.get());
        } finally {
            _frequenciesLock.readLock().unlock();
        }
    }

    public long getNumDocs() {
        return _numDocs.get();
    }

    public int getNumDocsAsInteger() {
        if (_numDocs.get() > Integer.MAX_VALUE) {
            return Integer.MAX_VALUE;
        }
        return (int) _numDocs.get();
    }

    @Override
    public String toString() {
        return MoreObjects.toStringHelper(this).add("totalNumberOfDocs", getNumDocs())
                .add("termFrequencies", getAll()).toString();
    }
}