001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.collections4.bloomfilter;
018
019import java.util.Objects;
020import java.util.TreeSet;
021import java.util.function.IntPredicate;
022import java.util.function.LongPredicate;
023
024/**
025 * A bloom filter using a TreeSet of integers to track enabled bits. This is a standard
026 * implementation and should work well for most low cardinality Bloom filters.
027 *
028 * @since 4.5.0-M1
029 */
030public final class SparseBloomFilter implements BloomFilter<SparseBloomFilter> {
031
032    /**
033     * The bitSet that defines this BloomFilter.
034     */
035    private final TreeSet<Integer> indices;
036
037    /**
038     * The shape of this BloomFilter.
039     */
040    private final Shape shape;
041
042    /**
043     * Constructs an empty BitSetBloomFilter.
044     *
045     * @param shape The shape of the filter.
046     */
047    public SparseBloomFilter(final Shape shape) {
048        Objects.requireNonNull(shape, "shape");
049        this.shape = shape;
050        this.indices = new TreeSet<>();
051    }
052
053    private SparseBloomFilter(final SparseBloomFilter source) {
054        shape = source.shape;
055        indices = new TreeSet<>(source.indices);
056    }
057
058    /**
059     * Adds the index to the indices.
060     *
061     * @param idx the index to add.
062     * @return {@code true} always
063     */
064    private boolean add(final int idx) {
065        indices.add(idx);
066        return true;
067    }
068
069    @Override
070    public long[] asBitMapArray() {
071        final long[] result = BitMaps.newBitMap(shape);
072        for (final int i : indices) {
073            BitMaps.set(result, i);
074        }
075        return result;
076    }
077
078    @Override
079    public int cardinality() {
080        return indices.size();
081    }
082
083    @Override
084    public int characteristics() {
085        return SPARSE;
086    }
087
088    @Override
089    public void clear() {
090        indices.clear();
091    }
092
093    @Override
094    public boolean contains(final BitMapExtractor bitMapExtractor) {
095        return contains(IndexExtractor.fromBitMapExtractor(bitMapExtractor));
096    }
097
098    @Override
099    public boolean contains(final IndexExtractor indexExtractor) {
100        return indexExtractor.processIndices(indices::contains);
101    }
102
103    /**
104     * Creates a new instance of this {@link SparseBloomFilter} with the same properties as the current one.
105     *
106     * @return a copy of this {@link SparseBloomFilter}.
107     */
108    @Override
109    public SparseBloomFilter copy() {
110        return new SparseBloomFilter(this);
111    }
112
113    @Override
114    public Shape getShape() {
115        return shape;
116    }
117
118    @Override
119    public boolean isEmpty() {
120        return indices.isEmpty();
121    }
122
123    @Override
124    public boolean merge(final BitMapExtractor bitMapExtractor) {
125        Objects.requireNonNull(bitMapExtractor, "bitMapExtractor");
126        return this.merge(IndexExtractor.fromBitMapExtractor(bitMapExtractor));
127    }
128
129    @Override
130    public boolean merge(final BloomFilter<?> other) {
131        Objects.requireNonNull(other, "other");
132        final IndexExtractor indexExtractor = (other.characteristics() & SPARSE) != 0 ? (IndexExtractor) other : IndexExtractor.fromBitMapExtractor(other);
133        merge(indexExtractor);
134        return true;
135    }
136
137    @Override
138    public boolean merge(final Hasher hasher) {
139        Objects.requireNonNull(hasher, "hasher");
140        merge(hasher.indices(shape));
141        return true;
142    }
143
144    @Override
145    public boolean merge(final IndexExtractor indexExtractor) {
146        Objects.requireNonNull(indexExtractor, "indexExtractor");
147        indexExtractor.processIndices(this::add);
148        if (!indices.isEmpty()) {
149            if (indices.last() >= shape.getNumberOfBits()) {
150                throw new IllegalArgumentException(String.format("Value in list %s is greater than maximum value (%s)",
151                        indices.last(), shape.getNumberOfBits() - 1));
152            }
153            if (indices.first() < 0) {
154                throw new IllegalArgumentException(
155                        String.format("Value in list %s is less than 0", indices.first()));
156            }
157        }
158        return true;
159    }
160
161    @Override
162    public boolean processBitMaps(final LongPredicate consumer) {
163        Objects.requireNonNull(consumer, "consumer");
164        final int limit = BitMaps.numberOfBitMaps(shape);
165        //
166        // because our indices are always in order we can shorten the time necessary to
167        // create the longs for the consumer
168        //
169        // the currently constructed bitMap
170        long bitMap = 0;
171        // the bitmap we are working on
172        int idx = 0;
173        for (final int i : indices) {
174            while (BitMaps.getLongIndex(i) != idx) {
175                if (!consumer.test(bitMap)) {
176                    return false;
177                }
178                bitMap = 0;
179                idx++;
180            }
181            bitMap |= BitMaps.getLongBit(i);
182        }
183        // we fall through with data in the bitMap
184        if (!consumer.test(bitMap)) {
185            return false;
186        }
187        // account for hte bitMap in the previous block + the next one
188        idx++;
189        // while there are more blocks to generate send zero to the consumer.
190        while (idx < limit) {
191            if (!consumer.test(0L)) {
192                return false;
193            }
194            idx++;
195        }
196        return true;
197    }
198
199    @Override
200    public boolean processIndices(final IntPredicate consumer) {
201        Objects.requireNonNull(consumer, "consumer");
202        return indices.stream().allMatch(consumer::test);
203    }
204}