001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.text;
018
019import org.apache.commons.lang3.Validate;
020
021import java.util.ArrayList;
022import java.util.HashSet;
023import java.util.List;
024import java.util.Set;
025import java.util.Collections;
026import java.util.concurrent.ThreadLocalRandom;
027
028/**
029 * <p>
030 * Generates random Unicode strings containing the specified number of code points.
031 * Instances are created using a builder class, which allows the
032 * callers to define the properties of the generator. See the documentation for the
033 * {@link Builder} class to see available properties.
034 * </p>
035 * <pre>
036 * // Generates a 20 code point string, using only the letters a-z
037 * RandomStringGenerator generator = new RandomStringGenerator.Builder()
038 *     .withinRange('a', 'z').build();
039 * String randomLetters = generator.generate(20);
040 * </pre>
041 * <pre>
042 * // Using Apache Commons RNG for randomness
043 * UniformRandomProvider rng = RandomSource.create(...);
044 * // Generates a 20 code point string, using only the letters a-z
045 * RandomStringGenerator generator = new RandomStringGenerator.Builder()
046 *     .withinRange('a', 'z')
047 *     .usingRandom(rng::nextInt) // uses Java 8 syntax
048 *     .build();
049 * String randomLetters = generator.generate(20);
050 * </pre>
051 * <p>
052 * {@code RandomStringBuilder} instances are thread-safe when using the
053 * default random number generator (RNG). If a custom RNG is set by calling the method
054 * {@link Builder#usingRandom(TextRandomProvider) Builder.usingRandom(TextRandomProvider)}, thread-safety
055 * must be ensured externally.
056 * </p>
057 * @since 1.1
058 */
059public final class RandomStringGenerator {
060
061    /**
062     * The smallest allowed code point (inclusive).
063     */
064    private final int minimumCodePoint;
065
066    /**
067     * The largest allowed code point (inclusive).
068     */
069    private final int maximumCodePoint;
070
071    /**
072     * Filters for code points.
073     */
074    private final Set<CharacterPredicate> inclusivePredicates;
075
076    /**
077     * The source of randomness for this generator.
078     */
079    private final TextRandomProvider random;
080
081    /**
082     * The source of provided characters.
083     */
084    private final List<Character> characterList;
085
086    /**
087     * Constructs the generator.
088     *
089     * @param minimumCodePoint
090     *            smallest allowed code point (inclusive)
091     * @param maximumCodePoint
092     *            largest allowed code point (inclusive)
093     * @param inclusivePredicates
094     *            filters for code points
095     * @param random
096     *            source of randomness
097     * @param characterList list of predefined set of characters.
098     */
099    private RandomStringGenerator(final int minimumCodePoint, final int maximumCodePoint,
100                                  final Set<CharacterPredicate> inclusivePredicates, final TextRandomProvider random,
101                                  final List<Character> characterList) {
102        this.minimumCodePoint = minimumCodePoint;
103        this.maximumCodePoint = maximumCodePoint;
104        this.inclusivePredicates = inclusivePredicates;
105        this.random = random;
106        this.characterList = characterList;
107    }
108
109    /**
110     * Generates a random number within a range, using a {@link ThreadLocalRandom} instance
111     * or the user-supplied source of randomness.
112     *
113     * @param minInclusive
114     *            the minimum value allowed
115     * @param maxInclusive
116     *            the maximum value allowed
117     * @return the random number.
118     */
119    private int generateRandomNumber(final int minInclusive, final int maxInclusive) {
120        if (random != null) {
121            return random.nextInt(maxInclusive - minInclusive + 1) + minInclusive;
122        }
123        return ThreadLocalRandom.current().nextInt(minInclusive, maxInclusive + 1);
124    }
125
126    /**
127     * Generates a random number within a range, using a {@link ThreadLocalRandom} instance
128     * or the user-supplied source of randomness.
129     *
130     * @param characterList predefined char list.
131     * @return the random number.
132     */
133    private int generateRandomNumber(final List<Character> characterList) {
134        final int listSize = characterList.size();
135        if (random != null) {
136            return String.valueOf(characterList.get(random.nextInt(listSize))).codePointAt(0);
137        }
138        return String.valueOf(characterList.get(ThreadLocalRandom.current().nextInt(0, listSize))).codePointAt(0);
139    }
140
141    /**
142     * <p>
143     * Generates a random string, containing the specified number of code points.
144     * </p>
145     * <p>Code points are randomly selected between the minimum and maximum values defined
146     * in the generator.
147     * Surrogate and private use characters are not returned, although the
148     * resulting string may contain pairs of surrogates that together encode a
149     * supplementary character.
150     * </p>
151     * <p>
152     * Note: the number of {@code char} code units generated will exceed
153     * {@code length} if the string contains supplementary characters. See the
154     * {@link Character} documentation to understand how Java stores Unicode
155     * values.
156     * </p>
157     *
158     * @param length
159     *            the number of code points to generate
160     * @return the generated string
161     * @throws IllegalArgumentException
162     *             if {@code length < 0}
163     */
164    public String generate(final int length) {
165        if (length == 0) {
166            return "";
167        }
168        Validate.isTrue(length > 0, "Length %d is smaller than zero.", length);
169
170        final StringBuilder builder = new StringBuilder(length);
171        long remaining = length;
172
173        do {
174            int codePoint;
175            if (characterList != null && !characterList.isEmpty()) {
176                codePoint = generateRandomNumber(characterList);
177            } else {
178                codePoint = generateRandomNumber(minimumCodePoint, maximumCodePoint);
179            }
180            switch (Character.getType(codePoint)) {
181            case Character.UNASSIGNED:
182            case Character.PRIVATE_USE:
183            case Character.SURROGATE:
184                continue;
185            default:
186            }
187
188            if (inclusivePredicates != null) {
189                boolean matchedFilter = false;
190                for (final CharacterPredicate predicate : inclusivePredicates) {
191                    if (predicate.test(codePoint)) {
192                        matchedFilter = true;
193                        break;
194                    }
195                }
196                if (!matchedFilter) {
197                    continue;
198                }
199            }
200
201            builder.appendCodePoint(codePoint);
202            remaining--;
203
204        } while (remaining != 0);
205
206        return builder.toString();
207    }
208
209    /**
210     * Generates a random string, containing between the minimum (inclusive) and the maximum (inclusive)
211     * number of code points.
212     *
213     * @param minLengthInclusive
214     *            the minimum (inclusive) number of code points to generate
215     * @param maxLengthInclusive
216     *            the maximum (inclusive) number of code points to generate
217     * @return the generated string
218     * @throws IllegalArgumentException
219     *             if {@code minLengthInclusive < 0}, or {@code maxLengthInclusive < minLengthInclusive}
220     * @see RandomStringGenerator#generate(int)
221     * @since 1.2
222     */
223    public String generate(final int minLengthInclusive, final int maxLengthInclusive) {
224        Validate.isTrue(minLengthInclusive >= 0, "Minimum length %d is smaller than zero.", minLengthInclusive);
225        Validate.isTrue(minLengthInclusive <= maxLengthInclusive,
226                "Maximum length %d is smaller than minimum length %d.", maxLengthInclusive, minLengthInclusive);
227        return generate(generateRandomNumber(minLengthInclusive, maxLengthInclusive));
228    }
229
230    /**
231     * <p>A builder for generating {@code RandomStringGenerator} instances.</p>
232     * <p>The behaviour of a generator is controlled by properties set by this
233     * builder. Each property has a default value, which can be overridden by
234     * calling the methods defined in this class, prior to calling {@link #build()}.</p>
235     *
236     * <p>All the property setting methods return the {@code Builder} instance to allow for method chaining.</p>
237     *
238     * <p>The minimum and maximum code point values are defined using {@link #withinRange(int, int)}. The
239     * default values are {@code 0} and {@link Character#MAX_CODE_POINT} respectively.</p>
240     *
241     * <p>The source of randomness can be set using {@link #usingRandom(TextRandomProvider)},
242     * otherwise {@link ThreadLocalRandom} is used.</p>
243     *
244     * <p>The type of code points returned can be filtered using {@link #filteredBy(CharacterPredicate...)},
245     * which defines a collection of tests that are applied to the randomly generated code points.
246     * The code points will only be included in the result if they pass at least one of the tests.
247     * Some commonly used predicates are provided by the {@link CharacterPredicates} enum.</p>
248     *
249     * <p>This class is not thread safe.</p>
250     * @since 1.1
251     */
252    public static class Builder implements org.apache.commons.text.Builder<RandomStringGenerator> {
253
254        /**
255         * The default maximum code point allowed: {@link Character#MAX_CODE_POINT}
256         * ({@value}).
257         */
258        public static final int DEFAULT_MAXIMUM_CODE_POINT = Character.MAX_CODE_POINT;
259
260        /**
261         * The default string length produced by this builder: {@value}.
262         */
263        public static final int DEFAULT_LENGTH = 0;
264
265        /**
266         * The default minimum code point allowed: {@value}.
267         */
268        public static final int DEFAULT_MINIMUM_CODE_POINT = 0;
269
270        /**
271         * The minimum code point allowed.
272         */
273        private int minimumCodePoint = DEFAULT_MINIMUM_CODE_POINT;
274
275        /**
276         * The maximum code point allowed.
277         */
278        private int maximumCodePoint = DEFAULT_MAXIMUM_CODE_POINT;
279
280        /**
281         * Filters for code points.
282         */
283        private Set<CharacterPredicate> inclusivePredicates;
284
285        /**
286         * The source of randomness.
287         */
288        private TextRandomProvider random;
289
290        /**
291         * The source of provided characters.
292         */
293        private List<Character> characterList;
294
295        /**
296         * <p>
297         * Specifies the minimum and maximum code points allowed in the
298         * generated string.
299         * </p>
300         *
301         * @param minimumCodePoint
302         *            the smallest code point allowed (inclusive)
303         * @param maximumCodePoint
304         *            the largest code point allowed (inclusive)
305         * @return {@code this}, to allow method chaining
306         * @throws IllegalArgumentException
307         *             if {@code maximumCodePoint >}
308         *             {@link Character#MAX_CODE_POINT}
309         * @throws IllegalArgumentException
310         *             if {@code minimumCodePoint < 0}
311         * @throws IllegalArgumentException
312         *             if {@code minimumCodePoint > maximumCodePoint}
313         */
314        public Builder withinRange(final int minimumCodePoint, final int maximumCodePoint) {
315            Validate.isTrue(minimumCodePoint <= maximumCodePoint,
316                    "Minimum code point %d is larger than maximum code point %d", minimumCodePoint, maximumCodePoint);
317            Validate.isTrue(minimumCodePoint >= 0, "Minimum code point %d is negative", minimumCodePoint);
318            Validate.isTrue(maximumCodePoint <= Character.MAX_CODE_POINT,
319                    "Value %d is larger than Character.MAX_CODE_POINT.", maximumCodePoint);
320
321            this.minimumCodePoint = minimumCodePoint;
322            this.maximumCodePoint = maximumCodePoint;
323            return this;
324        }
325
326        /**
327         * <p>
328         * Specifies the array of minimum and maximum char allowed in the
329         * generated string.
330         * </p>
331         *
332         * For example:
333         * <pre>
334         * {@code
335         *     char [][] pairs = {{'0','9'}};
336         *     char [][] pairs = {{'a','z'}};
337         *     char [][] pairs = {{'a','z'},{'0','9'}};
338         * }
339         * </pre>
340         *
341         * @param pairs array of characters array, expected is to pass min, max pairs through this arg.
342         * @return {@code this}, to allow method chaining.
343         */
344        public Builder withinRange(final char[]... pairs) {
345            characterList = new ArrayList<>();
346            for (final char[] pair :  pairs) {
347                Validate.isTrue(pair.length == 2,
348                      "Each pair must contain minimum and maximum code point");
349                final int minimumCodePoint = pair[0];
350                final int maximumCodePoint = pair[1];
351                Validate.isTrue(minimumCodePoint <= maximumCodePoint,
352                    "Minimum code point %d is larger than maximum code point %d", minimumCodePoint, maximumCodePoint);
353
354                for (int index = minimumCodePoint; index <= maximumCodePoint; index++) {
355                    characterList.add((char) index);
356                }
357            }
358            return this;
359
360        }
361
362        /**
363         * <p>
364         * Limits the characters in the generated string to those that match at
365         * least one of the predicates supplied.
366         * </p>
367         *
368         * <p>
369         * Passing {@code null} or an empty array to this method will revert to the
370         * default behaviour of allowing any character. Multiple calls to this
371         * method will replace the previously stored predicates.
372         * </p>
373         *
374         * @param predicates
375         *            the predicates, may be {@code null} or empty
376         * @return {@code this}, to allow method chaining
377         */
378        public Builder filteredBy(final CharacterPredicate... predicates) {
379            if (predicates == null || predicates.length == 0) {
380                inclusivePredicates = null;
381                return this;
382            }
383
384            if (inclusivePredicates == null) {
385                inclusivePredicates = new HashSet<>();
386            } else {
387                inclusivePredicates.clear();
388            }
389
390            Collections.addAll(inclusivePredicates, predicates);
391
392            return this;
393        }
394
395        /**
396         * <p>
397         * Overrides the default source of randomness.  It is highly
398         * recommended that a random number generator library like
399         * <a href="http://commons.apache.org/proper/commons-rng/">Apache Commons RNG</a>
400         * be used to provide the random number generation.
401         * </p>
402         *
403         * <p>
404         * When using Java 8 or later, {@link TextRandomProvider} is a
405         * functional interface and need not be explicitly implemented:
406         * </p>
407         * <pre>
408         * {@code
409         *     UniformRandomProvider rng = RandomSource.create(...);
410         *     RandomStringGenerator gen = new RandomStringGenerator.Builder()
411         *         .usingRandom(rng::nextInt)
412         *         // additional builder calls as needed
413         *         .build();
414         * }
415         * </pre>
416         *
417         * <p>
418         * Passing {@code null} to this method will revert to the default source of
419         * randomness.
420         * </p>
421         *
422         * @param random
423         *            the source of randomness, may be {@code null}
424         * @return {@code this}, to allow method chaining
425         */
426        public Builder usingRandom(final TextRandomProvider random) {
427            this.random = random;
428            return this;
429        }
430
431        /**
432         * <p>
433         * Limits the characters in the generated string to those who match at
434         * supplied list of Character.
435         * </p>
436         *
437         * <p>
438         * Passing {@code null} or an empty array to this method will revert to the
439         * default behaviour of allowing any character. Multiple calls to this
440         * method will replace the previously stored Character.
441         * </p>
442         *
443         * @param chars set of predefined Characters for random string generation
444         *            the Character can be, may be {@code null} or empty
445         * @return {@code this}, to allow method chaining
446         * @since 1.2
447         */
448        public Builder selectFrom(final char... chars) {
449            characterList = new ArrayList<>();
450            for (final char c : chars) {
451                characterList.add(c);
452            }
453            return this;
454        }
455
456        /**
457         * <p>Builds the {@code RandomStringGenerator} using the properties specified.</p>
458         * @return the configured {@code RandomStringGenerator}
459         */
460        @Override
461        public RandomStringGenerator build() {
462            return new RandomStringGenerator(minimumCodePoint, maximumCodePoint, inclusivePredicates,
463                    random, characterList);
464        }
465    }
466}