/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.text;

import java.util.Arrays;

import org.apache.commons.text.matcher.StringMatcherFactory;

/**
 * A matcher class that can be queried to determine if a character array
 * portion matches.
 * <p>
 * This class comes complete with various factory methods.
 * If these do not suffice, you can subclass and implement your own matcher.
 *
 * @since 1.0
 * @deprecated Deprecated as of 1.3, use {@link StringMatcherFactory} instead. This class will be removed in 2.0.
 */
@Deprecated
public abstract class StrMatcher {

    /**
     * Matches the comma character.
     */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');

    /**
     * Matches the tab character.
     */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');

    /**
     * Matches the space character.
     */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and form feed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());

    /**
     * Matches the String trim() whitespace characters.
     */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();

    /**
     * Matches the single quote character.
     */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');

    /**
     * Matches the double quote character.
     */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');

    /**
     * Matches the single or double quote character.
     */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());

    /**
     * Matches no characters.
     */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    // -----------------------------------------------------------------------

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and form feed.
     *
     * @return the split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Matches the String trim() whitespace characters.
     *
     * @return the trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Creates a matcher from a character.
     *
     * @param ch  the character to match
     * @return a new Matcher for the given char
     */
    public static StrMatcher charMatcher(final char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Creates a matcher from a set of characters.
     * <p>
     * A single-element array yields a single-character matcher; a null or
     * empty array yields the shared matcher that matches nothing.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given char[]
     */
    public static StrMatcher charSetMatcher(final char... chars) {
        if (chars == null || chars.length == 0) {
            return NONE_MATCHER;
        }
        if (chars.length == 1) {
            return new CharMatcher(chars[0]);
        }
        return new CharSetMatcher(chars);
    }

    /**
     * Creates a matcher from a string representing a set of characters.
     * <p>
     * A single-character string yields a single-character matcher; a null or
     * empty string yields the shared matcher that matches nothing.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given characters
     */
    public static StrMatcher charSetMatcher(final String chars) {
        if (chars == null || chars.isEmpty()) {
            return NONE_MATCHER;
        }
        if (chars.length() == 1) {
            return new CharMatcher(chars.charAt(0));
        }
        return new CharSetMatcher(chars.toCharArray());
    }

    /**
     * Creates a matcher from a string.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a matcher for the given String
     */
    public static StrMatcher stringMatcher(final String str) {
        if (str == null || str.isEmpty()) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    //-----------------------------------------------------------------------
    /**
     * Constructor.
     */
    protected StrMatcher() {
    }

    /**
     * Returns the number of matching characters, or zero if there is no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * <p>
     * The character array may be larger than the active area to be matched.
     * Only values in the buffer between the specified indices may be accessed.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those
     * after, so long as no checks exceed the bounds specified.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return the number of matching characters, or zero if there is no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

    /**
     * Returns the number of matching characters, or zero if there is no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those after.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     * <p>
     * This convenience overload treats the whole array as the active area.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return the number of matching characters, or zero if there is no match
     */
    public int isMatch(final char[] buffer, final int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a set of characters for matching purposes.
     */
    static final class CharSetMatcher extends StrMatcher {
        /** The set of characters to match, kept sorted for binary search. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a character array.
         * <p>
         * The array is copied, so later changes by the caller have no effect.
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(final char[] chars) {
            this.chars = chars.clone();
            // isMatch relies on binary search, which requires sorted input.
            Arrays.sort(this.chars);
        }

        /**
         * Returns whether or not the given character matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return 1 if the character at {@code pos} is in the set, otherwise zero
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a character for matching purposes.
     */
    static final class CharMatcher extends StrMatcher {
        /** The character to match. */
        private final char ch;

        /**
         * Constructor that creates a matcher that matches a single character.
         *
         * @param ch  the character to match
         */
        CharMatcher(final char ch) {
            this.ch = ch;
        }

        /**
         * Returns whether or not the given character matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return 1 if the character at {@code pos} equals the stored character, otherwise zero
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return ch == buffer[pos] ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a string (a fixed sequence of characters) for matching purposes.
     */
    static final class StringMatcher extends StrMatcher {
        /** The string to match, as a character array. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(final String str) {
            chars = str.toCharArray();
        }

        /**
         * Returns whether or not the given text matches the stored string.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return the length of the stored string if it occurs at {@code pos}, otherwise zero
         */
        @Override
        public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) {
            final int len = chars.length;
            // The stored string cannot match if it would run past the active area.
            if (pos + len > bufferEnd) {
                return 0;
            }
            for (int i = 0; i < len; i++, pos++) {
                if (chars[i] != buffer[pos]) {
                    return 0;
                }
            }
            return len;
        }

        /**
         * Returns the default object description followed by the characters being matched.
         *
         * @return a string describing this matcher
         */
        @Override
        public String toString() {
            return super.toString() + ' ' + Arrays.toString(chars);
        }

    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match no characters.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code NoMatcher}.
         */
        NoMatcher() {
        }

        /**
         * Always returns zero, meaning no match.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return always zero
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match whitespace as per trim().
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code TrimMatcher}.
         */
        TrimMatcher() {
        }

        /**
         * Returns whether or not the given character matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return 1 if the character at {@code pos} is the space character or any lower code, otherwise zero
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            // Every char up to and including the space character (code 32) counts as whitespace.
            return buffer[pos] <= ' ' ? 1 : 0;
        }
    }

}