001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.lang;
018
019 import org.apache.commons.lang.text.StrBuilder;
020
021 /**
022 * <p>Operations on <code>CharSet</code>s.</p>
023 *
024 * <p>This class handles <code>null</code> input gracefully.
025 * An exception will not be thrown for a <code>null</code> input.
026 * Each method documents its behaviour in more detail.</p>
027 *
028 * <p>#ThreadSafe#</p>
029 * @see CharSet
030 * @author Apache Software Foundation
031 * @author Phil Steitz
032 * @author Gary Gregory
033 * @since 1.0
034 * @version $Id: CharSetUtils.java 1057072 2011-01-10 01:55:57Z niallp $
035 */
036 public class CharSetUtils {
037
038 /**
039 * <p>CharSetUtils instances should NOT be constructed in standard programming.
040 * Instead, the class should be used as <code>CharSetUtils.evaluateSet(null);</code>.</p>
041 *
042 * <p>This constructor is public to permit tools that require a JavaBean instance
043 * to operate.</p>
044 */
045 public CharSetUtils() {
046 super();
047 }
048
049 // Factory
050 //-----------------------------------------------------------------------
051 /**
052 * <p>Creates a <code>CharSet</code> instance which allows a certain amount of
053 * set logic to be performed.</p>
054 * <p>The syntax is:</p>
055 * <ul>
056 * <li>"aeio" which implies 'a','e',..</li>
057 * <li>"^e" implies not e.</li>
058 * <li>"ej-m" implies e,j->m. e,j,k,l,m.</li>
059 * </ul>
060 *
061 * <pre>
062 * CharSetUtils.evaluateSet(null) = null
063 * CharSetUtils.evaluateSet([]) = CharSet matching nothing
064 * CharSetUtils.evaluateSet(["a-e"]) = CharSet matching a,b,c,d,e
065 * </pre>
066 *
067 * @param set the set, may be null
068 * @return a CharSet instance, <code>null</code> if null input
069 * @deprecated Use {@link CharSet#getInstance(String[])}.
070 * Method will be removed in Commons Lang 3.0.
071 */
072 public static CharSet evaluateSet(String[] set) {
073 if (set == null) {
074 return null;
075 }
076 return new CharSet(set);
077 }
078
079 // Squeeze
080 //-----------------------------------------------------------------------
081 /**
082 * <p>Squeezes any repetitions of a character that is mentioned in the
083 * supplied set.</p>
084 *
085 * <pre>
086 * CharSetUtils.squeeze(null, *) = null
087 * CharSetUtils.squeeze("", *) = ""
088 * CharSetUtils.squeeze(*, null) = *
089 * CharSetUtils.squeeze(*, "") = *
090 * CharSetUtils.squeeze("hello", "k-p") = "helo"
091 * CharSetUtils.squeeze("hello", "a-e") = "hello"
092 * </pre>
093 *
094 * @see CharSet#getInstance(java.lang.String) for set-syntax.
095 * @param str the string to squeeze, may be null
096 * @param set the character set to use for manipulation, may be null
097 * @return modified String, <code>null</code> if null string input
098 */
099 public static String squeeze(String str, String set) {
100 if (StringUtils.isEmpty(str) || StringUtils.isEmpty(set)) {
101 return str;
102 }
103 String[] strs = new String[1];
104 strs[0] = set;
105 return squeeze(str, strs);
106 }
107
108 /**
109 * <p>Squeezes any repetitions of a character that is mentioned in the
110 * supplied set.</p>
111 *
112 * <p>An example is:</p>
113 * <ul>
114 * <li>squeeze("hello", {"el"}) => "helo"</li>
115 * </ul>
116 *
117 * @see CharSet#getInstance(java.lang.String) for set-syntax.
118 * @param str the string to squeeze, may be null
119 * @param set the character set to use for manipulation, may be null
120 * @return modified String, <code>null</code> if null string input
121 */
122 public static String squeeze(String str, String[] set) {
123 if (StringUtils.isEmpty(str) || ArrayUtils.isEmpty(set)) {
124 return str;
125 }
126 CharSet chars = CharSet.getInstance(set);
127 StrBuilder buffer = new StrBuilder(str.length());
128 char[] chrs = str.toCharArray();
129 int sz = chrs.length;
130 char lastChar = ' ';
131 char ch = ' ';
132 for (int i = 0; i < sz; i++) {
133 ch = chrs[i];
134 if (chars.contains(ch)) {
135 if ((ch == lastChar) && (i != 0)) {
136 continue;
137 }
138 }
139 buffer.append(ch);
140 lastChar = ch;
141 }
142 return buffer.toString();
143 }
144
145 // Count
146 //-----------------------------------------------------------------------
147 /**
148 * <p>Takes an argument in set-syntax, see evaluateSet,
149 * and returns the number of characters present in the specified string.</p>
150 *
151 * <pre>
152 * CharSetUtils.count(null, *) = 0
153 * CharSetUtils.count("", *) = 0
154 * CharSetUtils.count(*, null) = 0
155 * CharSetUtils.count(*, "") = 0
156 * CharSetUtils.count("hello", "k-p") = 3
157 * CharSetUtils.count("hello", "a-e") = 1
158 * </pre>
159 *
160 * @see CharSet#getInstance(java.lang.String) for set-syntax.
161 * @param str String to count characters in, may be null
162 * @param set String set of characters to count, may be null
163 * @return character count, zero if null string input
164 */
165 public static int count(String str, String set) {
166 if (StringUtils.isEmpty(str) || StringUtils.isEmpty(set)) {
167 return 0;
168 }
169 String[] strs = new String[1];
170 strs[0] = set;
171 return count(str, strs);
172 }
173
174 /**
175 * <p>Takes an argument in set-syntax, see evaluateSet,
176 * and returns the number of characters present in the specified string.</p>
177 *
178 * <p>An example would be:</p>
179 * <ul>
180 * <li>count("hello", {"c-f", "o"}) returns 2.</li>
181 * </ul>
182 *
183 * @see CharSet#getInstance(java.lang.String) for set-syntax.
184 * @param str String to count characters in, may be null
185 * @param set String[] set of characters to count, may be null
186 * @return character count, zero if null string input
187 */
188 public static int count(String str, String[] set) {
189 if (StringUtils.isEmpty(str) || ArrayUtils.isEmpty(set)) {
190 return 0;
191 }
192 CharSet chars = CharSet.getInstance(set);
193 int count = 0;
194 char[] chrs = str.toCharArray();
195 int sz = chrs.length;
196 for(int i=0; i<sz; i++) {
197 if(chars.contains(chrs[i])) {
198 count++;
199 }
200 }
201 return count;
202 }
203
204 // Keep
205 //-----------------------------------------------------------------------
206 /**
207 * <p>Takes an argument in set-syntax, see evaluateSet,
208 * and keeps any of characters present in the specified string.</p>
209 *
210 * <pre>
211 * CharSetUtils.keep(null, *) = null
212 * CharSetUtils.keep("", *) = ""
213 * CharSetUtils.keep(*, null) = ""
214 * CharSetUtils.keep(*, "") = ""
215 * CharSetUtils.keep("hello", "hl") = "hll"
216 * CharSetUtils.keep("hello", "le") = "ell"
217 * </pre>
218 *
219 * @see CharSet#getInstance(java.lang.String) for set-syntax.
220 * @param str String to keep characters from, may be null
221 * @param set String set of characters to keep, may be null
222 * @return modified String, <code>null</code> if null string input
223 * @since 2.0
224 */
225 public static String keep(String str, String set) {
226 if (str == null) {
227 return null;
228 }
229 if (str.length() == 0 || StringUtils.isEmpty(set)) {
230 return "";
231 }
232 String[] strs = new String[1];
233 strs[0] = set;
234 return keep(str, strs);
235 }
236
237 /**
238 * <p>Takes an argument in set-syntax, see evaluateSet,
239 * and keeps any of characters present in the specified string.</p>
240 *
241 * <p>An example would be:</p>
242 * <ul>
243 * <li>keep("hello", {"c-f", "o"})
244 * returns "eo"</li>
245 * </ul>
246 *
247 * @see CharSet#getInstance(java.lang.String) for set-syntax.
248 * @param str String to keep characters from, may be null
249 * @param set String[] set of characters to keep, may be null
250 * @return modified String, <code>null</code> if null string input
251 * @since 2.0
252 */
253 public static String keep(String str, String[] set) {
254 if (str == null) {
255 return null;
256 }
257 if (str.length() == 0 || ArrayUtils.isEmpty(set)) {
258 return "";
259 }
260 return modify(str, set, true);
261 }
262
263 // Delete
264 //-----------------------------------------------------------------------
265 /**
266 * <p>Takes an argument in set-syntax, see evaluateSet,
267 * and deletes any of characters present in the specified string.</p>
268 *
269 * <pre>
270 * CharSetUtils.delete(null, *) = null
271 * CharSetUtils.delete("", *) = ""
272 * CharSetUtils.delete(*, null) = *
273 * CharSetUtils.delete(*, "") = *
274 * CharSetUtils.delete("hello", "hl") = "eo"
275 * CharSetUtils.delete("hello", "le") = "ho"
276 * </pre>
277 *
278 * @see CharSet#getInstance(java.lang.String) for set-syntax.
279 * @param str String to delete characters from, may be null
280 * @param set String set of characters to delete, may be null
281 * @return modified String, <code>null</code> if null string input
282 */
283 public static String delete(String str, String set) {
284 if (StringUtils.isEmpty(str) || StringUtils.isEmpty(set)) {
285 return str;
286 }
287 String[] strs = new String[1];
288 strs[0] = set;
289 return delete(str, strs);
290 }
291
292 /**
293 * <p>Takes an argument in set-syntax, see evaluateSet,
294 * and deletes any of characters present in the specified string.</p>
295 *
296 * <p>An example would be:</p>
297 * <ul>
298 * <li>delete("hello", {"c-f", "o"}) returns
299 * "hll"</li>
300 * </ul>
301 *
302 * @see CharSet#getInstance(java.lang.String) for set-syntax.
303 * @param str String to delete characters from, may be null
304 * @param set String[] set of characters to delete, may be null
305 * @return modified String, <code>null</code> if null string input
306 */
307 public static String delete(String str, String[] set) {
308 if (StringUtils.isEmpty(str) || ArrayUtils.isEmpty(set)) {
309 return str;
310 }
311 return modify(str, set, false);
312 }
313
314 //-----------------------------------------------------------------------
315 /**
316 * Implementation of delete and keep
317 *
318 * @param str String to modify characters within
319 * @param set String[] set of characters to modify
320 * @param expect whether to evaluate on match, or non-match
321 * @return modified String
322 */
323 private static String modify(String str, String[] set, boolean expect) {
324 CharSet chars = CharSet.getInstance(set);
325 StrBuilder buffer = new StrBuilder(str.length());
326 char[] chrs = str.toCharArray();
327 int sz = chrs.length;
328 for(int i=0; i<sz; i++) {
329 if(chars.contains(chrs[i]) == expect) {
330 buffer.append(chrs[i]);
331 }
332 }
333 return buffer.toString();
334 }
335
336 // Translate
337 //-----------------------------------------------------------------------
338 /**
339 * <p>Translate characters in a String.
340 * This is a multi character search and replace routine.</p>
341 *
342 * <p>An example is:</p>
343 * <ul>
344 * <li>translate("hello", "ho", "jy")
345 * => jelly</li>
346 * </ul>
347 *
348 * <p>If the length of characters to search for is greater than the
349 * length of characters to replace, then the last character is
350 * used.</p>
351 *
352 * <pre>
353 * CharSetUtils.translate(null, *, *) = null
354 * CharSetUtils.translate("", *, *) = ""
355 * </pre>
356 *
357 * @param str String to replace characters in, may be null
358 * @param searchChars a set of characters to search for, must not be null
359 * @param replaceChars a set of characters to replace, must not be null or empty ("")
360 * @return translated String, <code>null</code> if null string input
361 * @throws NullPointerException if <code>searchChars</code> or <code>replaceChars</code>
362 * is <code>null</code>
363 * @throws ArrayIndexOutOfBoundsException if <code>replaceChars</code> is empty ("")
364 * @deprecated Use {@link StringUtils#replaceChars(String, String, String)}.
365 * Method will be removed in Commons Lang 3.0.
366 * NOTE: StringUtils#replaceChars behaves differently when 'searchChars' is longer
367 * than 'replaceChars'. CharSetUtils#translate will use the last char of the replacement
368 * string whereas StringUtils#replaceChars will delete
369 */
370 public static String translate(String str, String searchChars, String replaceChars) {
371 if (StringUtils.isEmpty(str)) {
372 return str;
373 }
374 StrBuilder buffer = new StrBuilder(str.length());
375 char[] chrs = str.toCharArray();
376 char[] withChrs = replaceChars.toCharArray();
377 int sz = chrs.length;
378 int withMax = replaceChars.length() - 1;
379 for(int i=0; i<sz; i++) {
380 int idx = searchChars.indexOf(chrs[i]);
381 if(idx != -1) {
382 if(idx > withMax) {
383 idx = withMax;
384 }
385 buffer.append(withChrs[idx]);
386 } else {
387 buffer.append(chrs[i]);
388 }
389 }
390 return buffer.toString();
391 }
392
393 }