Friday, May 17, 2013

Pattern matching.

How to find the phone numbers in 50,000 .txt files and replace them.

The phone numbers could be in several formats, such as the following:

"***-*******"
"**********"
"*** *******"
"***-***-****"


/*
 * Phone-number pattern.
 *
 * Group 1 - local numbers: three digits (optionally parenthesised), an
 *           optional separator ('-', ' ' or '.'), three digits, another
 *           optional separator, four digits. Both separators are optional so
 *           that "***-*******", "**********", "*** *******" and
 *           "***-***-****" are all recognised.
 * Group 2 - international numbers such as "+dd-dddd-dddd-dddd".
 *
 * The look-behind / look-ahead keep the match from starting or ending in the
 * middle of a longer run of digits without consuming any surrounding
 * character, so a number at the very start of a line is found and
 * replaceAll() does not overwrite the whitespace in front of it.
 */
final String regex = "(?<!\\d)(?:" +
        "(\\(?\\d{3}\\)?[- .]?\\d{3}[- .]?\\d{4})|" +
        "(\\+\\d{2}-\\d{2,4}-\\d{3,4}-\\d{3,4})" +
        ")(?!\\d)";
final Pattern phonePattern = Pattern.compile(regex);

/* The result set: every file in which at least one phone number was masked */
Set<File> files = new HashSet<File>();

File dir = new File("/initDirPath");
if (!dir.isDirectory()) return;

// listFiles() may return null (e.g. on an I/O error) even for a directory.
File[] entries = dir.listFiles();
if (entries == null) return;

for (File file : entries) {
    if (file.isDirectory()) continue;

    // Rewritten content of the file; only written back if something matched.
    StringBuilder masked = new StringBuilder();
    boolean found = false;

    // try-with-resources closes the reader even if readLine() throws, so
    // file handles are not leaked while walking a large directory.
    try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
        String line;
        // Scan every line: a file may contain more than one phone number.
        while ((line = reader.readLine()) != null) {
            Matcher matcher = phonePattern.matcher(line);
            if (matcher.find()) {
                found = true;
                // replaceAll() returns the new text; it does not modify the
                // input, so the result has to be kept.
                line = matcher.replaceAll("xxxxxxxxxxx");
            }
            // NOTE: line terminators are normalised to the platform separator
            // and the rewritten file always ends with one.
            masked.append(line).append(System.lineSeparator());
        }
    }

    if (found) {
        // The reader is closed at this point, so the file can be overwritten.
        try (java.io.FileWriter writer = new java.io.FileWriter(file)) {
            writer.write(masked.toString());
        }
        files.add(file);
    }
}

for (File file : files) {
System.out.println(file.getAbsolutePath());
}

No comments: