Friday, March 14, 2014

Java regular expressions sample code

This is sample code showing how to do regular-expression pattern matching in Java. I used it to extract the thumbnails, image files and captions from this webpage - Click Here. It uses Apache Commons IO to read the file, and the rest you can figure out. It was pretty easy once I finally figured out how to use the Matcher class.

The output of this program is the HTML source for this blog post: Pictures of North American Inuit peoples life style

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;

/**
 * Sample program that pulls thumbnail URLs, full-size image URLs and captions out of a
 * saved page with regular expressions and prints an HTML gallery to standard output.
 *
 * <p>For each of the three patterns the matches are first listed on standard output by
 * {@link #printMat(Matcher)} and then collected by {@link #retMatches(Matcher)}; the
 * generated HTML is printed last.
 *
 * @author ranjit sandhu
 * @date Fri, Mar 14, 2014  4:02:04 PM
 */
public class htmlParser {

  /** File that is read when no path is supplied on the command line. */
  private static final String DEFAULT_INPUT_PATH = "c:\\ranjit\\code\\java_read_file.txt";

  /*
   * In the image patterns the dots of the host name and of the file extension are escaped
   * so they only match a literal '.', and the variable part of the URL may not contain a
   * double quote or whitespace, so one match cannot run across several URLs on a line.
   */

  /** Matches the URL of a 980x551 (full-size) image. */
  private static final Pattern BIG_IMAGE =
      Pattern.compile("http://s\\.imwx\\.com/dru/2014/02/[^\"\\s]+_980x551\\.jpg");

  /** Matches the URL of an 85x64 (thumbnail) image. */
  private static final Pattern SMALL_IMAGE =
      Pattern.compile("http://s\\.imwx\\.com/dru/2014/02/[^\"\\s]+_85x64\\.jpg");

  /**
   * Matches {@code caption":"} followed by text up to the last {@code (} on the same line
   * (the quantifier is greedy and {@code .} does not match line terminators by default).
   */
  private static final Pattern CAPTION = Pattern.compile("caption\":\".+\\(");

  /** Length of the leading {@code caption":"} that is cut off every caption match (10). */
  private static final int CAPTION_PREFIX_LENGTH = "caption\":\"".length();

  /** Trailing characters cut off every caption match: the final {@code (} and the character before it. */
  private static final int CAPTION_SUFFIX_LENGTH = 2;

  /**
   * Reads the input file, extracts image URLs and captions, and prints the gallery HTML.
   *
   * @param args optional; {@code args[0]} is the path of the file to read. When absent,
   *     {@link #DEFAULT_INPUT_PATH} is used.
   * @throws IOException if the file cannot be read
   */
  public static void main(String[] args) throws IOException {
    String path = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
    File ff = new File(path);
    // NOTE(review): this overload takes no charset, so decoding presumably follows the
    // platform default - confirm against the Commons IO version in use.
    String input = FileUtils.readFileToString(ff);

    ArrayList<String> bigImages = printAndCollect(BIG_IMAGE, input);
    ArrayList<String> smallImages = printAndCollect(SMALL_IMAGE, input);
    ArrayList<String> captions = printAndCollect(CAPTION, input);

    System.out.println(buildHtml(smallImages, bigImages, captions));
  } // end main

  /** Lists every match of {@code pat} in {@code input} on standard output, then returns them. */
  private static ArrayList<String> printAndCollect(Pattern pat, String input) {
    Matcher mat = pat.matcher(input);
    printMat(mat);
    return retMatches(mat);
  }

  /**
   * Builds the gallery markup: a row of clickable thumbnails, then one caption
   * {@code <div>} plus full-size image per entry.
   */
  private static String buildHtml(
      ArrayList<String> smallImages, ArrayList<String> bigImages, ArrayList<String> captions) {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < smallImages.size(); i++) {
      sb.append("<img src=\"").append(smallImages.get(i))
          .append("\" class=\"smallImage\" onClick=\"jump(").append(i).append(");\">  ");
    }
    sb.append("<br><br>");

    // Captions and big images are paired by position; only emit complete pairs so a
    // page with fewer big images than captions no longer ends in an out-of-range get().
    int pairs = Math.min(captions.size(), bigImages.size());
    if (captions.size() != bigImages.size()) {
      System.err.println("warning: found " + captions.size() + " captions but "
          + bigImages.size() + " big images; writing " + pairs + " entries");
    }
    for (int i = 0; i < pairs; i++) {
      String match = captions.get(i);
      String caption =
          match.substring(CAPTION_PREFIX_LENGTH, match.length() - CAPTION_SUFFIX_LENGTH);
      sb.append("<div class=\"caption\" id=\"").append(i).append("\">").append(caption)
          .append("</div>");
      sb.append("<img src=\"").append(bigImages.get(i)).append("\" class=\"bigImage\"")
          .append(">  <a href='#top'>Back to Top</a><br><br>");
    }
    return sb.toString();
  }

  /**
   * Prints the ordinal, the start/end offsets and the text of every match to standard
   * output.
   *
   * <p>The matcher is reset first, so scanning always starts at the beginning of its
   * input no matter how it was used before.
   *
   * @param mat matcher to scan; it is left positioned after the last match
   */
  public static void printMat(Matcher mat) {
    mat.reset();
    int matchNumber = 0;
    // find() (rather than find(end())) also advances past zero-length matches, so a
    // pattern that can match the empty string cannot loop forever.
    while (mat.find()) {
      matchNumber++;
      System.out.println("match number: " + matchNumber);
      System.out.println("match start/end: " + mat.start() + "," + mat.end());
      System.out.println("match value: " + mat.group());
    }
  } // end printMat

  /**
   * Collects the text of every match, in order of appearance.
   *
   * <p>The matcher is reset first, so it may already have been consumed (for example by
   * {@link #printMat(Matcher)}).
   *
   * @param mat matcher to scan; it is left positioned after the last match
   * @return a new list holding {@code mat.group()} of each match; empty if there is none
   */
  public static ArrayList<String> retMatches(Matcher mat) {
    ArrayList<String> ar = new ArrayList<String>();
    mat.reset();
    // See printMat: find() terminates even when a match is empty.
    while (mat.find()) {
      ar.add(mat.group());
    }
    return ar;
  } // end retMatches
} // end class

No comments:

Post a Comment