The output of this program is the HTML source for this blog post: "Pictures of North American Inuit peoples life style".
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
/**
 * Builds an HTML image gallery from a saved text file.
 *
 * <p>The input text is scanned for three kinds of matches: large image URLs
 * (ending in {@code _980x551.jpg}), thumbnail URLs (ending in
 * {@code _85x64.jpg}) and {@code caption":"...(} fragments. Every match is
 * echoed to standard output as a diagnostic, and then the generated HTML is
 * printed to standard output as well.
 *
 * @author ranjit sandhu
 * @date Fri, Mar 14, 2014 4:02:04 PM
 */
public class htmlParser {

    /** Input file used when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT_PATH = "c:\\ranjit\\code\\java_read_file.txt";

    // NOTE: '.' does not cross line terminators and ".+" is greedy, so two URLs
    // of the same kind on one input line are captured as a single match.
    // The literal dots of the host name and file extension are escaped so that
    // they only match an actual '.' character.
    private static final Pattern BIG_IMAGE_PATTERN =
            Pattern.compile("http://s\\.imwx\\.com/dru/2014/02/.+_980x551\\.jpg");
    private static final Pattern SMALL_IMAGE_PATTERN =
            Pattern.compile("http://s\\.imwx\\.com/dru/2014/02/.+_85x64\\.jpg");
    private static final Pattern CAPTION_PATTERN = Pattern.compile("caption\":\".+\\(");

    /** Length of the {@code caption":"} prefix that every caption match starts with. */
    private static final int CAPTION_PREFIX_LENGTH = "caption\":\"".length();

    /**
     * Number of trailing characters dropped from a caption match: the matched
     * '(' and the single character before it (presumably a space - confirm
     * against the real input).
     */
    private static final int CAPTION_SUFFIX_LENGTH = 2;

    /**
     * Reads the input file, prints every match as a diagnostic and then prints
     * the generated gallery HTML.
     *
     * @param args optional; {@code args[0]} overrides the default input path
     * @throws IOException if the input file cannot be read
     */
    public static void main(String[] args) throws IOException {
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        // The platform default charset is what the previous
        // FileUtils.readFileToString(File) call used; it is now spelled out.
        String input = new String(
                Files.readAllBytes(new File(inputPath).toPath()), Charset.defaultCharset());

        List<String> bigImages = printAndCollect(BIG_IMAGE_PATTERN, input);
        List<String> smallImages = printAndCollect(SMALL_IMAGE_PATTERN, input);
        List<String> captions = printAndCollect(CAPTION_PATTERN, input);

        if (captions.size() != bigImages.size()) {
            // Previously fewer big images than captions ended in an
            // IndexOutOfBoundsException; now only complete pairs are emitted.
            System.err.println("warning: found " + captions.size() + " captions but "
                    + bigImages.size() + " big images; only complete pairs are emitted");
        }

        System.out.println(buildHtml(smallImages, bigImages, captions));
    } // end main

    /** Prints all matches of {@code pattern} in {@code input}, then returns them. */
    private static List<String> printAndCollect(Pattern pattern, String input) {
        Matcher matcher = pattern.matcher(input);
        printMat(matcher);
        return retMatches(matcher);
    }

    /** Renders the thumbnail strip followed by one caption + big image per pair. */
    private static String buildHtml(List<String> smallImages, List<String> bigImages,
            List<String> captions) {
        StringBuilder html = new StringBuilder();

        for (int i = 0; i < smallImages.size(); i++) {
            html.append("<img src=\"").append(smallImages.get(i))
                .append("\" class=\"smallImage\" onClick=\"jump(").append(i).append(");\"> ");
        }
        html.append("<br><br>");

        int pairCount = Math.min(captions.size(), bigImages.size());
        for (int i = 0; i < pairCount; i++) {
            String rawCaption = captions.get(i);
            // Strip the leading caption":" and the trailing "?(" from the match.
            String caption = rawCaption.substring(
                    CAPTION_PREFIX_LENGTH, rawCaption.length() - CAPTION_SUFFIX_LENGTH);
            html.append("<div class=\"caption\" id=\"").append(i).append("\">")
                .append(caption).append("</div>");
            html.append("<img src=\"").append(bigImages.get(i)).append("\" class=\"bigImage\"")
                .append("> <a href='#top'>Back to Top</a><br><br>");
        }
        return html.toString();
    }

    /**
     * Prints the ordinal, start/end offsets and text of every match to
     * standard output.
     *
     * <p>The matcher is reset first, so the scan always starts at the beginning
     * of the input regardless of earlier use. Successive {@code find()} calls
     * are used instead of {@code find(mat.end())} so that a pattern able to
     * match the empty string cannot loop forever.
     *
     * @param mat matcher to scan; left exhausted when this method returns
     */
    public static void printMat(Matcher mat) {
        mat.reset();
        int matchNumber = 0;
        while (mat.find()) {
            matchNumber++;
            System.out.println("match number: " + matchNumber);
            System.out.println("match start/end: " + mat.start() + "," + mat.end());
            System.out.println("match value: " + mat.group());
        }
    } // end printMat

    /**
     * Collects the text of every match, in order of appearance.
     *
     * <p>The matcher is reset first, so the scan always starts at the beginning
     * of the input regardless of earlier use. Successive {@code find()} calls
     * are used instead of {@code find(mat.end())} so that a pattern able to
     * match the empty string cannot loop forever.
     *
     * @param mat matcher to scan; left exhausted when this method returns
     * @return a new list of matched substrings; empty when nothing matches
     */
    public static ArrayList<String> retMatches(Matcher mat) {
        ArrayList<String> matches = new ArrayList<>();
        mat.reset();
        while (mat.find()) {
            matches.add(mat.group());
        }
        return matches;
    } // end retMatches
} // end class
No comments:
Post a Comment