The output of this program is the HTML source for the blog post "Pictures of North American Inuit peoples life style".
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Scrapes image URLs and captions out of a saved text file and prints an HTML
 * gallery for them: a strip of thumbnails followed by one captioned large
 * image per entry.
 *
 * <p>Diagnostic information about every regex match is written to standard
 * output before the generated HTML.
 *
 * @author ranjit sandhu
 * @date Fri, Mar 14, 2014 4:02:04 PM
 */
public class htmlParser {

    /** Input file used when no path is given on the command line. */
    private static final String DEFAULT_INPUT_PATH = "c:\\ranjit\\code\\java_read_file.txt";

    /*
     * NOTE(review): in all three patterns ".+" is greedy and "." does not match
     * line terminators, so each pattern yields at most one match per input line,
     * running from the first prefix to the last suffix on that line. That is
     * only correct if every URL/caption sits on its own line - confirm against
     * the actual input format before relying on it.
     */

    /** Matches the URL of a large (980x551) image. */
    private static final Pattern BIG_IMAGE =
            Pattern.compile("http://s.imwx.com/dru/2014/02/.+_980x551.jpg");

    /** Matches the URL of a thumbnail (85x64) image. */
    private static final Pattern SMALL_IMAGE =
            Pattern.compile("http://s.imwx.com/dru/2014/02/.+_85x64.jpg");

    /** Matches a caption field from its key up to and including a '('. */
    private static final Pattern CAPTION = Pattern.compile("caption\":\".+\\(");

    /** Literal text that every {@link #CAPTION} match starts with. */
    private static final String CAPTION_PREFIX = "caption\":\"";

    /**
     * Number of characters dropped from the end of a caption match: the '('
     * that terminates the match and the character before it (presumably a
     * space - confirm against the input).
     */
    private static final int CAPTION_SUFFIX_LENGTH = 2;

    /**
     * Reads the input file, extracts the image URLs and captions, and prints
     * the resulting HTML to standard output.
     *
     * @param args optional; {@code args[0]} overrides the default input path
     * @throws IOException if the input file cannot be read
     */
    public static void main(String[] args) throws IOException {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        String input = readInput(new File(path));

        ArrayList<String> bigImages = traceAndCollect(BIG_IMAGE, input);
        ArrayList<String> smallImages = traceAndCollect(SMALL_IMAGE, input);
        ArrayList<String> captions = traceAndCollect(CAPTION, input);

        System.out.println(buildHtml(smallImages, bigImages, captions));
    } // end main

    /**
     * Reads a whole file into a string using the platform default charset.
     * This is the same decoding the previously used single-argument
     * {@code FileUtils.readFileToString(File)} applied.
     */
    private static String readInput(File file) throws IOException {
        return new String(Files.readAllBytes(file.toPath()), Charset.defaultCharset());
    }

    /** Prints diagnostics for every match of {@code pattern} and returns the matched texts. */
    private static ArrayList<String> traceAndCollect(Pattern pattern, String input) {
        Matcher matcher = pattern.matcher(input);
        printMat(matcher);
        return retMatches(matcher);
    }

    /**
     * Renders the gallery markup: one clickable thumbnail per small image,
     * then one caption/large-image pair per entry.
     */
    private static String buildHtml(List<String> smallImages, List<String> bigImages,
            List<String> captions) {
        StringBuilder html = new StringBuilder();

        // The onClick handler calls a JavaScript function named "jump", which
        // is not defined here and must be supplied by the hosting page.
        for (int i = 0; i < smallImages.size(); i++) {
            html.append("<img src=\"").append(smallImages.get(i))
                .append("\" class=\"smallImage\" onClick=\"jump(")
                .append(i).append(");\"> ");
        }
        html.append("<br><br>");

        // Captions and big images are paired by position. Only emit complete
        // pairs so that a short bigImages list cannot raise
        // IndexOutOfBoundsException and discard all output.
        int pairs = Math.min(captions.size(), bigImages.size());
        if (captions.size() > bigImages.size()) {
            System.err.println("warning: " + captions.size() + " captions but only "
                    + bigImages.size() + " big images; extra captions skipped");
        }
        for (int i = 0; i < pairs; i++) {
            String match = captions.get(i);
            // Strip the leading key and the trailing two characters of the match.
            String caption = match.substring(CAPTION_PREFIX.length(),
                    match.length() - CAPTION_SUFFIX_LENGTH);
            html.append("<div class=\"caption\" id=\"").append(i).append("\">")
                .append(caption).append("</div>");
            html.append("<img src=\"").append(bigImages.get(i)).append("\" class=\"bigImage\"")
                .append("> <a href='#top'>Back to Top</a><br><br>");
        }
        return html.toString();
    }

    /**
     * Prints the ordinal, start/end offsets and text of every match to
     * standard output.
     *
     * <p>The matcher is reset first, so scanning always starts at the beginning
     * of its input regardless of earlier use. Iteration uses
     * {@link Matcher#find()}, which steps past zero-length matches; the earlier
     * {@code find(index)} loop never advanced on an empty match and did not
     * terminate.
     *
     * @param mat matcher to scan; its state is modified
     */
    public static void printMat(Matcher mat) {
        mat.reset();
        int matchNumber = 0;
        while (mat.find()) {
            matchNumber++;
            System.out.println("match number: " + matchNumber);
            System.out.println("match start/end: " + mat.start() + "," + mat.end());
            System.out.println("match value: " + mat.group());
        }
    } // end printMat

    /**
     * Collects the text of every match, in order of occurrence.
     *
     * <p>The matcher is reset first, so the result does not depend on earlier
     * use of the same matcher (for example by {@link #printMat(Matcher)}).
     * Zero-length matches are handled without looping forever.
     *
     * @param mat matcher to scan; its state is modified
     * @return a new list of matched substrings, empty if nothing matches
     */
    public static ArrayList<String> retMatches(Matcher mat) {
        ArrayList<String> matches = new ArrayList<>();
        mat.reset();
        while (mat.find()) {
            matches.add(mat.group());
        }
        return matches;
    } // end retMatches
} // end class
No comments:
Post a Comment