Java regex filter for HTML img tag
November 11, 2011 10:05:22 Last update: November 11, 2011 10:12:01
This is an HTML image tag filter using Java regex. It takes a string, finds the img tags, replaces the src attribute with one provided by the filter, then adds a class name to the class attribute.
import java.util.regex.*;
import java.io.*;

/**
 * Regex-based filter that rewrites HTML {@code <img>} tags: the {@code src}
 * attribute is replaced and a class name is added to the {@code class}
 * attribute (the attribute is created when the tag has none).
 *
 * <p>Only tags whose {@code src} value is quoted (single or double quotes) are
 * rewritten; everything else in the input is copied through unchanged. This is
 * a pattern-based filter, not an HTML parser: markup with unbalanced quotes
 * inside a tag may not be recognised.
 */
public class ImgTagFilter {

    /** Replacement {@code src} used by {@link #rewriteImgTag(String)}. */
    private static final String DEFAULT_SRC = "MY_IMAGE_SRC_REPLACEMENT";

    /** Class name added by {@link #rewriteImgTag(String)}. */
    private static final String DEFAULT_CLASS = "MY_CLASS";

    /**
     * Reluctant run of attribute text inside a single tag. Quoted values are
     * consumed as a unit, so a '>' inside a quoted value does not end the tag
     * and the run can never spill over into a following tag.
     */
    private static final String ATTRS = "(?:[^>\"']|\"[^\"]*\"|'[^']*')*?";

    /** A quoted attribute value; group A is the double-quoted form, group B the single-quoted one. */
    private static final String QUOTED_VALUE = "(?:\"([^\"]*)\"|'([^']*)')";

    /**
     * Groups: 1 = attributes before src (always ends with whitespace),
     * 2 / 3 = old src value (double / single quoted), 4 = attributes after src,
     * 5 = optional whitespace and slash right before the closing '>'.
     * The look-behind keeps names such as "data-src" from being taken for "src".
     */
    private static final Pattern IMG_PATTERN = Pattern.compile(
            "<img(\\s" + ATTRS + ")(?<=\\s)src\\s*=\\s*" + QUOTED_VALUE
                    + "(" + ATTRS + ")(\\s*/?)>",
            Pattern.CASE_INSENSITIVE);

    /**
     * A class attribute with a quoted value; group 1 = double-quoted value,
     * group 2 = single-quoted value. The negative look-behind requires the name
     * to start the segment or follow whitespace (so "data-class" is skipped).
     */
    private static final Pattern CLASS_PATTERN = Pattern.compile(
            "(?<!\\S)class\\s*=\\s*" + QUOTED_VALUE,
            Pattern.CASE_INSENSITIVE);

    /**
     * Reads {@code img-parse-test.html} from the working directory and prints
     * the rewritten markup to standard output.
     */
    public static void main(String[] args) throws Exception {
        ImgTagFilter filter = new ImgTagFilter();
        String str = filter.getFileContents("img-parse-test.html");
        System.out.println(filter.rewriteImgTag(str));
    }

    /**
     * Rewrites every matching img tag using the built-in placeholder src
     * ({@code MY_IMAGE_SRC_REPLACEMENT}) and class name ({@code MY_CLASS}).
     *
     * @param str markup to filter
     * @return the markup with all matching img tags rewritten
     * @throws NullPointerException if {@code str} is null
     */
    public String rewriteImgTag(String str) {
        return rewriteImgTag(str, DEFAULT_SRC, DEFAULT_CLASS);
    }

    /**
     * Rewrites every matching img tag in {@code str}.
     *
     * <p>For each tag the src attribute is replaced by {@code newSrc} (always
     * emitted in double quotes) and {@code extraClass} is appended to the first
     * class attribute found, or a new class attribute is added after the other
     * attributes when there is none. The tag terminator ({@code >} or
     * {@code />}) is written back exactly as it appeared in the input.
     *
     * @param str        markup to filter
     * @param newSrc     value for the src attribute
     * @param extraClass class name to add
     * @return the markup with all matching img tags rewritten
     * @throws NullPointerException if any argument is null
     */
    public String rewriteImgTag(String str, String newSrc, String extraClass) {
        if (newSrc == null || extraClass == null) {
            throw new NullPointerException("newSrc and extraClass must not be null");
        }
        StringBuilder sb = new StringBuilder(str.length() + 64);
        Matcher m = IMG_PATTERN.matcher(str);
        int start = 0;
        while (m.find()) {
            // Copy the text between the previous match and this tag verbatim.
            sb.append(str, start, m.start());

            String before = m.group(1);
            String after = m.group(4);
            String beforeWithClass = addClass(before, extraClass);
            // Only touch the trailing attributes when the leading ones had no class.
            String afterWithClass =
                    beforeWithClass == null ? addClass(after, extraClass) : null;

            sb.append("<img");
            sb.append(beforeWithClass != null ? beforeWithClass : before);
            // "before" already ends with whitespace, so no separator is needed here.
            sb.append("src=\"").append(escapeForAttribute(newSrc, '"')).append('"');
            sb.append(afterWithClass != null ? afterWithClass : after);
            if (beforeWithClass == null && afterWithClass == null) {
                sb.append(" class=\"")
                        .append(escapeForAttribute(extraClass, '"'))
                        .append('"');
            }
            // Restore the terminator that the pattern consumed.
            sb.append(m.group(5)).append('>');

            start = m.end();
        }
        sb.append(str, start, str.length());
        return sb.toString();
    }

    /**
     * Appends {@code extraClass} to the first class attribute in {@code attrs},
     * keeping the attribute's original quote character.
     *
     * @return the modified attribute text, or {@code null} when {@code attrs}
     *         contains no class attribute
     */
    private static String addClass(String attrs, String extraClass) {
        Matcher mc = CLASS_PATTERN.matcher(attrs);
        if (!mc.find()) {
            return null;
        }
        char quote = mc.group(1) != null ? '"' : '\'';
        String existing = quote == '"' ? mc.group(1) : mc.group(2);
        String addition = escapeForAttribute(extraClass, quote);
        // Avoid a leading blank when the existing value is empty.
        String merged = existing.trim().length() == 0
                ? addition
                : existing + " " + addition;
        return attrs.substring(0, mc.start())
                + "class=" + quote + merged + quote
                + attrs.substring(mc.end());
    }

    /** Escapes occurrences of {@code quote} so {@code value} cannot end the attribute early. */
    private static String escapeForAttribute(String value, char quote) {
        return quote == '"'
                ? value.replace("\"", "&quot;")
                : value.replace("'", "&#39;");
    }

    /**
     * Reads the whole file and decodes it as UTF-8.
     *
     * <p>The stream is read until end-of-file rather than trusting the length
     * reported by the file system, and it is closed even when reading fails.
     *
     * @param fileName path of the file to read
     * @return the file contents
     * @throws Exception if the file cannot be opened or read
     */
    String getFileContents(String fileName) throws Exception {
        File theFile = new File(fileName);
        InputStream in = new FileInputStream(theFile);
        try {
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            byte[] buffer = new byte[8192];
            int n;
            // read() returns -1 at end of stream; stop there.
            while ((n = in.read(buffer)) != -1) {
                out.write(buffer, 0, n);
            }
            return out.toString("UTF-8");
        } finally {
            in.close();
        }
    }
}
Test file:
<div id="HTML snippet"> <img src="img/big/txt-marquee-shop-online.png" class="slider-1-2-3" alt="Simplify your online experience with 3-click checkout!" /> <p>somthignkg</p> <ul> <li>List 1</li> <li>List 2</li> <li>List 3</li> </ul> <img class="my class" src="images/theImage.gif" alt="Title"/> <img style="border: #ddd 1px solid; color: #aaa;" src="http://www.ags.com/img/myImage.png" alt="another title"/> trailer </div>