2007-06-19
URL中汉字和utf编码转换
java 代码
- //转换为%E4%BD%A0形式
- public static String toUtf8String(String s) {
- StringBuffer sb = new StringBuffer();
- for (int i = 0; i < s.length(); i++) {
- char c = s.charAt(i);
- if (c >= 0 && c <= 255) {
- sb.append(c);
- } else {
- byte[] b;
- try {
- b = String.valueOf(c).getBytes("utf-8");
- } catch (Exception ex) {
- System.out.println(ex);
- b = new byte[0];
- }
- for (int j = 0; j < b.length; j++) {
- int k = b[j];
- if (k < 0)
- k += 256;
- sb.append("%" + Integer.toHexString(k).toUpperCase());
- }
- }
- }
- return sb.toString();
- }
- //将%E4%BD%A0转换为汉字
- public static String unescape(String s) {
- StringBuffer sbuf = new StringBuffer () ;
- int l = s.length() ;
- int ch = -1 ;
- int b, sumb = 0;
- for (int i = 0, more = -1 ; i < l ; i++) {
- /* Get next byte b from URL segment s */
- switch (ch = s.charAt(i)) {
- case '%':
- ch = s.charAt (++i) ;
- int hb = (Character.isDigit ((char) ch)
- ? ch - '0'
- : 10+Character.toLowerCase((char) ch) - 'a') & 0xF ;
- ch = s.charAt (++i) ;
- int lb = (Character.isDigit ((char) ch)
- ? ch - '0'
- : 10+Character.toLowerCase ((char) ch)-'a') & 0xF ;
- b = (hb << 4) | lb ;
- break ;
- case '+':
- b = ' ' ;
- break ;
- default:
- b = ch ;
- }
- /* Decode byte b as UTF-8, sumb collects incomplete chars */
- if ((b & 0xc0) == 0x80) { // 10xxxxxx (continuation byte)
- sumb = (sumb << 6) | (b & 0x3f) ; // Add 6 bits to sumb
- if (--more == 0) sbuf.append((char) sumb) ; // Add char to sbuf
- } else if ((b & 0x80) == 0x00) { // 0xxxxxxx (yields 7 bits)
- sbuf.append((char) b) ; // Store in sbuf
- } else if ((b & 0xe0) == 0xc0) { // 110xxxxx (yields 5 bits)
- sumb = b & 0x1f;
- more = 1; // Expect 1 more byte
- } else if ((b & 0xf0) == 0xe0) { // 1110xxxx (yields 4 bits)
- sumb = b & 0x0f;
- more = 2; // Expect 2 more bytes
- } else if ((b & 0xf8) == 0xf0) { // 11110xxx (yields 3 bits)
- sumb = b & 0x07;
- more = 3; // Expect 3 more bytes
- } else if ((b & 0xfc) == 0xf8) { // 111110xx (yields 2 bits)
- sumb = b & 0x03;
- more = 4; // Expect 4 more bytes
- } else /*if ((b & 0xfe) == 0xfc)*/ { // 1111110x (yields 1 bit)
- sumb = b & 0x01;
- more = 5; // Expect 5 more bytes
- }
- /* We don't test if the UTF-8 encoding is well-formed */
- }
- return sbuf.toString() ;
- }
发表评论
- 浏览: 15650 次
- 性别:

- 来自: 北京

- 详细资料
搜索本博客
最近加入圈子
链接
最新评论
-
IBM MQ API以及其他的资料
狂顶你.
-- by pooslife -
websphere 集群问题
robbin 写道这么容易求证的问题,你怎么不自己动手试试看呢?我试了啊。是不行 ...
-- by shikonglaike -
websphere 集群问题
这么容易求证的问题,你怎么不自己动手试试看呢?
-- by robbin -
职场心得
very good
-- by tt -
IBM MQ API以及其他的资料
thanks,i need this!
-- by archt






评论排行榜