原创

Java 存储mysql数据库时如何进行Emoji表情转换和处理

当保存字符串到数据库时出现类似 \xF0\x9F\x92\x94 的报错,排查后发现竟是因为输入中包含了Emoji表情:我的mysql数据库使用的是utf8字符集(每个字符最多只能存储三个字节),而Emoji表情等特殊符号要占四个字节,所以导致数据不能正常存入。
找了很多方法,都没有从根源上解决问题,很是头疼。最后发现github上有个非常好用的轻量级开源工具emoji-java,通过这个工具基本上解决了我大部分的问题,同时配合前端限制Emoji表情输入,才算把问题解决。github地址: https://github.com/vdurmont/emoji-java
下面附上完整的java解决Emoji表情工具类:

package com.im.app.api.util;

import com.github.binarywang.java.emoji.EmojiConverter;
import com.vdurmont.emoji.EmojiParser;
import org.apache.commons.lang3.StringUtils;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Static helpers for detecting, stripping and converting emoji in user-supplied text,
 * written so that strings can be checked or cleaned before they are stored in a MySQL
 * column whose charset cannot hold 4-byte UTF-8 sequences.
 *
 * <p>Detection is done in two ways: a hand-written code-point range check
 * ({@link #containsEmoji(String)}) and the emoji-java {@code EmojiParser}
 * ({@link #hasEmoji(String)} uses both). Conversion between emoji and their textual
 * form is delegated to {@code EmojiConverter}.
 *
 * <p>Created 2019/4/18.
 *
 * @author guo
 * @version 1.0
 */
public class EmojiFilterUtil {
    /** U+20E3 COMBINING ENCLOSING KEYCAP: the character before it is rendered as a keycap emoji. */
    private static final char KEYCAP = 0x20e3;

    /** Shared converter instance obtained from the library's own accessor. */
    private static final EmojiConverter EMOJI_CONVERTER = EmojiConverter.getInstance();

    /**
     * Checks whether the string contains at least one character from the emoji ranges
     * known to this class.
     *
     * <p>The string is walked code point by code point, so a surrogate pair is decoded
     * as a whole and an unpaired surrogate at any position (including the very end of
     * the string) is handled without reading past the end.
     *
     * @param source the text to inspect; may be {@code null}
     * @return {@code true} if an emoji code point, or any character followed by the
     *         keycap mark U+20E3, is found; {@code false} otherwise, including for
     *         {@code null} and empty input
     */
    public static boolean containsEmoji(String source) {
        if (source == null) {
            return false;
        }
        int len = source.length();
        int i = 0;
        while (i < len) {
            // codePointAt yields the lone surrogate itself when the pair is incomplete.
            int codePoint = source.codePointAt(i);
            int next = i + Character.charCount(codePoint);
            if (isEmojiCodePoint(codePoint)) {
                return true;
            }
            // Keycap sequences such as "1" + U+20E3: the base character is plain text,
            // only the following combining mark reveals the emoji.
            if (next < len && source.charAt(next) == KEYCAP) {
                return true;
            }
            i = next;
        }
        return false;
    }

    /**
     * Tells whether a single code point falls into one of the ranges treated as emoji.
     *
     * @param codePoint the Unicode code point to classify
     * @return {@code true} if the code point is inside one of the emoji ranges
     */
    private static boolean isEmojiCodePoint(int codePoint) {
        if (Character.isSupplementaryCodePoint(codePoint)) {
            // Upper bound reaches U+1FAFF so that the pictograph blocks above U+1F77F
            // (e.g. U+1F914) are detected as well.
            return 0x1d000 <= codePoint && codePoint <= 0x1faff;
        }
        return (0x2100 <= codePoint && codePoint <= 0x27ff && codePoint != 0x263b)
                || (0x2b05 <= codePoint && codePoint <= 0x2b07)
                || (0x2934 <= codePoint && codePoint <= 0x2935)
                || (0x3297 <= codePoint && codePoint <= 0x3299)
                || codePoint == 0xa9 || codePoint == 0xae || codePoint == 0x303d
                || codePoint == 0x3030 || codePoint == 0x2b55 || codePoint == 0x2b1c
                || codePoint == 0x2b1b || codePoint == 0x2b50 || codePoint == 0x231a;
    }

    /**
     * Tells whether a UTF-16 code unit is kept by {@link #filterEmoji(String)}.
     *
     * <p>Kept: NUL, tab, line feed, carriage return, U+0020..U+D7FF and U+E000..U+FFFD.
     * Everything else is dropped: the remaining control characters, every surrogate
     * code unit (and therefore every character outside the Basic Multilingual Plane),
     * and U+FFFE / U+FFFF.
     *
     * @param ch the code unit to classify
     * @return {@code true} if the code unit should stay in the filtered string
     */
    private static boolean isRetainedCharacter(char ch) {
        return (ch == 0x0) || (ch == 0x9) || (ch == 0xA)
                || (ch == 0xD)
                || ((ch >= 0x20) && (ch <= 0xD7FF))
                || ((ch >= 0xE000) && (ch <= 0xFFFD));
    }

    /**
     * Removes supplementary-plane characters (which is where most emoji live) and
     * other non-text code units from the string. Emoji inside the Basic Multilingual
     * Plane, such as U+270C, are left in place.
     *
     * @param source the text to clean; may be {@code null}
     * @return {@code source} itself when it is blank or nothing had to be removed,
     *         otherwise a new string without the removed characters (possibly empty
     *         when every character was removed)
     */
    public static String filterEmoji(String source) {
        if (StringUtils.isBlank(source)) {
            return source;
        }
        int len = source.length();
        StringBuilder kept = new StringBuilder(len);
        for (int i = 0; i < len; i++) {
            char ch = source.charAt(i);
            if (isRetainedCharacter(ch)) {
                kept.append(ch);
            }
        }
        // Same length means nothing was dropped, so hand back the original instance.
        return kept.length() == len ? source : kept.toString();
    }

    /**
     * Checks whether the string contains an emoji, using the built-in range check
     * first and the emoji-java parser as a second opinion.
     *
     * @param source the text to inspect; may be {@code null}
     * @return {@code true} if either check finds an emoji; {@code false} otherwise,
     *         including for {@code null} and empty input
     */
    public static boolean hasEmoji(String source) {
        if (StringUtils.isEmpty(source)) {
            return false;
        }
        if (containsEmoji(source)) {
            return true;
        }
        String trimmed = source.trim();
        // If removing all emoji shortens the string, at least one emoji was present.
        String withoutEmoji = EmojiParser.removeAllEmojis(trimmed);
        return withoutEmoji.length() < trimmed.length();
    }

    /**
     * Converts a string back into its form with emoji characters by delegating to
     * {@code EmojiConverter.toUnicode}.
     *
     * @param emojiStr the text to convert; presumably text in which emoji are written
     *                 in an encoded/alias form (see the library documentation)
     * @return whatever {@code EmojiConverter.toUnicode} returns for the input
     */
    public static String emojiConverterUnicodeStr(String emojiStr) {
        return EMOJI_CONVERTER.toUnicode(emojiStr);
    }

    /**
     * Converts a string containing emoji characters into an encoded form by
     * delegating to {@code EmojiConverter.toAlias}.
     *
     * @param str the text that may contain emoji characters
     * @return whatever {@code EmojiConverter.toAlias} returns for the input
     */
    public static String emojiConverterToAlias(String str) {
        return EMOJI_CONVERTER.toAlias(str);
    }

    /**
     * Small manual demo: runs every public helper on one sample emoji and prints the results.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String string = "✌";
        System.out.println(containsEmoji(string));
        System.out.println(filterEmoji(string));
        System.out.println(hasEmoji(string));
        System.out.println(emojiConverterToAlias(string));
        System.out.println(emojiConverterUnicodeStr(string));
    }
}

其中pom.xml文件中需要添加的依赖如下:

        <!-- 后加的关于过滤表情的,可以去掉 -->
        <dependency>
            <groupId>com.github.binarywang</groupId>
            <artifactId>java-emoji-converter</artifactId>
            <version>0.1.1</version>
        </dependency>
正文到此结束
该篇文章的评论功能已被站长关闭
本文目录