提示:文章写完后,目录可以自动生成,如何生成可参考右边的帮助文档
文章目录
- 背景
- 具体实现
背景
为了更准确地分析用户下单行为的来源渠道,并实现精细化运营与风险控制,我们希望在用户下单时,能够通过请求中的 User-Agent(UA)信息提取其使用的客户端 App 的包名(Package Name)。
具体实现
import com.ejoined.commons.plugin.utils.StringUtils;
import java.net.URLDecoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Infers the package name of the client app from an HTTP User-Agent string.
 *
 * <p>Several strategies are tried in order and the first non-browser result wins:
 * <ol>
 *   <li>a trailing {@code /<digits>/<package>} path segment;</li>
 *   <li>a keyword from the predefined app library;</li>
 *   <li>an app-store style {@code (<package>; ...)} group;</li>
 *   <li>heuristics: {@code <package>/<version>} tokens, then tokens inside parentheses;</li>
 *   <li>a scan of the whole string for anything shaped like a package name.</li>
 * </ol>
 *
 * <p>All state is built once in the static initializer and never modified afterwards.
 */
public class ComprehensiveUserAgentParserUtil {

    /** Coarse grouping of the predefined applications. */
    private enum AppCategory {
        SOCIAL, ECOMMERCE, FOOD_DELIVERY, VIDEO, UTILITY
    }

    /** One entry of the predefined app library. */
    private static final class AppInfo {
        final String appName;
        final String keyword;
        final String packageName;
        final AppCategory category;
        /** The keyword, an optional slash, then at least one non-space, non-parenthesis character. */
        final Pattern matchPattern;

        AppInfo(String appName, String keyword, String packageName, AppCategory category) {
            this.appName = appName;
            this.keyword = keyword;
            this.packageName = packageName;
            this.category = category;
            this.matchPattern = Pattern.compile(
                    Pattern.quote(keyword) + "/?([^\\s\\(\\)]+)",
                    Pattern.CASE_INSENSITIVE
            );
        }
    }

    /** Predefined app library (keyword -> app info), kept in registration order. */
    private static final Map<String, AppInfo> PREDEFINED_APP_LIBRARY = new LinkedHashMap<>();

    /**
     * Library entries ordered from the longest keyword to the shortest, so that a specific keyword
     * ("KwaiLite", "MicroMessenger") is tried before a short one it contains or co-occurs with
     * ("Kwai", "QQ"). Entries with equal keyword length keep their registration order.
     */
    private static final List<AppInfo> APPS_BY_SPECIFICITY;

    /** Browser packages; these are never reported as the ordering app. */
    private static final Set<String> BROWSER_PACKAGE_SET = new HashSet<>(Arrays.asList(
            "com.android.chrome", "com.UCMobile", "com.tencent.mtt", "com.apple.mobilesafari"
    ));

    /** Whole-string check: identifier segments separated by dots, at least two dots. */
    private static final Pattern STRICT_PACKAGE_PATTERN = Pattern.compile(
            "^[a-zA-Z_][a-zA-Z0-9_]*(\\.[a-zA-Z_][a-zA-Z0-9_]*){2,}$"
    );

    /**
     * Unanchored variant of {@link #STRICT_PACKAGE_PATTERN} for scanning inside a longer string.
     * The look-behinds stop a match from starting in the middle of a token or right after
     * {@code ://}, so host names inside URLs are not mistaken for packages.
     */
    private static final Pattern PACKAGE_SCAN_PATTERN = Pattern.compile(
            "(?<![A-Za-z0-9_.])(?<!://)[a-zA-Z_][a-zA-Z0-9_]*(?:\\.[a-zA-Z_][a-zA-Z0-9_]*){2,}"
    );

    /** {@code /<digits>/<candidate>} followed by whitespace or end of input. */
    private static final Pattern SUFFIX_PATTERN = Pattern.compile("/\\d+/([\\w\\.]+)(?:\\s|$)");

    /** {@code (<candidate>;} where the candidate may not contain parentheses. */
    private static final Pattern APP_STORE_PATTERN = Pattern.compile("\\(([^;()]+);");

    /** {@code <candidate>/<version>} token. */
    private static final Pattern VERSIONED_TOKEN_PATTERN =
            Pattern.compile("([a-zA-Z_][a-zA-Z0-9_\\.]*)/[0-9\\.]+");

    /** Content of one parenthesised group. */
    private static final Pattern BRACKET_PATTERN = Pattern.compile("\\(([^)]+)\\)");

    /** Separators used to split the content of a parenthesised group. */
    private static final Pattern BRACKET_SEPARATOR = Pattern.compile(";|,| ");

    /**
     * Words that disqualify a candidate when they are its first segment.
     * Stored lower-case; lookups lower-case the segment so the comparison is case-insensitive.
     */
    private static final Set<String> SYSTEM_WORD_BLACKLIST = new HashSet<>(Arrays.asList(
            "linux", "android", "iphone", "ipad", "windows", "mac", "ios", "cpu", "build",
            "nettype", "language", "version", "mobile", "safari", "webkit", "wv", "khtml"
    ));

    static {
        // Social apps (Chinese and English keywords)
        registerApp("微信", "MicroMessenger", "com.tencent.mm", AppCategory.SOCIAL);
        registerApp("微信", "微信", "com.tencent.mm", AppCategory.SOCIAL);
        registerApp("微信", "WeChat", "com.tencent.mm", AppCategory.SOCIAL);
        registerApp("QQ", "QQ", "com.tencent.mobileqq", AppCategory.SOCIAL);
        registerApp("微博", "Weibo", "com.sina.weibo", AppCategory.SOCIAL);
        registerApp("快手", "Kwai", "com.kuaishou.nebula", AppCategory.SOCIAL);
        registerApp("快手极速版", "KwaiLite", "com.kuaishou.nebula.lite", AppCategory.SOCIAL);
        registerApp("小红书", "xingin", "com.xingin.xhs", AppCategory.SOCIAL);
        registerApp("知乎", "Zhihu", "com.zhihu.android", AppCategory.SOCIAL);
        // E-commerce apps
        registerApp("淘宝", "AliApp(TB", "com.taobao.taobao", AppCategory.ECOMMERCE);
        registerApp("天猫", "AliApp(TM", "com.tmall.wireless", AppCategory.ECOMMERCE);
        registerApp("京东", "JD4iPhone", "com.jingdong.app.mall", AppCategory.ECOMMERCE);
        registerApp("拼多多", "pinduoduo", "com.xunmeng.pinduoduo", AppCategory.ECOMMERCE);
        registerApp("亚马逊", "Amazon", "com.amazon.mShop.android", AppCategory.ECOMMERCE);
        registerApp("唯品会", "vipshop", "com.achievo.vipshop", AppCategory.ECOMMERCE);
        // Food-delivery apps
        registerApp("美团", "waimai", "com.sankuai.meituan", AppCategory.FOOD_DELIVERY);
        registerApp("饿了么", "Eleme", "me.ele", AppCategory.FOOD_DELIVERY);
        registerApp("百度外卖", "baidu.waimai", "com.baidu.waimai", AppCategory.FOOD_DELIVERY);
        // Video apps
        registerApp("抖音", "Aweme", "com.ss.android.ugc.aweme", AppCategory.VIDEO);
        registerApp("TikTok", "com.zhiliaoapp.musically", "com.zhiliaoapp.musically", AppCategory.VIDEO);
        registerApp("爱奇艺", "IQIYI", "com.qiyi.video", AppCategory.VIDEO);
        registerApp("B站", "BiliBili", "tv.danmaku.bili", AppCategory.VIDEO);
        registerApp("优酷", "Youku", "com.youku.phone", AppCategory.VIDEO);
        registerApp("腾讯视频", "TencentVideo", "com.tencent.qqlive", AppCategory.VIDEO);
        // Utility apps
        registerApp("支付宝", "AlipayClient", "com.eg.android.AlipayGphone", AppCategory.UTILITY);
        registerApp("华为应用商店", "com.huawei.appmarket", "com.huawei.appmarket", AppCategory.UTILITY);

        // List.sort is stable, so equally long keywords stay in registration order.
        List<AppInfo> ordered = new ArrayList<>(PREDEFINED_APP_LIBRARY.values());
        ordered.sort((a, b) -> Integer.compare(b.keyword.length(), a.keyword.length()));
        APPS_BY_SPECIFICITY = Collections.unmodifiableList(ordered);
    }

    /** Static utility; not meant to be instantiated. */
    private ComprehensiveUserAgentParserUtil() {
    }

    /** Adds one entry to the predefined library; a repeated keyword replaces the earlier entry. */
    private static void registerApp(String appName, String keyword, String packageName, AppCategory category) {
        PREDEFINED_APP_LIBRARY.put(keyword, new AppInfo(appName, keyword, packageName, category));
    }

    /**
     * Extracts the app package name from a User-Agent string.
     *
     * @param userAgent the User-Agent header value; may be {@code null}
     * @return the detected package name, or an empty string when the input is null/blank or
     *         no non-browser package could be identified
     */
    public static String extractPackageName(String userAgent) {
        if (userAgent == null || userAgent.trim().isEmpty()) {
            return "";
        }
        // 1. Trailing path segment, e.g. ".../6817/cn.copper.fokapi.mysterious"
        String packageName = parseSuffixPackage(userAgent);
        if (isReportable(packageName)) {
            return packageName;
        }
        // 2. Predefined app keywords
        packageName = matchPredefinedApps(userAgent);
        if (isReportable(packageName)) {
            return packageName;
        }
        // 3. App-store format, e.g. "(com.huawei.appmarket; 11.0.0)"
        packageName = parseAppStoreFormatPackage(userAgent);
        if (isReportable(packageName)) {
            return packageName;
        }
        // 4. Heuristics: "<package>/<version>" tokens and parenthesised tokens
        packageName = parseHeuristicPackage(userAgent);
        if (isReportable(packageName)) {
            return packageName;
        }
        // 5. Last resort: anything in the string that is shaped like a package name
        packageName = parseStrictModePackage(userAgent);
        return isReportable(packageName) ? packageName : "";
    }

    /**
     * Looks for a {@code /<digits>/<package>} suffix, e.g.
     * {@code open_news_u_s/6817/cn.copper.fokapi.mysterious}, in the URL-decoded User-Agent.
     */
    private static String parseSuffixPackage(String userAgent) {
        return firstAcceptable(SUFFIX_PATTERN.matcher(urlDecodeOrRaw(userAgent)), 1);
    }

    /**
     * URL-decodes the value as UTF-8. A User-Agent that is not actually URL-encoded can contain a
     * stray {@code %}; in that case the raw value is returned instead of giving up.
     */
    private static String urlDecodeOrRaw(String userAgent) {
        try {
            return URLDecoder.decode(userAgent, "UTF-8");
        } catch (IllegalArgumentException | java.io.UnsupportedEncodingException ignored) {
            // Malformed escape sequence (or, theoretically, missing charset): use the value as-is.
            return userAgent;
        }
    }

    /** Returns the package of the most specific predefined app whose keyword occurs in the UA. */
    private static String matchPredefinedApps(String userAgent) {
        for (AppInfo appInfo : APPS_BY_SPECIFICITY) {
            if (appInfo.matchPattern.matcher(userAgent).find()) {
                return appInfo.packageName;
            }
        }
        return "";
    }

    /** Looks for an app-store style group such as {@code (com.huawei.appmarket; 11.0.0)}. */
    private static String parseAppStoreFormatPackage(String userAgent) {
        return firstAcceptable(APP_STORE_PATTERN.matcher(userAgent), 1);
    }

    /**
     * Heuristic matching: first every {@code <package>/<version>} token, then every token found
     * inside parentheses (split on {@code ;}, {@code ,} and space).
     */
    private static String parseHeuristicPackage(String userAgent) {
        String fromVersionToken = firstAcceptable(VERSIONED_TOKEN_PATTERN.matcher(userAgent), 1);
        if (!fromVersionToken.isEmpty()) {
            return fromVersionToken;
        }
        Matcher bracketMatcher = BRACKET_PATTERN.matcher(userAgent);
        while (bracketMatcher.find()) {
            for (String part : BRACKET_SEPARATOR.split(bracketMatcher.group(1))) {
                if (isAcceptable(part)) {
                    return part;
                }
            }
        }
        return "";
    }

    /**
     * Scans the whole User-Agent for package-shaped tokens and returns the longest acceptable one
     * (the first one found when several share the maximum length).
     */
    private static String parseStrictModePackage(String userAgent) {
        String longest = "";
        Matcher matcher = PACKAGE_SCAN_PATTERN.matcher(userAgent);
        while (matcher.find()) {
            String candidate = matcher.group();
            if (candidate.length() > longest.length() && isAcceptable(candidate)) {
                longest = candidate;
            }
        }
        return longest;
    }

    /** Walks all matches and returns the first captured group that is an acceptable package. */
    private static String firstAcceptable(Matcher matcher, int group) {
        while (matcher.find()) {
            String candidate = matcher.group(group).trim();
            if (isAcceptable(candidate)) {
                return candidate;
            }
        }
        return "";
    }

    /** A candidate is acceptable when it is a valid package name and not a known browser. */
    private static boolean isAcceptable(String candidate) {
        return isValidPackage(candidate) && !isBrowserPackage(candidate);
    }

    /** A strategy result is reportable when it is non-empty and not a known browser. */
    private static boolean isReportable(String packageName) {
        return packageName != null && !packageName.isEmpty() && !isBrowserPackage(packageName);
    }

    /**
     * Validates a candidate: at least 6 characters, matches {@link #STRICT_PACKAGE_PATTERN}, and
     * its first segment is not a blacklisted system word (compared case-insensitively).
     */
    private static boolean isValidPackage(String packageName) {
        if (packageName == null || packageName.length() < 6) {
            return false;
        }
        if (!STRICT_PACKAGE_PATTERN.matcher(packageName).matches()) {
            return false;
        }
        // The strict pattern guarantees at least two dots, so indexOf('.') is always > 0 here.
        String firstSegment = packageName.substring(0, packageName.indexOf('.'));
        return !SYSTEM_WORD_BLACKLIST.contains(firstSegment.toLowerCase(Locale.ROOT));
    }

    /** Tells whether the package belongs to a known browser. */
    private static boolean isBrowserPackage(String packageName) {
        return BROWSER_PACKAGE_SET.contains(packageName);
    }
}