Java 正则表达式的命名捕获组

一、正则分组查询

1. 普通捕获组

从正则表达式左侧开始,每出现一个左括号 “(” 记作一个分组,分组编号从 1 开始;编号 0 代表整个表达式匹配到的内容。

1
2
3
4
5
6
7
8
9
/**
 * Demonstrates numbered capturing groups: splits the date "2021-12-31" into
 * year, month and day and prints each part on its own line.
 *
 * @param args unused
 */
public static void main(String[] args) {
    String text = "2021-12-31";
    Pattern pattern = Pattern.compile("(\\d{4})-(\\d{2})-(\\d{2})");
    Matcher matcher = pattern.matcher(text);
    // A match must be attempted before group(...) may be called; group(...) throws
    // IllegalStateException when find() was never called or did not succeed,
    // so only read the groups when find() reports a match.
    if (matcher.find()) {
        System.out.println(matcher.group(1));
        System.out.println(matcher.group(2));
        System.out.println(matcher.group(3));
    }
}

2. 命名捕获组

每个以左括号开始的命名捕获组,左括号后都紧跟着 ?<name>(name 为该分组的名称),而后才是该分组的正则表达式。

例如,匹配日期 2021-12-31 的正则表达式如下:

1
2
3
4
5
6
7
8
9
/**
 * Demonstrates named capturing groups: splits the date "2021-12-31" into the
 * groups "year", "month" and "day" and prints each part on its own line.
 *
 * @param args unused
 */
public static void main(String[] args) {
    String text = "2021-12-31";
    Pattern pattern = Pattern.compile("(?<year>\\d{4})-(?<month>\\d{2})-(?<day>\\d{2})");
    Matcher matcher = pattern.matcher(text);
    // A match must be attempted before group(...) may be called; group(...) throws
    // IllegalStateException when find() was never called or did not succeed,
    // so only read the groups when find() reports a match.
    if (matcher.find()) {
        System.out.println(matcher.group("year"));
        System.out.println(matcher.group("month"));
        System.out.println(matcher.group("day"));
    }
}

二、RegularUtils 工具类

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
package io.github.talelin.latticy.utils;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;

import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
 * Static helpers for reading the values of named capturing groups
 * ({@code (?<name>...)}) out of a piece of text.
 */
public class RegularUtils {
    /**
     * Matches the opening of a named capturing group, {@code (?<name>}, inside the
     * source text of a regular expression; group 1 is the group name.
     * Look-behind constructs ({@code (?<=} and {@code (?<!}) are not matched because
     * a name must start with a letter.
     */
    private static final Pattern NAMED_GROUP_PATTERN = Pattern.compile("\\(\\?<([a-zA-Z][a-zA-Z0-9]*)>");

    /**
     * Finds the first match of {@code regex} in {@code text} and returns the value of
     * every named capturing group of that match.
     *
     * @param text  the text to search
     * @param regex a regular expression, normally containing named capturing groups
     * @return an ordered JSON object mapping each group name to the text it captured
     *         ({@code null} for a group that did not participate in the match), with
     *         keys in the order the groups are declared in {@code regex}; an empty
     *         object when {@code regex} does not match
     * @throws java.util.regex.PatternSyntaxException if {@code regex} is not a valid pattern
     */
    public static JSONObject matchByGroupName(String text, String regex) {
        Pattern pattern = Pattern.compile(regex);
        Matcher matcher = pattern.matcher(text);
        JSONObject result = new JSONObject(true);
        if (!matcher.find()) {
            return result;
        }
        for (String groupName : getNamedGroupCandidates(regex)) {
            try {
                result.put(groupName, matcher.group(groupName));
            } catch (IllegalArgumentException ignored) {
                // The candidate names come from a purely textual scan of the regex, so
                // some may not be real groups (e.g. "(?<x>" written inside a character
                // class). Matcher.group(String) rejects those; leave them out.
            }
        }
        return result;
    }

    /**
     * Returns the text captured by one named group in the first match of
     * {@code regex} within {@code content}.
     *
     * @param content the text to search
     * @param regex   a regular expression that declares the group {@code group}
     * @param group   the name of the capturing group to read
     * @return the text captured by {@code group}, or {@code null} when {@code regex}
     *         does not match or the group did not participate in the match
     * @throws IllegalArgumentException if {@code regex} matches but declares no group
     *                                  named {@code group}
     */
    public static String matchStr(String content, String regex, String group) {
        Pattern pattern = Pattern.compile(regex);
        Matcher matcher = pattern.matcher(content);
        if (matcher.find()) {
            return matcher.group(group);
        }
        return null;
    }

    /**
     * Collects the names that look like named capturing groups in a regular expression.
     * <p>
     * Input: {@code "(?<year>\\d{4})-(?<month>\\d{2})-(?<day>\\d{2})"}<br>
     * Output: {@code [year, month, day]}
     * <p>
     * The result is only a list of candidates: the regex is scanned as plain text, so
     * a name found inside a character class, for example, is reported as well.
     *
     * @param regex the regular expression source to scan
     * @return the candidate group names, without duplicates, in the order they first
     *         appear in {@code regex}
     */
    private static Set<String> getNamedGroupCandidates(String regex) {
        // LinkedHashSet keeps the names in declaration order.
        Set<String> namedGroups = new LinkedHashSet<>();
        Matcher m = NAMED_GROUP_PATTERN.matcher(regex);
        while (m.find()) {
            namedGroups.add(m.group(1));
        }
        return namedGroups;
    }

    /**
     * Small demo: extracts year, month and day from "2021-12-31" and prints the
     * resulting JSON object followed by each individual value.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String text = "2021-12-31";
        String regex = "(?<year>\\d{4})-(?<month>\\d{2})-(?<day>\\d{2})";
        JSONObject jsonObject = matchByGroupName(text, regex);
        System.out.println(JSON.toJSONString(jsonObject, true));
        System.out.println(jsonObject.get("year"));
        System.out.println(jsonObject.get("month"));
        System.out.println(jsonObject.get("day"));
    }
}

三、参考资料