支持颠倒的英语 (#4527)

This commit is contained in:
Glavo
2025-09-22 22:07:06 +08:00
committed by GitHub
parent cbe6554390
commit 36d71bd14e
28 changed files with 1488 additions and 332 deletions

View File

@@ -30,7 +30,7 @@ import java.util.ResourceBundle;
/// - For all Chinese locales, `zh-CN` is always added to the candidate list. If `zh-Hans` already exists in the candidate list,
/// `zh-CN` is inserted before `zh`; otherwise, it is inserted after `zh`.
/// - For all Traditional Chinese locales, `zh-TW` is always added to the candidate list (before `zh`).
/// - For all [supported][LocaleUtils#mapToISO1Language(String)] ISO 639-3 language code (such as `eng`, `zho`, `lzh`, etc.),
/// - For all [supported][LocaleUtils#mapToISO2Language(String)] ISO 639-3 language codes (such as `eng`, `zho`, `lzh`, etc.),
/// a candidate list with the language code replaced by the ISO 639-1 (Macro)language code is added to the end of the candidate list.
///
/// @author Glavo
@@ -45,4 +45,11 @@ public class DefaultResourceBundleControl extends ResourceBundle.Control {
public List<Locale> getCandidateLocales(String baseName, Locale locale) {
return LocaleUtils.getCandidateLocales(locale);
}
/// Disables the fallback-locale step of the [ResourceBundle] lookup.
///
/// By default, when only the base bundle is found, [ResourceBundle] attempts to fall back to
/// [Locale#getDefault()] for a further lookup. The base bundle is always used as the English
/// resource file, so that fallback is suppressed here.
///
/// @param baseName the base name of the resource bundle (unused)
/// @param locale   the locale being looked up (unused)
/// @return always `null`, i.e. no fallback locale
@Override
public Locale getFallbackLocale(String baseName, Locale locale) {
// By default, when only the base bundle is found, it will attempt to fall back to Locale.getDefault() for further lookup.
// Since we always use the base bundle as the English resource file, we want to suppress this behavior.
return null;
}
}

View File

@@ -23,6 +23,8 @@ import org.jetbrains.annotations.Nullable;
import org.jetbrains.annotations.Unmodifiable;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
@@ -44,6 +46,56 @@ public final class LocaleUtils {
public static final String DEFAULT_LANGUAGE_KEY = "default";
private static final Map<String, String> subLanguageToParent = new HashMap<>();
private static final Map<String, String> iso3To2 = new HashMap<>();
// Populates the lookup tables `subLanguageToParent` and `iso3To2` from the bundled CSV resources.
// Loading is best-effort: a missing resource is ignored and any failure is only logged,
// leaving the affected table empty or partially filled.
static {
    // Line format of sublanguages.csv:
    //   <parent>,<sub-language>[,<sub-language>...]
    // Blank lines and lines starting with '#' are ignored.
    try (InputStream input = LocaleUtils.class.getResourceAsStream("/assets/lang/sublanguages.csv")) {
        if (input != null) {
            // Decode with an explicit charset so the result does not depend on the platform default.
            new String(input.readAllBytes(), StandardCharsets.UTF_8).lines()
                    .filter(line -> !line.startsWith("#") && !line.isBlank())
                    .forEach(line -> {
                        String[] languages = line.split(",");
                        if (languages.length < 2) {
                            LOG.warning("Invalid line in sublanguages.csv: " + line);
                            // Skip the malformed line. Without this, a line such as "," (which splits
                            // into an empty array) would throw on languages[0] and abort the loading
                            // of every remaining line.
                            return;
                        }

                        String parent = languages[0];
                        for (int i = 1; i < languages.length; i++) {
                            subLanguageToParent.put(languages[i], parent);
                        }
                    });
        }
    } catch (Throwable e) {
        LOG.warning("Failed to load sublanguages.csv", e);
    }

    // Line Format:
    //   (?<iso2>[a-z]{2}),(?<iso3>[a-z]{3})
    // The file is parsed directly from bytes using fixed-width records.
    try (InputStream input = LocaleUtils.class.getResourceAsStream("/assets/lang/iso_languages.csv")) {
        if (input != null) {
            int lineLength = 2 + 1 + 3;

            byte[] bytes = input.readAllBytes();
            // Stop as soon as fewer bytes than one full record remain.
            for (int offset = 0; offset <= bytes.length - lineLength; ) {
                if (bytes[offset + 2] != ',')
                    throw new IOException("iso_languages.csv format invalid");

                String iso2 = new String(bytes, offset, 2, StandardCharsets.US_ASCII);
                String iso3 = new String(bytes, offset + 3, 3, StandardCharsets.US_ASCII);

                // NOTE(review): several alpha-3 codes appear twice in the data (e.g. `ind` for both
                // `id` and `in`); the later record overwrites the earlier one. Confirm this is intended.
                iso3To2.put(iso3, iso2);

                offset += lineLength;
                // Skip the line terminator. Accepting both LF and CRLF keeps the parser working
                // when the resource was checked out with Windows line endings.
                while (offset < bytes.length && (bytes[offset] == '\n' || bytes[offset] == '\r'))
                    offset++;
            }
        }
    } catch (Throwable e) {
        LOG.warning("Failed to load iso_languages.csv", e);
    }
}
private static Locale getInstance(String language, String script, String region,
String variant) {
Locale.Builder builder = new Locale.Builder();
@@ -64,7 +116,7 @@ public final class LocaleUtils {
: locale.stripExtensions().toLanguageTag();
}
public static @NotNull String getISO1Language(Locale locale) {
public static @NotNull String getISO2Language(Locale locale) {
String language = locale.getLanguage();
if (language.isEmpty()) return "en";
if (language.length() <= 2)
@@ -75,7 +127,7 @@ public final class LocaleUtils {
if (lang.length() <= 2)
return lang;
else {
String iso1 = mapToISO1Language(lang);
String iso1 = mapToISO2Language(lang);
if (iso1 != null)
return iso1;
}
@@ -88,6 +140,12 @@ public final class LocaleUtils {
/// the script will be inferred based on the language, the region and the variant.
public static @NotNull String getScript(Locale locale) {
if (locale.getScript().isEmpty()) {
if (isEnglish(locale)) {
if ("UD".equals(locale.getCountry())) {
return "Qabs";
}
}
if (isChinese(locale)) {
if (CHINESE_LATN_VARIANTS.contains(locale.getVariant()))
return "Latn";
@@ -130,7 +188,7 @@ public final class LocaleUtils {
} else if (language.length() <= 2) {
languages = List.of(language);
} else {
String iso1Language = mapToISO1Language(language);
String iso1Language = mapToISO2Language(language);
languages = iso1Language != null
? List.of(language, iso1Language)
: List.of(language);
@@ -294,37 +352,26 @@ public final class LocaleUtils {
// ---
/// Map ISO 639-3 language codes to ISO 639-1 language codes.
public static @Nullable String mapToISO1Language(String iso3Language) {
return switch (iso3Language) {
case "eng" -> "en";
case "spa" -> "es";
case "jpa" -> "ja";
case "rus" -> "ru";
case "ukr" -> "uk";
case "zho" -> "zh";
default -> null;
};
/// Map ISO 639 alpha-3 language codes to ISO 639 alpha-2 language codes.
///
/// The lookup uses the `iso3To2` table, which the static initializer of this class fills from
/// `/assets/lang/iso_languages.csv`.
///
/// @param iso3Language the ISO 639 alpha-3 language code to look up
/// @return the mapped alpha-2 language code, or `null` if the table has no entry for `iso3Language`
public static @Nullable String mapToISO2Language(String iso3Language) {
return iso3To2.get(iso3Language);
}
public static @Nullable String getParentLanguage(String language) {
return switch (language) {
case "cmn", "lzh", "cdo", "cjy", "cpx", "czh",
"gan", "hak", "hsn", "mnp", "nan", "wuu", "yue" -> "zh";
case "" -> null;
default -> "";
};
return !language.isEmpty()
? subLanguageToParent.getOrDefault(language, "")
: null;
}
public static boolean isEnglish(Locale locale) {
return "en".equals(getISO1Language(locale));
return "en".equals(getISO2Language(locale));
}
public static final Set<String> CHINESE_TRADITIONAL_REGIONS = Set.of("TW", "HK", "MO");
public static final Set<String> CHINESE_LATN_VARIANTS = Set.of("pinyin", "wadegile", "tongyong");
public static boolean isChinese(Locale locale) {
return "zh".equals(getISO1Language(locale));
return "zh".equals(getISO2Language(locale));
}
private LocaleUtils() {

View File

@@ -0,0 +1,187 @@
aa,aar
ab,abk
ae,ave
af,afr
ak,aka
am,amh
an,arg
ar,ara
as,asm
av,ava
ay,aym
az,aze
ba,bak
be,bel
bg,bul
bh,bih
bi,bis
bm,bam
bn,ben
bo,bod
br,bre
bs,bos
ca,cat
ce,che
ch,cha
co,cos
cr,cre
cs,ces
cu,chu
cv,chv
cy,cym
da,dan
de,deu
dv,div
dz,dzo
ee,ewe
el,ell
en,eng
eo,epo
es,spa
et,est
eu,eus
fa,fas
ff,ful
fi,fin
fj,fij
fo,fao
fr,fra
fy,fry
ga,gle
gd,gla
gl,glg
gn,grn
gu,guj
gv,glv
ha,hau
he,heb
hi,hin
ho,hmo
hr,hrv
ht,hat
hu,hun
hy,hye
hz,her
ia,ina
id,ind
ie,ile
ig,ibo
ii,iii
ik,ipk
in,ind
io,ido
is,isl
it,ita
iu,iku
iw,heb
ja,jpn
ji,yid
jv,jav
ka,kat
kg,kon
ki,kik
kj,kua
kk,kaz
kl,kal
km,khm
kn,kan
ko,kor
kr,kau
ks,kas
ku,kur
kv,kom
kw,cor
ky,kir
la,lat
lb,ltz
lg,lug
li,lim
ln,lin
lo,lao
lt,lit
lu,lub
lv,lav
mg,mlg
mh,mah
mi,mri
mk,mkd
ml,mal
mn,mon
mo,mol
mr,mar
ms,msa
mt,mlt
my,mya
na,nau
nb,nob
nd,nde
ne,nep
ng,ndo
nl,nld
nn,nno
no,nor
nr,nbl
nv,nav
ny,nya
oc,oci
oj,oji
om,orm
or,ori
os,oss
pa,pan
pi,pli
pl,pol
ps,pus
pt,por
qu,que
rm,roh
rn,run
ro,ron
ru,rus
rw,kin
sa,san
sc,srd
sd,snd
se,sme
sg,sag
si,sin
sk,slk
sl,slv
sm,smo
sn,sna
so,som
sq,sqi
sr,srp
ss,ssw
st,sot
su,sun
sv,swe
sw,swa
ta,tam
te,tel
tg,tgk
th,tha
ti,tir
tk,tuk
tl,tgl
tn,tsn
to,ton
tr,tur
ts,tso
tt,tat
tw,twi
ty,tah
ug,uig
uk,ukr
ur,urd
uz,uzb
ve,ven
vi,vie
vo,vol
wa,wln
wo,wol
xh,xho
yi,yid
za,zha
zh,zho
zu,zul
1 aa aar
2 ab abk
3 ae ave
4 af afr
5 ak aka
6 am amh
7 an arg
8 ar ara
9 as asm
10 av ava
11 ay aym
12 az aze
13 ba bak
14 be bel
15 bg bul
16 bh bih
17 bi bis
18 bm bam
19 bn ben
20 bo bod
21 br bre
22 bs bos
23 ca cat
24 ce che
25 ch cha
26 co cos
27 cr cre
28 cs ces
29 cu chu
30 cv chv
31 cy cym
32 da dan
33 de deu
34 dv div
35 dz dzo
36 ee ewe
37 el ell
38 en eng
39 eo epo
40 es spa
41 et est
42 eu eus
43 fa fas
44 ff ful
45 fi fin
46 fj fij
47 fo fao
48 fr fra
49 fy fry
50 ga gle
51 gd gla
52 gl glg
53 gn grn
54 gu guj
55 gv glv
56 ha hau
57 he heb
58 hi hin
59 ho hmo
60 hr hrv
61 ht hat
62 hu hun
63 hy hye
64 hz her
65 ia ina
66 id ind
67 ie ile
68 ig ibo
69 ii iii
70 ik ipk
71 in ind
72 io ido
73 is isl
74 it ita
75 iu iku
76 iw heb
77 ja jpn
78 ji yid
79 jv jav
80 ka kat
81 kg kon
82 ki kik
83 kj kua
84 kk kaz
85 kl kal
86 km khm
87 kn kan
88 ko kor
89 kr kau
90 ks kas
91 ku kur
92 kv kom
93 kw cor
94 ky kir
95 la lat
96 lb ltz
97 lg lug
98 li lim
99 ln lin
100 lo lao
101 lt lit
102 lu lub
103 lv lav
104 mg mlg
105 mh mah
106 mi mri
107 mk mkd
108 ml mal
109 mn mon
110 mo mol
111 mr mar
112 ms msa
113 mt mlt
114 my mya
115 na nau
116 nb nob
117 nd nde
118 ne nep
119 ng ndo
120 nl nld
121 nn nno
122 no nor
123 nr nbl
124 nv nav
125 ny nya
126 oc oci
127 oj oji
128 om orm
129 or ori
130 os oss
131 pa pan
132 pi pli
133 pl pol
134 ps pus
135 pt por
136 qu que
137 rm roh
138 rn run
139 ro ron
140 ru rus
141 rw kin
142 sa san
143 sc srd
144 sd snd
145 se sme
146 sg sag
147 si sin
148 sk slk
149 sl slv
150 sm smo
151 sn sna
152 so som
153 sq sqi
154 sr srp
155 ss ssw
156 st sot
157 su sun
158 sv swe
159 sw swa
160 ta tam
161 te tel
162 tg tgk
163 th tha
164 ti tir
165 tk tuk
166 tl tgl
167 tn tsn
168 to ton
169 tr tur
170 ts tso
171 tt tat
172 tw twi
173 ty tah
174 ug uig
175 uk ukr
176 ur urd
177 uz uzb
178 ve ven
179 vi vie
180 vo vol
181 wa wln
182 wo wol
183 xh xho
184 yi yid
185 za zha
186 zh zho
187 zu zul

View File

@@ -0,0 +1 @@
zh,cmn,lzh,cdo,cjy,cpx,czh,gan,hak,hsn,mnp,nan,wuu,yue
1 zh cmn lzh cdo cjy cpx czh gan hak hsn mnp nan wuu yue