通过解析 IANA 语言子标签注册表增强本地化功能 (#4675)

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Glavo
2025-10-21 15:37:32 +08:00
committed by GitHub
parent 27e1e021d7
commit d7c6a23dbe
12 changed files with 670 additions and 275 deletions

3
.gitignore vendored
View File

@@ -19,6 +19,9 @@ hmcl-exported-logs-*
/.local/ /.local/
/.cache/ /.cache/
# IANA Language Subtag Registry
language-subtag-registry
# gradle build # gradle build
/build/ /build/
/HMCL/build/ /HMCL/build/

View File

@@ -30,7 +30,7 @@ import java.util.ResourceBundle;
/// - For all Chinese locales, `zh-CN` is always added to the candidate list. If `zh-Hans` already exists in the candidate list, /// - For all Chinese locales, `zh-CN` is always added to the candidate list. If `zh-Hans` already exists in the candidate list,
/// `zh-CN` is inserted before `zh`; otherwise, it is inserted after `zh`. /// `zh-CN` is inserted before `zh`; otherwise, it is inserted after `zh`.
/// - For all Traditional Chinese locales, `zh-TW` is always added to the candidate list (before `zh`). /// - For all Traditional Chinese locales, `zh-TW` is always added to the candidate list (before `zh`).
/// - For all [supported][LocaleUtils#mapToISO2Language(String)] ISO 639-3 language code (such as `eng`, `zho`, `lzh`, etc.), /// - For all supported ISO 639-3 language code (such as `eng`, `zho`, `lzh`, etc.),
/// a candidate list with the language code replaced by the ISO 639-1 (Macro)language code is added to the end of the candidate list. /// a candidate list with the language code replaced by the ISO 639-1 (Macro)language code is added to the end of the candidate list.
/// ///
/// @author Glavo /// @author Glavo

View File

@@ -17,9 +17,7 @@
*/ */
package org.jackhuang.hmcl.util.i18n; package org.jackhuang.hmcl.util.i18n;
import org.jackhuang.hmcl.util.Lang;
import org.jackhuang.hmcl.util.StringUtils; import org.jackhuang.hmcl.util.StringUtils;
import org.jackhuang.hmcl.util.io.IOUtils;
import org.jackhuang.hmcl.util.platform.NativeUtils; import org.jackhuang.hmcl.util.platform.NativeUtils;
import org.jackhuang.hmcl.util.platform.OperatingSystem; import org.jackhuang.hmcl.util.platform.OperatingSystem;
import org.jackhuang.hmcl.util.platform.windows.Kernel32; import org.jackhuang.hmcl.util.platform.windows.Kernel32;
@@ -29,6 +27,8 @@ import org.jetbrains.annotations.Nullable;
import org.jetbrains.annotations.Unmodifiable; import org.jetbrains.annotations.Unmodifiable;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.time.Duration; import java.time.Duration;
@@ -48,66 +48,74 @@ public final class LocaleUtils {
public static final Locale SYSTEM_DEFAULT = Locale.getDefault(); public static final Locale SYSTEM_DEFAULT = Locale.getDefault();
public static final boolean IS_CHINA_MAINLAND = isChinaMainland();
private static boolean isChinaMainland() {
if ("Asia/Shanghai".equals(ZoneId.systemDefault().getId()))
return true;
// Check if the time zone is UTC+8
if (ZonedDateTime.now().getOffset().getTotalSeconds() == Duration.ofHours(8).toSeconds()) {
if ("CN".equals(LocaleUtils.SYSTEM_DEFAULT.getCountry()))
return true;
if (OperatingSystem.CURRENT_OS == OperatingSystem.WINDOWS && NativeUtils.USE_JNA) {
Kernel32 kernel32 = Kernel32.INSTANCE;
// https://learn.microsoft.com/windows/win32/intl/table-of-geographical-locations
if (kernel32 != null && kernel32.GetUserGeoID(WinConstants.GEOCLASS_NATION) == 45) // China
return true;
}
}
return false;
}
public static final Locale LOCALE_ZH_HANS = Locale.forLanguageTag("zh-Hans"); public static final Locale LOCALE_ZH_HANS = Locale.forLanguageTag("zh-Hans");
public static final Locale LOCALE_ZH_HANT = Locale.forLanguageTag("zh-Hant"); public static final Locale LOCALE_ZH_HANT = Locale.forLanguageTag("zh-Hant");
public static final String DEFAULT_LANGUAGE_KEY = "default"; public static final String DEFAULT_LANGUAGE_KEY = "default";
private static final Map<String, String> subLanguageToParent = new HashMap<>(); private static final Map<String, String> PARENT_LANGUAGE = loadCSV("sublanguages.csv");
private static final Map<String, String> iso3To2 = new HashMap<>(); private static final Map<String, String> NORMALIZED_TAG = loadCSV("language_aliases.csv");
private static final Set<String> rtl = new HashSet<>(); private static final Map<String, String> DEFAULT_SCRIPT = loadCSV("default_script.csv");
private static final Map<String, String> PREFERRED_LANGUAGE = Map.of("zh", "cmn");
private static final Set<String> RTL_SCRIPTS = Set.of("Qabs", "Arab", "Hebr");
private static final Set<String> CHINESE_TRADITIONAL_REGIONS = Set.of("TW", "HK", "MO");
static { /// Load CSV files located in `/assets/lang/`.
try { /// Each line in these files contains at least two elements.
for (String line : Lang.toIterable(IOUtils.readFullyAsString(LocaleUtils.class.getResourceAsStream("/assets/lang/sublanguages.csv")).lines())) { ///
if (line.startsWith("#") || line.isBlank()) { /// For example, if a file contains `value0,value1,value2`, the return value will be `{value1=value0, value2=value0}`.
continue; private static Map<String, String> loadCSV(String fileName) {
InputStream resource = LocaleUtils.class.getResourceAsStream("/assets/lang/" + fileName);
if (resource == null) {
LOG.warning("Can't find file: " + fileName);
return Map.of();
} }
String[] languages = line.split(","); HashMap<String, String> result = new HashMap<>();
if (languages.length < 2) { try (resource) {
LOG.warning("Invalid line in sublanguages.csv: " + line); new String(resource.readAllBytes(), StandardCharsets.UTF_8).lines().forEach(line -> {
continue; if (line.startsWith("#") || line.isBlank())
return;
String[] items = line.split(",");
if (items.length < 2) {
LOG.warning("Invalid line in " + fileName + ": " + line);
return;
} }
String parent = languages[0]; String parent = items[0];
for (int i = 1; i < languages.length; i++) { for (int i = 1; i < items.length; i++) {
subLanguageToParent.put(languages[i], parent); result.put(items[i], parent);
}
} }
});
} catch (Throwable e) { } catch (Throwable e) {
LOG.warning("Failed to load sublanguages.csv", e); LOG.warning("Failed to load " + fileName, e);
} }
try { return Map.copyOf(result);
// Line Format: (?<iso2>[a-z]{2}),(?<iso3>[a-z]{3})
for (String line : Lang.toIterable(IOUtils.readFullyAsString(LocaleUtils.class.getResourceAsStream("/assets/lang/iso_languages.csv")).lines())) {
if (line.startsWith("#") || line.isBlank()) {
continue;
}
String[] parts = line.split(",", 3);
if (parts.length != 2) {
LOG.warning("Invalid line in iso_languages.csv: " + line);
continue;
}
iso3To2.put(parts[1], parts[0]);
}
} catch (Throwable e) {
LOG.warning("Failed to load iso_languages.csv", e);
}
try {
for (String line : Lang.toIterable(IOUtils.readFullyAsString(LocaleUtils.class.getResourceAsStream("/assets/lang/rtl.txt")).lines())) {
if (line.startsWith("#") || line.isBlank()) {
continue;
}
rtl.add(line.trim());
}
} catch (Throwable e) {
LOG.warning("Failed to load rtl.txt", e);
}
} }
private static Locale getInstance(String language, String script, String region, private static Locale getInstance(String language, String script, String region,
@@ -130,6 +138,31 @@ public final class LocaleUtils {
: locale.stripExtensions().toLanguageTag(); : locale.stripExtensions().toLanguageTag();
} }
public static boolean isEnglish(Locale locale) {
return "en".equals(getRootLanguage(locale));
}
public static boolean isChinese(Locale locale) {
return "zh".equals(getRootLanguage(locale));
}
// ---
/// Normalize the language code to the code in the IANA Language Subtag Registry.
/// Typically, it normalizes ISO 639 alpha-3 codes to ISO 639 alpha-2 codes.
public static @NotNull String normalizeLanguage(String language) {
return language.isEmpty()
? "en"
: NORMALIZED_TAG.getOrDefault(language, language);
}
/// If `language` is a sublanguage of a [macrolanguage](https://en.wikipedia.org/wiki/ISO_639_macrolanguage),
/// return the macrolanguage; otherwise, return `null`.
public static @Nullable String getParentLanguage(String language) {
return PARENT_LANGUAGE.get(language);
}
/// @see #getRootLanguage(String)
public static @NotNull String getRootLanguage(Locale locale) { public static @NotNull String getRootLanguage(Locale locale) {
return getRootLanguage(locale.getLanguage()); return getRootLanguage(locale.getLanguage());
} }
@@ -140,54 +173,54 @@ public final class LocaleUtils {
/// - If `language` is empty, return `en`; /// - If `language` is empty, return `en`;
/// - Otherwise, return the `language`. /// - Otherwise, return the `language`.
public static @NotNull String getRootLanguage(String language) { public static @NotNull String getRootLanguage(String language) {
if (language.isEmpty()) return "en"; language = normalizeLanguage(language);
if (language.length() <= 2)
return language;
String iso2 = mapToISO2Language(language);
if (iso2 != null)
return iso2;
String parent = getParentLanguage(language); String parent = getParentLanguage(language);
return parent != null ? parent : language; return parent != null ? parent : language;
} }
/// If `language` is a macrolanguage, try to map it to the most commonly used individual language.
///
/// For example, if `language` is `zh`, this method will return `cmn`.
public static @NotNull String getPreferredLanguage(String language) {
language = normalizeLanguage(language);
return PREFERRED_LANGUAGE.getOrDefault(language, language);
}
/// Get the script of the locale. If the script is empty and the language is Chinese, /// Get the script of the locale. If the script is empty and the language is Chinese,
/// the script will be inferred based on the language, the region and the variant. /// the script will be inferred based on the language, the region and the variant.
public static @NotNull String getScript(Locale locale) { public static @NotNull String getScript(Locale locale) {
if (locale.getScript().isEmpty()) { if (locale.getScript().isEmpty()) {
if (isEnglish(locale)) { if (!locale.getVariant().isEmpty()) {
String script = DEFAULT_SCRIPT.get(locale.getVariant());
if (script != null)
return script;
}
if ("UD".equals(locale.getCountry())) { if ("UD".equals(locale.getCountry())) {
return "Qabs"; return "Qabs";
} }
}
String script = DEFAULT_SCRIPT.get(normalizeLanguage(locale.getLanguage()));
if (script != null)
return script;
if (isChinese(locale)) { if (isChinese(locale)) {
if (CHINESE_LATN_VARIANTS.contains(locale.getVariant())) return CHINESE_TRADITIONAL_REGIONS.contains(locale.getCountry())
return "Latn"; ? "Hant"
if (locale.getLanguage().equals("lzh") || CHINESE_TRADITIONAL_REGIONS.contains(locale.getCountry())) : "Hans";
return "Hant";
else
return "Hans";
} }
return "";
} }
return locale.getScript(); return locale.getScript();
} }
public static @NotNull TextDirection getTextDirection(Locale locale) { public static @NotNull TextDirection getTextDirection(Locale locale) {
TextDirection direction = rtl.contains(getRootLanguage(locale)) return RTL_SCRIPTS.contains(getScript(locale))
? TextDirection.RIGHT_TO_LEFT ? TextDirection.RIGHT_TO_LEFT
: TextDirection.LEFT_TO_RIGHT; : TextDirection.LEFT_TO_RIGHT;
if ("Qabs".equals(getScript(locale))) {
direction = switch (direction) {
case RIGHT_TO_LEFT -> TextDirection.LEFT_TO_RIGHT;
case LEFT_TO_RIGHT -> TextDirection.RIGHT_TO_LEFT;
};
}
return direction;
} }
private static final ConcurrentMap<Locale, List<Locale>> CANDIDATE_LOCALES = new ConcurrentHashMap<>(); private static final ConcurrentMap<Locale, List<Locale>> CANDIDATE_LOCALES = new ConcurrentHashMap<>();
@@ -196,13 +229,8 @@ public final class LocaleUtils {
return CANDIDATE_LOCALES.computeIfAbsent(locale, LocaleUtils::createCandidateLocaleList); return CANDIDATE_LOCALES.computeIfAbsent(locale, LocaleUtils::createCandidateLocaleList);
} }
// -------------
private static List<Locale> createCandidateLocaleList(Locale locale) { private static List<Locale> createCandidateLocaleList(Locale locale) {
String language = locale.getLanguage(); String language = getPreferredLanguage(locale.getLanguage());
if (language.isEmpty())
return List.of(Locale.ENGLISH, Locale.ROOT);
String script = getScript(locale); String script = getScript(locale);
String region = locale.getCountry(); String region = locale.getCountry();
List<String> variants = locale.getVariant().isEmpty() List<String> variants = locale.getVariant().isEmpty()
@@ -211,18 +239,7 @@ public final class LocaleUtils {
ArrayList<Locale> result = new ArrayList<>(); ArrayList<Locale> result = new ArrayList<>();
do { do {
String currentLanguage; addCandidateLocales(result, language, script, region, variants);
if (language.length() <= 2) {
currentLanguage = language;
} else {
String iso2 = mapToISO2Language(language);
currentLanguage = iso2 != null
? iso2
: language;
}
addCandidateLocales(result, currentLanguage, script, region, variants);
} while ((language = getParentLanguage(language)) != null); } while ((language = getParentLanguage(language)) != null);
result.add(Locale.ROOT); result.add(Locale.ROOT);
@@ -367,54 +384,6 @@ public final class LocaleUtils {
return Map.of(); return Map.of();
} }
// ---
/// Map ISO 639 alpha-3 language codes to ISO 639 alpha-2 language codes.
/// Returns `null` if there is no corresponding ISO 639 alpha-2 language code.
public static @Nullable String mapToISO2Language(String iso3Language) {
return iso3To2.get(iso3Language);
}
/// If `language` is a sublanguage of a [macrolanguage](https://en.wikipedia.org/wiki/ISO_639_macrolanguage),
/// return the macrolanguage; otherwise, return `null`.
public static @Nullable String getParentLanguage(String language) {
return subLanguageToParent.get(language);
}
public static boolean isEnglish(Locale locale) {
return "en".equals(getRootLanguage(locale));
}
public static final Set<String> CHINESE_TRADITIONAL_REGIONS = Set.of("TW", "HK", "MO");
public static final Set<String> CHINESE_LATN_VARIANTS = Set.of("pinyin", "wadegile", "tongyong");
public static boolean isChinese(Locale locale) {
return "zh".equals(getRootLanguage(locale));
}
public static final boolean IS_CHINA_MAINLAND = isChinaMainland();
private static boolean isChinaMainland() {
if ("Asia/Shanghai".equals(ZoneId.systemDefault().getId()))
return true;
// Check if the time zone is UTC+8
if (ZonedDateTime.now().getOffset().getTotalSeconds() == Duration.ofHours(8).toSeconds()) {
if ("CN".equals(LocaleUtils.SYSTEM_DEFAULT.getCountry()))
return true;
if (OperatingSystem.CURRENT_OS == OperatingSystem.WINDOWS && NativeUtils.USE_JNA) {
Kernel32 kernel32 = Kernel32.INSTANCE;
// https://learn.microsoft.com/windows/win32/intl/table-of-geographical-locations
if (kernel32 != null && kernel32.GetUserGeoID(WinConstants.GEOCLASS_NATION) == 45) // China
return true;
}
}
return false;
}
private LocaleUtils() { private LocaleUtils() {
} }
} }

View File

@@ -0,0 +1,29 @@
Arab,ar,fa,ps,ur
Armn,hy
Beng,as,bn
Blis,zbl
Cyrl,ab,be,bg,kk,mk,ru,uk
Deva,hi,mr,ne,kok,mai
Ethi,am,ti
Geor,ka
Grek,el
Gujr,gu
Guru,pa
Hant,lzh
Hebr,he,yi
Jpan,ja
Khmr,km
Knda,kn
Kore,ko
Laoo,lo
Latn,af,ay,bs,ca,ch,cs,cy,da,de,en,eo,es,et,eu,fi,fj,fo,fr,fy,ga,gl,gn,gv,hr,ht,hu,id,is,it,kl,la,lb,ln,lt,lv,mg,mh,ms,mt,na,nb,nd,nl,nn,no,nr,ny,om,pl,pt,qu,rm,rn,ro,rw,sg,sk,sl,sm,so,sq,ss,st,sv,sw,tl,tn,to,tr,ts,ve,vi,xh,zu,dsb,frr,frs,gsw,hsb,men,nds,niu,nso,tem,tkl,tmh,tpi,tvl,tailo,pinyin,hepburn,pehoeji,tongyong,wadegile
Mlym,ml
Mymr,my
Nkoo,nqo
Orya,or
Sinh,si
Taml,ta
Telu,te
Thaa,dv
Thai,th
Tibt,dz
1 Arab,ar,fa,ps,ur
2 Armn,hy
3 Beng,as,bn
4 Blis,zbl
5 Cyrl,ab,be,bg,kk,mk,ru,uk
6 Deva,hi,mr,ne,kok,mai
7 Ethi,am,ti
8 Geor,ka
9 Grek,el
10 Gujr,gu
11 Guru,pa
12 Hant,lzh
13 Hebr,he,yi
14 Jpan,ja
15 Khmr,km
16 Knda,kn
17 Kore,ko
18 Laoo,lo
19 Latn,af,ay,bs,ca,ch,cs,cy,da,de,en,eo,es,et,eu,fi,fj,fo,fr,fy,ga,gl,gn,gv,hr,ht,hu,id,is,it,kl,la,lb,ln,lt,lv,mg,mh,ms,mt,na,nb,nd,nl,nn,no,nr,ny,om,pl,pt,qu,rm,rn,ro,rw,sg,sk,sl,sm,so,sq,ss,st,sv,sw,tl,tn,to,tr,ts,ve,vi,xh,zu,dsb,frr,frs,gsw,hsb,men,nds,niu,nso,tem,tkl,tmh,tpi,tvl,tailo,pinyin,hepburn,pehoeji,tongyong,wadegile
20 Mlym,ml
21 Mymr,my
22 Nkoo,nqo
23 Orya,or
24 Sinh,si
25 Taml,ta
26 Telu,te
27 Thaa,dv
28 Thai,th
29 Tibt,dz

View File

@@ -73,7 +73,6 @@ io,ido
is,isl is,isl
it,ita it,ita
iu,iku iu,iku
iw,heb
ja,jpn ja,jpn
ji,yid ji,yid
jv,jav jv,jav
1 aa aar
73 is isl
74 it ita
75 iu iku
iw heb
76 ja jpn
77 ji yid
78 jv jav

View File

@@ -1,6 +0,0 @@
ar
fa
he
ps
ur
yi

View File

@@ -1 +1,63 @@
ak,tw,fat
ar,aao,abh,abv,acm,acq,acw,acx,acy,adf,aeb,aec,afb,apc,apd,arb,arq,ars,ary,arz,auz,avl,ayh,ayl,ayn,ayp,pga,shu,ssh
ay,ayc,ayr
az,azb,azj
cr,crj,crk,crl,crm,csw,cwd
et,ekk,vro
fa,pes,prs
ff,ffm,fub,fuc,fue,fuf,fuh,fui,fuq,fuv
gn,gnw,gug,gui,gun,nhd
ik,esi,esk
iu,ike,ikt
kg,kng,kwy,ldi
kr,kby,knc,krt
ku,ckb,kmr,sdh
kv,koi,kpv
lv,ltg,lvs
mg,bhr,bmm,bzc,msh,plt,skg,tdx,tkg,txy,xmv,xmw
mn,khk,mvf
ms,id,bjn,btj,bve,bvu,coa,dup,hji,jak,jax,kvb,kvr,kxd,lce,lcf,liw,max,meo,mfa,mfb,min,mqg,msi,mui,orn,ors,pel,pse,tmw,urk,vkk,vkt,xmm,zlm,zmi,zsm
ne,dty,npi
no,nb,nn
oj,ciw,ojb,ojc,ojg,ojs,ojw,otw
om,gax,gaz,hae,orc
or,ory,spv
ps,pbt,pbu,pst
qu,qub,qud,quf,qug,quh,quk,qul,qup,qur,qus,quw,qux,quy,quz,qva,qvc,qve,qvh,qvi,qvj,qvl,qvm,qvn,qvo,qvp,qvs,qvw,qvz,qwa,qwc,qwh,qws,qxa,qxc,qxh,qxl,qxn,qxo,qxp,qxr,qxt,qxu,qxw
sa,cls,vsn
sc,sdc,sdn,src,sro
sh,bs,hr,sr,cnr
sq,aae,aat,aln,als
sw,swc,swh
uz,uzn,uzs
yi,ydd,yih
za,zch,zeh,zgb,zgm,zgn,zhd,zhn,zlj,zln,zlq,zqe,zyb,zyg,zyj,zyn,zzj
zh,cdo,cjy,cmn,cnp,cpx,csp,czh,czo,gan,hak,hnm,hsn,luh,lzh,mnp,nan,sjc,wuu,yue zh,cdo,cjy,cmn,cnp,cpx,csp,czh,czo,gan,hak,hnm,hsn,luh,lzh,mnp,nan,sjc,wuu,yue
bal,bcc,bgn,bgp
bik,bcl,bln,bto,cts,fbl,lbl,rbl,ubl
bnc,ebk,lbk,obk,rbk,vbk
bua,bxm,bxr,bxu
chm,mhr,mrj
del,umu,unm
den,scs,xsl
din,dib,dik,dip,diw,dks
doi,dgo,xnr
gba,bdt,gbp,gbq,gmm,gso,gya
gon,esg,gno,wsg
grb,gbo,gec,grj,grv,gry
hai,hax,hdn
hmn,cqd,hea,hma,hmc,hmd,hme,hmg,hmh,hmi,hmj,hml,hmm,hmp,hmq,hms,hmw,hmy,hmz,hnj,hrm,huj,mmr,muq,mww,sfm
jrb,aju,jye,yhd,yud
kln,enb,eyo,niq,oki,pko,sgc,spy,tec,tuy
kok,gom,knn
kpe,gkp,xpe
lah,hnd,hno,jat,phr,pnb,skr,xhe
luy,bxk,ida,lkb,lko,lks,lri,lrm,lsm,lto,lts,lwg,nle,nyd,rag
man,emk,mku,mlq,mnk,msc,mwk
mwr,dhd,mtr,mve,rwr,swv,wry
raj,bgq,gda,gju,hoj,mup,wbr
rom,rmc,rmf,rml,rmn,rmo,rmw,rmy
syr,aii,cld
tmh,taq,thv,thz,ttq
zap,zaa,zab,zac,zad,zae,zaf,zai,zam,zao,zaq,zar,zas,zat,zav,zaw,zax,zca,zcd,zoo,zpa,zpb,zpc,zpd,zpe,zpf,zpg,zph,zpi,zpj,zpk,zpl,zpm,zpn,zpo,zpp,zpq,zpr,zps,zpt,zpu,zpv,zpw,zpx,zpy,zpz,zsr,zte,ztg,ztl,ztm,ztn,ztp,ztq,zts,ztt,ztu,ztx,zty
zza,diq,kiu
1 zh ak,tw,fat cdo cjy cmn cnp cpx csp czh czo gan hak hnm hsn luh lzh mnp nan sjc wuu yue
1 ak,tw,fat
2 ar,aao,abh,abv,acm,acq,acw,acx,acy,adf,aeb,aec,afb,apc,apd,arb,arq,ars,ary,arz,auz,avl,ayh,ayl,ayn,ayp,pga,shu,ssh
3 ay,ayc,ayr
4 az,azb,azj
5 cr,crj,crk,crl,crm,csw,cwd
6 et,ekk,vro
7 fa,pes,prs
8 ff,ffm,fub,fuc,fue,fuf,fuh,fui,fuq,fuv
9 gn,gnw,gug,gui,gun,nhd
10 ik,esi,esk
11 iu,ike,ikt
12 kg,kng,kwy,ldi
13 kr,kby,knc,krt
14 ku,ckb,kmr,sdh
15 kv,koi,kpv
16 lv,ltg,lvs
17 mg,bhr,bmm,bzc,msh,plt,skg,tdx,tkg,txy,xmv,xmw
18 mn,khk,mvf
19 ms,id,bjn,btj,bve,bvu,coa,dup,hji,jak,jax,kvb,kvr,kxd,lce,lcf,liw,max,meo,mfa,mfb,min,mqg,msi,mui,orn,ors,pel,pse,tmw,urk,vkk,vkt,xmm,zlm,zmi,zsm
20 ne,dty,npi
21 no,nb,nn
22 oj,ciw,ojb,ojc,ojg,ojs,ojw,otw
23 om,gax,gaz,hae,orc
24 or,ory,spv
25 ps,pbt,pbu,pst
26 qu,qub,qud,quf,qug,quh,quk,qul,qup,qur,qus,quw,qux,quy,quz,qva,qvc,qve,qvh,qvi,qvj,qvl,qvm,qvn,qvo,qvp,qvs,qvw,qvz,qwa,qwc,qwh,qws,qxa,qxc,qxh,qxl,qxn,qxo,qxp,qxr,qxt,qxu,qxw
27 sa,cls,vsn
28 sc,sdc,sdn,src,sro
29 sh,bs,hr,sr,cnr
30 sq,aae,aat,aln,als
31 sw,swc,swh
32 uz,uzn,uzs
33 yi,ydd,yih
34 za,zch,zeh,zgb,zgm,zgn,zhd,zhn,zlj,zln,zlq,zqe,zyb,zyg,zyj,zyn,zzj
35 zh zh,cdo,cjy,cmn,cnp,cpx,csp,czh,czo,gan,hak,hnm,hsn,luh,lzh,mnp,nan,sjc,wuu,yue cdo cjy cmn cnp cpx csp czh czo gan hak hnm hsn luh lzh mnp nan sjc wuu yue
36 bal,bcc,bgn,bgp
37 bik,bcl,bln,bto,cts,fbl,lbl,rbl,ubl
38 bnc,ebk,lbk,obk,rbk,vbk
39 bua,bxm,bxr,bxu
40 chm,mhr,mrj
41 del,umu,unm
42 den,scs,xsl
43 din,dib,dik,dip,diw,dks
44 doi,dgo,xnr
45 gba,bdt,gbp,gbq,gmm,gso,gya
46 gon,esg,gno,wsg
47 grb,gbo,gec,grj,grv,gry
48 hai,hax,hdn
49 hmn,cqd,hea,hma,hmc,hmd,hme,hmg,hmh,hmi,hmj,hml,hmm,hmp,hmq,hms,hmw,hmy,hmz,hnj,hrm,huj,mmr,muq,mww,sfm
50 jrb,aju,jye,yhd,yud
51 kln,enb,eyo,niq,oki,pko,sgc,spy,tec,tuy
52 kok,gom,knn
53 kpe,gkp,xpe
54 lah,hnd,hno,jat,phr,pnb,skr,xhe
55 luy,bxk,ida,lkb,lko,lks,lri,lrm,lsm,lto,lts,lwg,nle,nyd,rag
56 man,emk,mku,mlq,mnk,msc,mwk
57 mwr,dhd,mtr,mve,rwr,swv,wry
58 raj,bgq,gda,gju,hoj,mup,wbr
59 rom,rmc,rmf,rml,rmn,rmo,rmw,rmy
60 syr,aii,cld
61 tmh,taq,thv,thz,ttq
62 zap,zaa,zab,zac,zad,zae,zaf,zai,zam,zao,zaq,zar,zas,zat,zav,zaw,zax,zca,zcd,zoo,zpa,zpb,zpc,zpd,zpe,zpf,zpg,zph,zpi,zpj,zpk,zpl,zpm,zpn,zpo,zpp,zpq,zpr,zps,zpt,zpu,zpv,zpw,zpx,zpy,zpz,zsr,zte,ztg,ztl,ztm,ztn,ztp,ztq,zts,ztt,ztu,ztx,zty
63 zza,diq,kiu

View File

@@ -28,7 +28,6 @@ import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.stream.Collectors;
import static org.junit.jupiter.api.Assertions.*; import static org.junit.jupiter.api.Assertions.*;
@@ -41,55 +40,76 @@ public final class LocaleUtilsTest {
LocaleUtils.getCandidateLocales(Locale.forLanguageTag(languageTag)) LocaleUtils.getCandidateLocales(Locale.forLanguageTag(languageTag))
.stream() .stream()
.map(Locale::toLanguageTag) .map(Locale::toLanguageTag)
.collect(Collectors.toList())); .toList());
}
private static void assertCandidateLocalesEquals(String l1, String l2) {
assertEquals(
LocaleUtils.getCandidateLocales(Locale.forLanguageTag(l1)),
LocaleUtils.getCandidateLocales(Locale.forLanguageTag(l2))
);
} }
@Test @Test
public void testGetCandidateLocales() { public void testGetCandidateLocales() {
assertCandidateLocales("zh", List.of("zh-Hans", "zh-CN", "zh", "und")); // English
assertCandidateLocales("zh-CN", List.of("zh-Hans-CN", "zh-Hans", "zh-CN", "zh", "und"));
assertCandidateLocales("zh-SG", List.of("zh-Hans-SG", "zh-Hans", "zh-SG", "zh-CN", "zh", "und")); assertCandidateLocales("en", List.of("en-Latn", "en", "und"));
assertCandidateLocales("zh-MY", List.of("zh-Hans-MY", "zh-Hans", "zh-MY", "zh-CN", "zh", "und")); assertCandidateLocales("en-US", List.of("en-Latn-US", "en-Latn", "en-US", "en", "und"));
assertCandidateLocales("zh-US", List.of("zh-Hans-US", "zh-Hans", "zh-US", "zh-CN", "zh", "und")); assertCandidateLocalesEquals("en", "eng");
assertCandidateLocales("zh-TW", List.of("zh-Hant-TW", "zh-Hant", "zh-TW", "zh", "zh-CN", "und")); assertCandidateLocalesEquals("en-US", "eng-US");
assertCandidateLocales("zh-HK", List.of("zh-Hant-HK", "zh-Hant", "zh-HK", "zh-TW", "zh", "zh-CN", "und")); assertCandidateLocalesEquals("und", "en");
assertCandidateLocales("zh-MO", List.of("zh-Hant-MO", "zh-Hant", "zh-MO", "zh-TW", "zh", "zh-CN", "und"));
assertCandidateLocales("zh-Hans", List.of("zh-Hans", "zh-CN", "zh", "und")); // Spanish
assertCandidateLocales("zh-Hant", List.of("zh-Hant", "zh-TW", "zh", "zh-CN", "und"));
assertCandidateLocales("zh-Hans-US", List.of("zh-Hans-US", "zh-Hans", "zh-US", "zh-CN", "zh", "und")); assertCandidateLocales("es", List.of("es-Latn", "es", "und"));
assertCandidateLocales("zh-Hant-CN", List.of("zh-Hant-CN", "zh-Hant", "zh-CN", "zh-TW", "zh", "und")); assertCandidateLocalesEquals("es", "spa");
assertCandidateLocales("zh-Hans-TW", List.of("zh-Hans-TW", "zh-Hans", "zh-TW", "zh-CN", "zh", "und"));
assertCandidateLocales("zh-Latn", List.of("zh-Latn", "zh", "zh-CN", "und")); // Japanese
assertCandidateLocales("zh-Latn-CN", List.of("zh-Latn-CN", "zh-Latn", "zh-CN", "zh", "und"));
assertCandidateLocales("zh-pinyin", List.of("zh-Latn-pinyin", "zh-Latn", "zh-pinyin", "zh", "zh-CN", "und")); assertCandidateLocales("ja", List.of("ja-Jpan", "ja", "und"));
assertCandidateLocales("zho", List.of("zh-Hans", "zh-CN", "zh", "und")); assertCandidateLocales("ja-JP", List.of("ja-Jpan-JP", "ja-Jpan", "ja-JP", "ja", "und"));
assertCandidateLocalesEquals("ja", "jpn");
assertCandidateLocalesEquals("ja-JP", "jpn-JP");
// Russian
assertCandidateLocales("ru", List.of("ru-Cyrl", "ru", "und"));
assertCandidateLocalesEquals("ru", "rus");
// Ukrainian
assertCandidateLocales("uk", List.of("uk-Cyrl", "uk", "und"));
assertCandidateLocalesEquals("uk", "ukr");
// Chinese
assertCandidateLocales("zh", List.of("cmn-Hans", "cmn", "zh-Hans", "zh-CN", "zh", "und"));
assertCandidateLocales("zh-CN", List.of("cmn-Hans-CN", "cmn-Hans", "cmn-CN", "cmn", "zh-Hans-CN", "zh-Hans", "zh-CN", "zh", "und"));
assertCandidateLocales("zh-SG", List.of("cmn-Hans-SG", "cmn-Hans", "cmn-SG", "cmn", "zh-Hans-SG", "zh-Hans", "zh-SG", "zh-CN", "zh", "und"));
assertCandidateLocales("zh-TW", List.of("cmn-Hant-TW", "cmn-Hant", "cmn-TW", "cmn", "zh-Hant-TW", "zh-Hant", "zh-TW", "zh", "zh-CN", "und"));
assertCandidateLocales("zh-HK", List.of("cmn-Hant-HK", "cmn-Hant", "cmn-HK", "cmn", "zh-Hant-HK", "zh-Hant", "zh-HK", "zh-TW", "zh", "zh-CN", "und"));
assertCandidateLocales("zh-Hant-CN", List.of("cmn-Hant-CN", "cmn-Hant", "cmn-CN", "cmn", "zh-Hant-CN", "zh-Hant", "zh-CN", "zh-TW", "zh", "und"));
assertCandidateLocales("zh-pinyin", List.of("cmn-Latn-pinyin", "cmn-Latn", "cmn-pinyin", "cmn", "zh-Latn-pinyin", "zh-Latn", "zh-pinyin", "zh", "zh-CN", "und"));
assertCandidateLocales("lzh", List.of("lzh-Hant", "lzh", "zh-Hant", "zh-TW", "zh", "zh-CN", "und")); assertCandidateLocales("lzh", List.of("lzh-Hant", "lzh", "zh-Hant", "zh-TW", "zh", "zh-CN", "und"));
assertCandidateLocales("lzh-Hant", List.of("lzh-Hant", "lzh", "zh-Hant", "zh-TW", "zh", "zh-CN", "und"));
assertCandidateLocales("lzh-Hans", List.of("lzh-Hans", "lzh", "zh-Hans", "zh-CN", "zh", "und"));
assertCandidateLocales("cmn", List.of("cmn-Hans", "cmn", "zh-Hans", "zh-CN", "zh", "und"));
assertCandidateLocales("cmn-Hans", List.of("cmn-Hans", "cmn", "zh-Hans", "zh-CN", "zh", "und"));
assertCandidateLocales("yue", List.of("yue-Hans", "yue", "zh-Hans", "zh-CN", "zh", "und")); assertCandidateLocales("yue", List.of("yue-Hans", "yue", "zh-Hans", "zh-CN", "zh", "und"));
assertCandidateLocales("ja", List.of("ja", "und")); assertCandidateLocalesEquals("zh", "cmn-Hans");
assertCandidateLocales("jpn", List.of("ja", "und")); assertCandidateLocalesEquals("zh-CN", "cmn-Hans-CN");
assertCandidateLocales("ja-JP", List.of("ja-JP", "ja", "und")); assertCandidateLocalesEquals("zh-SG", "cmn-Hans-SG");
assertCandidateLocales("jpn-JP", List.of("ja-JP", "ja", "und")); assertCandidateLocalesEquals("zh-MY", "cmn-Hans-MY");
assertCandidateLocalesEquals("zh-TW", "cmn-Hant-TW");
assertCandidateLocales("en", List.of("en", "und")); assertCandidateLocalesEquals("zh-HK", "cmn-Hant-HK");
assertCandidateLocales("eng", List.of("en", "und")); assertCandidateLocalesEquals("zh-Hans", "cmn-Hans");
assertCandidateLocales("en-US", List.of("en-US", "en", "und")); assertCandidateLocalesEquals("zh-Hant", "cmn-Hant");
assertCandidateLocales("eng-US", List.of("en-US", "en", "und")); assertCandidateLocalesEquals("zh-Hant-CN", "cmn-Hant-CN");
assertCandidateLocalesEquals("zh-Hant-SG", "cmn-Hant-SG");
assertCandidateLocales("es", List.of("es", "und")); assertCandidateLocalesEquals("zh-Latn", "cmn-Latn");
assertCandidateLocales("spa", List.of("es", "und")); assertCandidateLocalesEquals("zh-pinyin", "cmn-Latn-pinyin");
assertCandidateLocalesEquals("zho", "zh");
assertCandidateLocales("ru", List.of("ru", "und"));
assertCandidateLocales("rus", List.of("ru", "und"));
assertCandidateLocales("uk", List.of("uk", "und"));
assertCandidateLocales("ukr", List.of("uk", "und"));
assertCandidateLocales("und", List.of("en", "und"));
} }
@Test @Test
@@ -134,7 +154,9 @@ public final class LocaleUtilsTest {
assertEquals("Hant", LocaleUtils.getScript(Locale.forLanguageTag("lzh-Hant"))); assertEquals("Hant", LocaleUtils.getScript(Locale.forLanguageTag("lzh-Hant")));
assertEquals("Hant", LocaleUtils.getScript(Locale.forLanguageTag("lzh-CN"))); assertEquals("Hant", LocaleUtils.getScript(Locale.forLanguageTag("lzh-CN")));
assertEquals("Latn", LocaleUtils.getScript(Locale.forLanguageTag("en")));
assertEquals("Latn", LocaleUtils.getScript(Locale.forLanguageTag("zh-pinyin"))); assertEquals("Latn", LocaleUtils.getScript(Locale.forLanguageTag("zh-pinyin")));
assertEquals("Latn", LocaleUtils.getScript(Locale.forLanguageTag("ja-hepburn")));
} }
@Test @Test
@@ -192,20 +214,17 @@ public final class LocaleUtilsTest {
} }
@Test @Test
public void testMapToISO2Language() { public void testNormalizeLanguage() {
assertEquals("en", LocaleUtils.mapToISO2Language("eng")); assertEquals("en", LocaleUtils.normalizeLanguage(""));
assertEquals("es", LocaleUtils.mapToISO2Language("spa")); assertEquals("en", LocaleUtils.normalizeLanguage("eng"));
assertEquals("ja", LocaleUtils.mapToISO2Language("jpn")); assertEquals("es", LocaleUtils.normalizeLanguage("spa"));
assertEquals("ru", LocaleUtils.mapToISO2Language("rus")); assertEquals("ja", LocaleUtils.normalizeLanguage("jpn"));
assertEquals("uk", LocaleUtils.mapToISO2Language("ukr")); assertEquals("ru", LocaleUtils.normalizeLanguage("rus"));
assertEquals("zh", LocaleUtils.mapToISO2Language("zho")); assertEquals("uk", LocaleUtils.normalizeLanguage("ukr"));
assertEquals("zu", LocaleUtils.mapToISO2Language("zul")); assertEquals("zh", LocaleUtils.normalizeLanguage("zho"));
assertEquals("zu", LocaleUtils.normalizeLanguage("zul"));
assertNull(LocaleUtils.mapToISO2Language(null)); assertEquals("en", LocaleUtils.normalizeLanguage(""));
assertNull(LocaleUtils.mapToISO2Language("")); assertEquals("cmn", LocaleUtils.normalizeLanguage("cmn"));
assertNull(LocaleUtils.mapToISO2Language("cmn"));
assertNull(LocaleUtils.mapToISO2Language("lzh"));
assertNull(LocaleUtils.mapToISO2Language("tlh"));
} }
@Test @Test
@@ -228,10 +247,11 @@ public final class LocaleUtilsTest {
assertEquals(TextDirection.LEFT_TO_RIGHT, LocaleUtils.getTextDirection(Locale.forLanguageTag("zh"))); assertEquals(TextDirection.LEFT_TO_RIGHT, LocaleUtils.getTextDirection(Locale.forLanguageTag("zh")));
assertEquals(TextDirection.LEFT_TO_RIGHT, LocaleUtils.getTextDirection(Locale.forLanguageTag("zh-Hans"))); assertEquals(TextDirection.LEFT_TO_RIGHT, LocaleUtils.getTextDirection(Locale.forLanguageTag("zh-Hans")));
assertEquals(TextDirection.LEFT_TO_RIGHT, LocaleUtils.getTextDirection(Locale.forLanguageTag("zh-CN"))); assertEquals(TextDirection.LEFT_TO_RIGHT, LocaleUtils.getTextDirection(Locale.forLanguageTag("zh-CN")));
assertEquals(TextDirection.LEFT_TO_RIGHT, LocaleUtils.getTextDirection(Locale.forLanguageTag("ar-Qabs")));
assertEquals(TextDirection.RIGHT_TO_LEFT, LocaleUtils.getTextDirection(Locale.forLanguageTag("en-Qabs"))); assertEquals(TextDirection.RIGHT_TO_LEFT, LocaleUtils.getTextDirection(Locale.forLanguageTag("en-Qabs")));
assertEquals(TextDirection.RIGHT_TO_LEFT, LocaleUtils.getTextDirection(Locale.forLanguageTag("ar"))); assertEquals(TextDirection.RIGHT_TO_LEFT, LocaleUtils.getTextDirection(Locale.forLanguageTag("ar")));
assertEquals(TextDirection.RIGHT_TO_LEFT, LocaleUtils.getTextDirection(Locale.forLanguageTag("ara"))); assertEquals(TextDirection.RIGHT_TO_LEFT, LocaleUtils.getTextDirection(Locale.forLanguageTag("ara")));
assertEquals(TextDirection.RIGHT_TO_LEFT, LocaleUtils.getTextDirection(Locale.forLanguageTag("he")));
assertEquals(TextDirection.RIGHT_TO_LEFT, LocaleUtils.getTextDirection(Locale.forLanguageTag("heb")));
} }
} }

View File

@@ -1,5 +1,6 @@
import org.jackhuang.hmcl.gradle.ci.CheckUpdate import org.jackhuang.hmcl.gradle.ci.CheckUpdate
import org.jackhuang.hmcl.gradle.docs.UpdateDocuments import org.jackhuang.hmcl.gradle.docs.UpdateDocuments
import org.jackhuang.hmcl.gradle.l10n.ParseLanguageSubtagRegistry
plugins { plugins {
id("checkstyle") id("checkstyle")
@@ -76,6 +77,12 @@ org.jackhuang.hmcl.gradle.javafx.JavaFXUtils.register(rootProject)
defaultTasks("clean", "build") defaultTasks("clean", "build")
// Registers the task that parses the IANA Language Subtag Registry and writes the
// derived CSV resources. The input is the `language-subtag-registry` file in this
// project's directory; the two outputs are written into HMCLCore's `assets/lang` resources.
tasks.register<ParseLanguageSubtagRegistry>("parseLanguageSubtagRegistry") {
languageSubtagRegistryFile.set(layout.projectDirectory.file("language-subtag-registry"))
sublanguagesFile.set(layout.projectDirectory.file("HMCLCore/src/main/resources/assets/lang/sublanguages.csv"))
defaultScriptFile.set(layout.projectDirectory.file("HMCLCore/src/main/resources/assets/lang/default_script.csv"))
}
tasks.register<UpdateDocuments>("updateDocuments") { tasks.register<UpdateDocuments>("updateDocuments") {
documentsDir.set(layout.projectDirectory.dir("docs")) documentsDir.set(layout.projectDirectory.dir("docs"))

View File

@@ -19,36 +19,42 @@ package org.jackhuang.hmcl.gradle.l10n;
import org.gradle.api.GradleException; import org.gradle.api.GradleException;
import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.*; import java.util.*;
final class LocalizationUtils { final class LocalizationUtils {
public static final Map<String, String> subLanguageToParent; public static final Map<String, String> subLanguageToParent = loadCSV("sublanguages.csv");
static { private static Map<String, String> loadCSV(String fileName) {
InputStream input = LocalizationUtils.class.getResourceAsStream("sublanguages.csv"); InputStream resource = LocalizationUtils.class.getResourceAsStream(fileName);
if (input == null) if (resource == null) {
throw new GradleException("Missing sublanguages.csv file"); throw new GradleException("Resource not found: " + fileName);
}
Map<String, String> map = new HashMap<>(); HashMap<String, String> result = new HashMap<>();
try (input) { try (resource) {
new String(input.readAllBytes()).lines() new String(resource.readAllBytes(), StandardCharsets.UTF_8).lines().forEach(line -> {
.filter(line -> !line.startsWith("#") && !line.isBlank()) if (line.startsWith("#") || line.isBlank())
.forEach(line -> { return;
String[] languages = line.split(",");
if (languages.length < 2) String[] items = line.split(",");
if (items.length < 2) {
throw new GradleException("Invalid line in sublanguages.csv: " + line); throw new GradleException("Invalid line in sublanguages.csv: " + line);
}
String parent = languages[0]; String parent = items[0];
for (int i = 1; i < languages.length; i++) { for (int i = 1; i < items.length; i++) {
map.put(languages[i], parent); result.put(items[i], parent);
} }
}); });
} catch (IOException e) { } catch (RuntimeException | Error e) {
throw new GradleException("Failed to read sublanguages.csv", e); throw e;
} catch (Throwable e) {
throw new GradleException("Failed to load " + fileName, e);
} }
subLanguageToParent = Collections.unmodifiableMap(map);
return Map.copyOf(result);
} }
private static List<String> resolveLanguage(String language) { private static List<String> resolveLanguage(String language) {

View File

@@ -0,0 +1,254 @@
/*
* Hello Minecraft! Launcher
* Copyright (C) 2025 huangyuhui <huanghongxun2008@126.com> and contributors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.jackhuang.hmcl.gradle.l10n;
import org.gradle.api.DefaultTask;
import org.gradle.api.GradleException;
import org.gradle.api.file.RegularFileProperty;
import org.gradle.api.tasks.InputFile;
import org.gradle.api.tasks.OutputFile;
import org.gradle.api.tasks.TaskAction;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.StandardOpenOption;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/// @author Glavo
/// @see [language-subtag-registry](https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry)
public abstract class ParseLanguageSubtagRegistry extends DefaultTask {
/// The language subtag registry file that this task reads and parses.
@InputFile
public abstract RegularFileProperty getLanguageSubtagRegistryFile();
/// CSV file receiving the macrolanguage mapping.
/// Each row holds a macrolanguage followed by every `language`/`extlang` subtag
/// whose `Macrolanguage` field names it.
@OutputFile
public abstract RegularFileProperty getSublanguagesFile();
/// CSV file storing the mapping from subtags to their default scripts.
/// Each row holds a script followed by every subtag that defaults to that script.
@OutputFile
public abstract RegularFileProperty getDefaultScriptFile();
/// Parses the registry file and regenerates both CSV outputs.
///
/// Records of type `grandfathered` or `redundant`, and records carrying a `Deprecated`
/// field, are skipped. For the remaining records:
///
/// - `language` / `extlang`: the `Macrolanguage` field (if any) feeds the sublanguage
///   mapping, and the `Suppress-Script` field (if any) feeds the default script mapping.
/// - `variant`: contributes a default script only when every `Prefix` carries the same script.
/// - `region` / `script`: ignored.
///
/// @throws IOException     if the registry cannot be read or a CSV file cannot be written
/// @throws GradleException if a record has an unknown `Type` or lacks a required field
@TaskAction
public void run() throws IOException {
    var builder = new ItemsBuilder();
    try (var reader = Files.newBufferedReader(getLanguageSubtagRegistryFile().getAsFile().get().toPath())) {
        builder.parse(reader);
    }
    List<Item> records = builder.items;

    MultiMap languageToSub = new MultiMap();
    MultiMap scriptToSubtag = new MultiMap();

    // Classical Chinese should use Traditional Chinese characters by default
    scriptToSubtag.add("Hant", "lzh");

    for (Item record : records) {
        String type = record.firstValueOrThrow("Type");

        boolean skipped = type.equals("grandfathered")
                || type.equals("redundant")
                || !record.allValues("Deprecated").isEmpty();
        if (skipped) {
            continue;
        }

        // Looked up before dispatching on the type, so a missing subtag is reported first.
        String subtag = record.firstValueOrThrow("Subtag");

        switch (type) {
            case "language", "extlang" -> {
                String macroLanguage = record.firstValueOrNull("Macrolanguage");
                if (macroLanguage != null) {
                    languageToSub.add(macroLanguage, subtag);
                }

                String suppressedScript = record.firstValueOrNull("Suppress-Script");
                if (suppressedScript != null) {
                    scriptToSubtag.add(suppressedScript, subtag);
                }
            }
            case "variant" -> {
                String sharedScript = findSharedPrefixScript(record.allValues("Prefix"));
                if (sharedScript != null) {
                    scriptToSubtag.add(sharedScript, subtag);
                }
            }
            case "region", "script" -> {
                // ignored
            }
            default -> throw new GradleException(String.format("Unknown subtag type: %s", type));
        }
    }

    languageToSub.saveToCSV(getSublanguagesFile());
    scriptToSubtag.saveToCSV(getDefaultScriptFile());
}

/// Returns the script shared by all given prefixes, or `null` when the list is empty,
/// any prefix has no script, or the prefixes disagree on the script.
private static @Nullable String findSharedPrefixScript(List<String> prefixes) {
    String shared = null;
    for (String prefix : prefixes) {
        String script = Locale.forLanguageTag(prefix).getScript();
        if (script.isEmpty() || (shared != null && !shared.equals(script))) {
            return null;
        }
        shared = script;
    }
    return shared;
}
/// Sorted multimap from a key to a set of values; keys and values are both ordered by `TAG_COMPARATOR`.
private static final class MultiMap {
    private final TreeMap<String, Set<String>> allValues = new TreeMap<>(TAG_COMPARATOR);

    /// Adds `value` to the set stored under `key`, creating the set on first use.
    void add(String key, String value) {
        Set<String> bucket = allValues.get(key);
        if (bucket == null) {
            bucket = new TreeSet<>(TAG_COMPARATOR);
            allValues.put(key, bucket);
        }
        bucket.add(value);
    }

    /// Writes one comma-separated row per key (the key first, then its values),
    /// creating the file or truncating an existing one.
    ///
    /// @throws IOException if the file cannot be written
    void saveToCSV(RegularFileProperty csvFile) throws IOException {
        var target = csvFile.getAsFile().get().toPath();
        try (var out = Files.newBufferedWriter(target,
                StandardOpenOption.CREATE,
                StandardOpenOption.TRUNCATE_EXISTING)) {
            for (var row : allValues.entrySet()) {
                StringJoiner cells = new StringJoiner(",");
                cells.add(row.getKey());
                row.getValue().forEach(cells::add);

                out.write(cells.toString());
                out.newLine();
            }
        }
    }
}
/// One parsed record: field names mapped, in insertion order, to every value given for that field.
private static final class Item {
    final Map<String, List<String>> values = new LinkedHashMap<>();

    /// Returns every value recorded for `name`, or an empty list if the field is absent.
    public @NotNull List<String> allValues(String name) {
        return values.getOrDefault(name, List.of());
    }

    /// Returns the first value recorded for `name`, if the field is present.
    public @NotNull Optional<String> firstValue(String name) {
        List<String> found = values.get(name);
        if (found == null) {
            return Optional.empty();
        }
        return Optional.ofNullable(found.get(0));
    }

    /// Returns the first value recorded for `name`, or `null` if the field is absent.
    public @Nullable String firstValueOrNull(String name) {
        return firstValue(name).orElse(null);
    }

    /// Returns the first value recorded for `name`.
    ///
    /// @throws GradleException if the field is absent
    public @NotNull String firstValueOrThrow(String name) {
        String first = firstValueOrNull(name);
        if (first == null) {
            throw new GradleException("No value found for " + name + " in " + this);
        }
        return first;
    }

    /// Appends `value` to the values recorded for `name`.
    public void put(String name, String value) {
        List<String> bucket = values.get(name);
        if (bucket == null) {
            bucket = new ArrayList<>(1);
            values.put(name, bucket);
        }
        bucket.add(value);
    }

    /// Renders the record as `name: value` lines, one per value, joined by `\n`.
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        for (Map.Entry<String, List<String>> field : values.entrySet()) {
            for (String value : field.getValue()) {
                if (text.length() > 0) {
                    text.append('\n');
                }
                text.append(field.getKey()).append(": ").append(value);
            }
        }
        return text.toString();
    }
}
/// Incremental parser for the registry's record format.
///
/// Records are separated by `%%` lines. Each record consists of `Name: value` fields;
/// a line starting with a space continues (is folded onto) the previous field's value.
private static final class ItemsBuilder {
    /// Matches a `Name: value` field line.
    private static final Pattern FIELD_LINE = Pattern.compile("^(?<name>[A-Za-z\\-]+): (?<value>.*)$");

    private final List<Item> items = new ArrayList<>(1024);
    private Item current = new Item();
    private String currentName = null;
    private String currentValue = null;

    /// Stores the pending field (if any) into the current record.
    private void updateCurrent() {
        if (currentName != null) {
            current.put(currentName, currentValue);
            currentName = null;
            currentValue = null;
        }
    }

    /// Finishes the current record and appends it to `items`.
    ///
    /// A record without a `Type` field is only accepted if it has a `File-Date` field;
    /// such a record is discarded instead of being added.
    ///
    /// @throws GradleException if the record has neither a `Type` nor a `File-Date` field
    private void updateItems() {
        updateCurrent();

        if (current.values.isEmpty()) {
            return;
        }

        if (current.firstValue("Type").isEmpty()) {
            if (current.firstValue("File-Date").isPresent()) {
                current.values.clear();
                return;
            }
            throw new GradleException("Invalid item: " + current);
        }

        items.add(current);
        current = new Item();
    }

    /// Reads the whole registry from `reader`, collecting its records into `items`.
    ///
    /// @throws IOException     if reading fails
    /// @throws GradleException if a line is neither blank, a separator, a continuation
    ///                         of a pending field, nor a `Name: value` field
    void parse(BufferedReader reader) throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            if (line.isBlank()) {
                continue;
            }

            if (line.equals("%%")) {
                updateItems();
            } else if (line.startsWith(" ")) {
                if (currentValue == null) {
                    throw new GradleException("Invalid line: " + line);
                }
                // Unfold: drop the continuation indentation so that the two parts are
                // joined by exactly one space instead of keeping the indent in the value.
                currentValue = currentValue + " " + line.stripLeading();
            } else {
                updateCurrent();

                Matcher matcher = FIELD_LINE.matcher(line);
                if (!matcher.matches()) {
                    throw new GradleException("Invalid line: " + line);
                }
                currentName = matcher.group("name");
                currentValue = matcher.group("value");
            }
        }

        // The last record is not followed by a separator.
        updateItems();
    }
}
/// Orders tags by length first (shorter tags come first), then by natural string order.
private static final Comparator<String> TAG_COMPARATOR =
        Comparator.comparingInt(String::length).thenComparing(Comparator.naturalOrder());
}

View File

@@ -27,20 +27,18 @@ HMCL 为多种语言提供本地化支持。
HMCL 使用符合 IETF BCP 47 规范的语言标签。 HMCL 使用符合 IETF BCP 47 规范的语言标签。
对于 ISO 639 标准中定义的语言,如果同时存在两字母语言代码和三字母语言代码,那么应当优先选择两字母语言代码。 在选择语言标签时,我们会遵循以下原则:
1. 对于 ISO 639 标准中定义的语言,如果已经在 [IANA 语言子标签注册表](https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry)中注册,我们总是使用经过注册的标签。
例如,对于英语,我们使用 `en` 而不是 `eng` 作为语言代码。 例如,对于英语,我们使用 `en` 而不是 `eng` 作为语言代码。
对于 Minecraft 所定义的非标准语言,应当优先使用语言文件的 `language.code` 中定义的代码,而非游戏语言文件的名称 2. 对于 Minecraft 所定义的非标准语言,应当优先使用语言文件的 `language.code` 中定义的代码,而非游戏语言文件的名称
(但对于存在两字母代码的语言,应当将三字母语言代码替换为对应的两字母语言代码)。
这是因为 Minecraft 有时候会用现实中实际存在的国家/地区代码来表示虚构语言 (比如说海盗英语的语言文件为 `en_pt`,但 `PT` 其实是葡萄牙的国家代码)。 这是因为 Minecraft 有时候会用现实中实际存在的国家/地区代码来表示虚构语言 (比如说海盗英语的语言文件为 `en_pt`,但 `PT` 其实是葡萄牙的国家代码)。
例如,对于颠倒的英语,我们使用 `en-Qabs` 作为语言代码,而不是 `en-UD` 例如,对于颠倒的英语,我们使用 `en-Qabs` 作为语言代码,而不是 `en-UD`
此外,语言代码中应当尽可能选择地区中立的语言标签。
例如,对于简体中文和繁体中文,我们使用 `zh-Hans``zh-Hant` 作为语言代码,而不是 `zh-CN``zh-TW`
</details> </details>
<details> <details>
@@ -106,7 +104,7 @@ HMCL 的绝大多数文本都位于这个文件中,翻译此文件就能翻译
这是一个 Java Properties 文件,格式非常简单。 这是一个 Java Properties 文件,格式非常简单。
在翻译前请先阅读该格式的介绍: [Properties 文件](https://en.wikipedia.org/wiki/.properties)。 在翻译前请先阅读该格式的介绍: [Properties 文件](https://en.wikipedia.org/wiki/.properties)。
作为翻译的第一步,请从[这张表格](https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes)中查询这个语言对应的两字母或三字母语言标签。 作为翻译的第一步,请从[这张表格](https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry)中查询这个语言对应的两字母或三字母语言标签。
例如,英语的语言标签为 `en` 例如,英语的语言标签为 `en`
在确定了语言标签后,请在 [`I18N.properties` 文件旁](../HMCL/src/main/resources/assets/lang)创建 `I18N_<语言标签>.properties` (例如 `I18N_en.properties`) 文件。 在确定了语言标签后,请在 [`I18N.properties` 文件旁](../HMCL/src/main/resources/assets/lang)创建 `I18N_<语言标签>.properties` (例如 `I18N_en.properties`) 文件。
@@ -169,12 +167,62 @@ HMCL 的维护者会替你完成其他步骤。
对于某个语言下的缺失的资源HMCL 支持一套资源回退机制,会根据不同的语言标签推导出一个搜索列表, 对于某个语言下的缺失的资源HMCL 支持一套资源回退机制,会根据不同的语言标签推导出一个搜索列表,
根据该列表依次搜索资源。 根据该列表依次搜索资源。
例如,如果当前环境的语言标签为 `en-US`,那么 HMCL 会根据以下列表的顺序搜索对应的本地化资源: 在搜索前,我们会先通过以下步骤对语言标签进行细化推导。
1. `en-US` 1. 归一化语言代码
如果当前语言标签中的语言代码子标签未在 IANA 语言子标签注册表中进行注册,HMCL 会先尝试将其映射为注册表中已注册的标签。
例如,HMCL 会将语言代码 `eng` 替换为 `en`。
2. 映射宏语言至子语言
如果当前语言代码是一个 [ISO 639 宏语言](https://en.wikipedia.org/wiki/ISO_639_macrolanguage),
且该宏语言通常指代某个个体语言,HMCL 会将其替换为该个体语言。
例如,`zh` (中文) 通常实际指代 `cmn` (官话),所以我们会将语言代码 `zh` 替换为 `cmn`。
3. 推导拼写脚本
如果当前语言标签中未指定拼写脚本,HMCL 会依次根据以下规则尝试推导拼写脚本:
1. 如果当前语言标签指定了语言变体,该语言变体已在 IANA 语言子标签注册表中注册,
且注册表中其所有 `Prefix` 都包含相同的拼写脚本,则将当前拼写脚本指定为该脚本。
例如,如果当前语言变体为 `pinyin` (汉语拼音),则当前拼写脚本会被指定为 `Latn` (拉丁文)。
2. 如果当前语言代码在 IANA 语言子标签注册表中被指定了 `Suppress-Script`,则将当前拼写脚本指定为该脚本。
例如,如果当前语言代码为 `en` (英语),则当前拼写脚本会被指定为 `Latn` (拉丁文);
如果当前语言代码为 `ru` (俄语),则当前拼写脚本会被指定为 `Cyrl` (西里尔文)。
3. 如果当前语言代码是 `lzh` (文言),则将当前拼写脚本指定为 `Hant` (繁体汉字)。
4. 如果当前语言代码是 `zh` 或 `zh` 的子语言,则检查当前国家/地区代码是否为 `TW`、`HK`、`MO` 之一。
如果结果为真,则将当前拼写脚本指定为 `Hant` (繁体汉字);否则将当前拼写脚本指定为 `Hans` (简体汉字)。
在对语言代码细化推导完成后,HMCL 会开始根据此语言标签推导出一个语言标签列表。
例如,对于语言标签 `en-US`,HMCL 会将其细化为 `en-Latn-US`,并据此推导出以下搜索列表:
1. `en-Latn-US`
2. `en-Latn`
3. `en-US`
4. `en` 4. `en`
5. `und` 5. `und`
对于语言标签 `zh-CN`,HMCL 会将其细化为 `cmn-Hans-CN`,并据此推导出以下搜索列表:
1. `cmn-Hans-CN`
2. `cmn-Hans`
3. `cmn-CN`
4. `cmn`
5. `zh-Hans-CN`
6. `zh-Hans`
7. `zh-CN`
8. `zh`
9. `und`
对于能够混合的资源 (例如 `.properties` 文件)HMCL 会根据此列表的优先级混合资源; 对于能够混合的资源 (例如 `.properties` 文件)HMCL 会根据此列表的优先级混合资源;
对于难以混合的资源 (例如字体文件)HMCL 会根据此列表加载找到的最高优先级的资源。 对于难以混合的资源 (例如字体文件)HMCL 会根据此列表加载找到的最高优先级的资源。
@@ -182,35 +230,33 @@ HMCL 的维护者会替你完成其他步骤。
例如,如果当前环境的语言标签为 `eng-US`,那么 HMCL 会将其映射至 `en-US` 后再根据上述规则搜索本地化资源。 例如,如果当前环境的语言标签为 `eng-US`,那么 HMCL 会将其映射至 `en-US` 后再根据上述规则搜索本地化资源。
如果当前语言是一个 [ISO 639 宏语言](https://en.wikipedia.org/wiki/ISO_639_macrolanguage)的子语言,那么 HMCL 也会搜索宏语言对应的资源。
### 对于中文的额外规则 ### 对于中文的额外规则
对于中文 (以及其子语言标签,例如文言文 (`lzh`)、普通话 (`cmn`)、粤语 (`yue`) 等等)HMCL 有着额外的支持 HMCL 总是会将 `zh-CN` 加入所有中文环境的搜索列表中,将 `zh-TW` 加入所有繁体中文环境的搜索列表中
如果当前环境的语言为中文 (及其子语言),且未指定书写脚本,那么 HMCL 会根据语言和地区标签推导出默认的书写脚本。
对于语言为 `lzh` 或地区为 `TW``HK``MO` 的情况,默认书写脚本为繁体中文 (`Hant`)
而对于其他语言和地区,默认书写脚本为简体中文 (`Hans`)。
此外HMCL 会将 `zh-CN` 加入所有中文环境的搜索列表中,将 `zh-TW` 加入所有繁体中文环境的搜索列表中,
从而适应更多场景。
以下是几个常见中文环境对应的本地化资源搜索列表。 以下是几个常见中文环境对应的本地化资源搜索列表。
- `zh-CN`: - `zh-CN`:
1. `zh-Hans-CN` 1. `cmn-Hans-CN`
2. `zh-Hans` 2. `cmn-Hans`
3. `zh-CN` 3. `cmn-CN`
4. `zh` 4. `cmn`
5. `und` 5. `zh-Hans-CN`
6. `zh-Hans`
7. `zh-CN`
8. `zh`
9. `und`
- `zh-SG`: - `zh-SG`:
1. `zh-Hans-SG` 1. `cmn-Hans-SG`
2. `zh-Hans` 2. `cmn-Hans`
3. `zh-SG` 3. `cmn-SG`
4. `zh-CN` 4. `cmn`
5. `zh` 5. `zh-Hans-SG`
6. `und` 6. `zh-Hans`
7. `zh-SG`
8. `zh-CN`
9. `zh`
10. `und`
- `zh-TW`: - `zh-TW`:
1. `zh-Hant-TW` 1. `zh-Hant-TW`
2. `zh-Hant` 2. `zh-Hant`
@@ -219,19 +265,25 @@ HMCL 的维护者会替你完成其他步骤。
5. `zh-CN` 5. `zh-CN`
6. `und` 6. `und`
- `zh-HK`: - `zh-HK`:
1. `zh-Hant-HK` 1. `cmn-Hant-HK`
2. `zh-Hant` 2. `cmn-Hant`
3. `zh-HK` 3. `cmn-HK`
4. `zh-TW` 4. `cmn`
5. `zh` 5. `zh-Hant-HK`
6. `zh-CN` 6. `zh-Hant`
7. `und` 7. `zh-HK`
8. `zh-TW`
9. `zh`
10. `zh-CN`
11. `und`
- `lzh`: - `lzh`:
1. `lzh-Hant` 1. `lzh-Hant`
2. `lzh` 2. `lzh`
3. `zh-Hant` 3. `zh-Hant`
4. `zh` 4. `zh-TW`
5. `und` 5. `zh`
6. `zh-CN`
7. `und`
## 自动同步文档内容 ## 自动同步文档内容