diff --git a/.gitignore b/.gitignore index 2316f1890..3ba714828 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,9 @@ hmcl-exported-logs-* /.local/ /.cache/ +# IANA Language Subtag Registry +language-subtag-registry + # gradle build /build/ /HMCL/build/ diff --git a/HMCLCore/src/main/java/org/jackhuang/hmcl/util/i18n/DefaultResourceBundleControl.java b/HMCLCore/src/main/java/org/jackhuang/hmcl/util/i18n/DefaultResourceBundleControl.java index 6c41f3984..076faff83 100644 --- a/HMCLCore/src/main/java/org/jackhuang/hmcl/util/i18n/DefaultResourceBundleControl.java +++ b/HMCLCore/src/main/java/org/jackhuang/hmcl/util/i18n/DefaultResourceBundleControl.java @@ -30,7 +30,7 @@ import java.util.ResourceBundle; /// - For all Chinese locales, `zh-CN` is always added to the candidate list. If `zh-Hans` already exists in the candidate list, /// `zh-CN` is inserted before `zh`; otherwise, it is inserted after `zh`. /// - For all Traditional Chinese locales, `zh-TW` is always added to the candidate list (before `zh`). -/// - For all [supported][LocaleUtils#mapToISO2Language(String)] ISO 639-3 language code (such as `eng`, `zho`, `lzh`, etc.), +/// - For all supported ISO 639-3 language code (such as `eng`, `zho`, `lzh`, etc.), /// a candidate list with the language code replaced by the ISO 639-1 (Macro)language code is added to the end of the candidate list. 
/// /// @author Glavo diff --git a/HMCLCore/src/main/java/org/jackhuang/hmcl/util/i18n/LocaleUtils.java b/HMCLCore/src/main/java/org/jackhuang/hmcl/util/i18n/LocaleUtils.java index 3d36152b8..7cf7ece16 100644 --- a/HMCLCore/src/main/java/org/jackhuang/hmcl/util/i18n/LocaleUtils.java +++ b/HMCLCore/src/main/java/org/jackhuang/hmcl/util/i18n/LocaleUtils.java @@ -17,9 +17,7 @@ */ package org.jackhuang.hmcl.util.i18n; -import org.jackhuang.hmcl.util.Lang; import org.jackhuang.hmcl.util.StringUtils; -import org.jackhuang.hmcl.util.io.IOUtils; import org.jackhuang.hmcl.util.platform.NativeUtils; import org.jackhuang.hmcl.util.platform.OperatingSystem; import org.jackhuang.hmcl.util.platform.windows.Kernel32; @@ -29,6 +27,8 @@ import org.jetbrains.annotations.Nullable; import org.jetbrains.annotations.Unmodifiable; import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.time.Duration; @@ -48,66 +48,74 @@ public final class LocaleUtils { public static final Locale SYSTEM_DEFAULT = Locale.getDefault(); + public static final boolean IS_CHINA_MAINLAND = isChinaMainland(); + + private static boolean isChinaMainland() { + if ("Asia/Shanghai".equals(ZoneId.systemDefault().getId())) + return true; + + // Check if the time zone is UTC+8 + if (ZonedDateTime.now().getOffset().getTotalSeconds() == Duration.ofHours(8).toSeconds()) { + if ("CN".equals(LocaleUtils.SYSTEM_DEFAULT.getCountry())) + return true; + + if (OperatingSystem.CURRENT_OS == OperatingSystem.WINDOWS && NativeUtils.USE_JNA) { + Kernel32 kernel32 = Kernel32.INSTANCE; + + // https://learn.microsoft.com/windows/win32/intl/table-of-geographical-locations + if (kernel32 != null && kernel32.GetUserGeoID(WinConstants.GEOCLASS_NATION) == 45) // China + return true; + } + } + + return false; + } + public static final Locale LOCALE_ZH_HANS = Locale.forLanguageTag("zh-Hans"); public static final Locale LOCALE_ZH_HANT = 
Locale.forLanguageTag("zh-Hant"); public static final String DEFAULT_LANGUAGE_KEY = "default"; - private static final Map subLanguageToParent = new HashMap<>(); - private static final Map iso3To2 = new HashMap<>(); - private static final Set rtl = new HashSet<>(); + private static final Map PARENT_LANGUAGE = loadCSV("sublanguages.csv"); + private static final Map NORMALIZED_TAG = loadCSV("language_aliases.csv"); + private static final Map DEFAULT_SCRIPT = loadCSV("default_script.csv"); + private static final Map PREFERRED_LANGUAGE = Map.of("zh", "cmn"); + private static final Set RTL_SCRIPTS = Set.of("Qabs", "Arab", "Hebr"); + private static final Set CHINESE_TRADITIONAL_REGIONS = Set.of("TW", "HK", "MO"); - static { - try { - for (String line : Lang.toIterable(IOUtils.readFullyAsString(LocaleUtils.class.getResourceAsStream("/assets/lang/sublanguages.csv")).lines())) { - if (line.startsWith("#") || line.isBlank()) { - continue; - } - - String[] languages = line.split(","); - if (languages.length < 2) { - LOG.warning("Invalid line in sublanguages.csv: " + line); - continue; - } - - String parent = languages[0]; - for (int i = 1; i < languages.length; i++) { - subLanguageToParent.put(languages[i], parent); - } - } - } catch (Throwable e) { - LOG.warning("Failed to load sublanguages.csv", e); + /// Load CSV files located in `/assets/lang/`. + /// Each line in these files contains at least two elements. + /// + /// For example, if a file contains `value0,value1,value2`, the return value will be `{value1=value0, value2=value0}`. 
+ private static Map loadCSV(String fileName) { + InputStream resource = LocaleUtils.class.getResourceAsStream("/assets/lang/" + fileName); + if (resource == null) { + LOG.warning("Can't find file: " + fileName); + return Map.of(); } - try { - // Line Format: (?[a-z]{2}),(?[a-z]{3}) - for (String line : Lang.toIterable(IOUtils.readFullyAsString(LocaleUtils.class.getResourceAsStream("/assets/lang/iso_languages.csv")).lines())) { - if (line.startsWith("#") || line.isBlank()) { - continue; + HashMap result = new HashMap<>(); + try (resource) { + new String(resource.readAllBytes(), StandardCharsets.UTF_8).lines().forEach(line -> { + if (line.startsWith("#") || line.isBlank()) + return; + + String[] items = line.split(","); + if (items.length < 2) { + LOG.warning("Invalid line in " + fileName + ": " + line); + return; } - String[] parts = line.split(",", 3); - if (parts.length != 2) { - LOG.warning("Invalid line in iso_languages.csv: " + line); - continue; + String parent = items[0]; + for (int i = 1; i < items.length; i++) { + result.put(items[i], parent); } - - iso3To2.put(parts[1], parts[0]); - } + }); } catch (Throwable e) { - LOG.warning("Failed to load iso_languages.csv", e); + LOG.warning("Failed to load " + fileName, e); } - try { - for (String line : Lang.toIterable(IOUtils.readFullyAsString(LocaleUtils.class.getResourceAsStream("/assets/lang/rtl.txt")).lines())) { - if (line.startsWith("#") || line.isBlank()) { - continue; - } - rtl.add(line.trim()); - } - } catch (Throwable e) { - LOG.warning("Failed to load rtl.txt", e); - } + return Map.copyOf(result); } private static Locale getInstance(String language, String script, String region, @@ -130,6 +138,31 @@ public final class LocaleUtils { : locale.stripExtensions().toLanguageTag(); } + public static boolean isEnglish(Locale locale) { + return "en".equals(getRootLanguage(locale)); + } + + public static boolean isChinese(Locale locale) { + return "zh".equals(getRootLanguage(locale)); + } + + // --- + + /// 
Normalize the language code to the code in the IANA Language Subtag Registry. + /// Typically, it normalizes ISO 639 alpha-3 codes to ISO 639 alpha-2 codes. + public static @NotNull String normalizeLanguage(String language) { + return language.isEmpty() + ? "en" + : NORMALIZED_TAG.getOrDefault(language, language); + } + + /// If `language` is a sublanguage of a [macrolanguage](https://en.wikipedia.org/wiki/ISO_639_macrolanguage), + /// return the macrolanguage; otherwise, return `null`. + public static @Nullable String getParentLanguage(String language) { + return PARENT_LANGUAGE.get(language); + } + + /// @see #getRootLanguage(String) public static @NotNull String getRootLanguage(Locale locale) { return getRootLanguage(locale.getLanguage()); } @@ -140,54 +173,54 @@ public final class LocaleUtils { /// - If `language` is empty, return `en`; /// - Otherwise, return the `language`. public static @NotNull String getRootLanguage(String language) { - if (language.isEmpty()) return "en"; - if (language.length() <= 2) - return language; - - String iso2 = mapToISO2Language(language); - if (iso2 != null) - return iso2; + language = normalizeLanguage(language); String parent = getParentLanguage(language); return parent != null ? parent : language; } + /// If `language` is a macrolanguage, try to map it to the most commonly used individual language. + /// + /// For example, if `language` is `zh`, this method will return `cmn`. + public static @NotNull String getPreferredLanguage(String language) { + language = normalizeLanguage(language); + return PREFERRED_LANGUAGE.getOrDefault(language, language); + } + /// Get the script of the locale. If the script is empty and the language is Chinese, /// the script will be inferred based on the language, the region and the variant. 
public static @NotNull String getScript(Locale locale) { if (locale.getScript().isEmpty()) { - if (isEnglish(locale)) { - if ("UD".equals(locale.getCountry())) { - return "Qabs"; - } + if (!locale.getVariant().isEmpty()) { + String script = DEFAULT_SCRIPT.get(locale.getVariant()); + if (script != null) + return script; } - if (isChinese(locale)) { - if (CHINESE_LATN_VARIANTS.contains(locale.getVariant())) - return "Latn"; - if (locale.getLanguage().equals("lzh") || CHINESE_TRADITIONAL_REGIONS.contains(locale.getCountry())) - return "Hant"; - else - return "Hans"; + if ("UD".equals(locale.getCountry())) { + return "Qabs"; } + + String script = DEFAULT_SCRIPT.get(normalizeLanguage(locale.getLanguage())); + if (script != null) + return script; + + if (isChinese(locale)) { + return CHINESE_TRADITIONAL_REGIONS.contains(locale.getCountry()) + ? "Hant" + : "Hans"; + } + + return ""; } return locale.getScript(); } public static @NotNull TextDirection getTextDirection(Locale locale) { - TextDirection direction = rtl.contains(getRootLanguage(locale)) + return RTL_SCRIPTS.contains(getScript(locale)) ? 
TextDirection.RIGHT_TO_LEFT : TextDirection.LEFT_TO_RIGHT; - - if ("Qabs".equals(getScript(locale))) { - direction = switch (direction) { - case RIGHT_TO_LEFT -> TextDirection.LEFT_TO_RIGHT; - case LEFT_TO_RIGHT -> TextDirection.RIGHT_TO_LEFT; - }; - } - - return direction; } private static final ConcurrentMap> CANDIDATE_LOCALES = new ConcurrentHashMap<>(); @@ -196,13 +229,8 @@ public final class LocaleUtils { return CANDIDATE_LOCALES.computeIfAbsent(locale, LocaleUtils::createCandidateLocaleList); } - // ------------- - private static List createCandidateLocaleList(Locale locale) { - String language = locale.getLanguage(); - if (language.isEmpty()) - return List.of(Locale.ENGLISH, Locale.ROOT); - + String language = getPreferredLanguage(locale.getLanguage()); String script = getScript(locale); String region = locale.getCountry(); List variants = locale.getVariant().isEmpty() @@ -211,18 +239,7 @@ public final class LocaleUtils { ArrayList result = new ArrayList<>(); do { - String currentLanguage; - - if (language.length() <= 2) { - currentLanguage = language; - } else { - String iso2 = mapToISO2Language(language); - currentLanguage = iso2 != null - ? iso2 - : language; - } - - addCandidateLocales(result, currentLanguage, script, region, variants); + addCandidateLocales(result, language, script, region, variants); } while ((language = getParentLanguage(language)) != null); result.add(Locale.ROOT); @@ -367,54 +384,6 @@ public final class LocaleUtils { return Map.of(); } - // --- - - /// Map ISO 639 alpha-3 language codes to ISO 639 alpha-2 language codes. - /// Returns `null` if there is no corresponding ISO 639 alpha-2 language code. - public static @Nullable String mapToISO2Language(String iso3Language) { - return iso3To2.get(iso3Language); - } - - /// If `language` is a sublanguage of a [macrolanguage](https://en.wikipedia.org/wiki/ISO_639_macrolanguage), - /// return the macrolanguage; otherwise, return `null`. 
- public static @Nullable String getParentLanguage(String language) { - return subLanguageToParent.get(language); - } - - public static boolean isEnglish(Locale locale) { - return "en".equals(getRootLanguage(locale)); - } - - public static final Set CHINESE_TRADITIONAL_REGIONS = Set.of("TW", "HK", "MO"); - public static final Set CHINESE_LATN_VARIANTS = Set.of("pinyin", "wadegile", "tongyong"); - - public static boolean isChinese(Locale locale) { - return "zh".equals(getRootLanguage(locale)); - } - - public static final boolean IS_CHINA_MAINLAND = isChinaMainland(); - - private static boolean isChinaMainland() { - if ("Asia/Shanghai".equals(ZoneId.systemDefault().getId())) - return true; - - // Check if the time zone is UTC+8 - if (ZonedDateTime.now().getOffset().getTotalSeconds() == Duration.ofHours(8).toSeconds()) { - if ("CN".equals(LocaleUtils.SYSTEM_DEFAULT.getCountry())) - return true; - - if (OperatingSystem.CURRENT_OS == OperatingSystem.WINDOWS && NativeUtils.USE_JNA) { - Kernel32 kernel32 = Kernel32.INSTANCE; - - // https://learn.microsoft.com/windows/win32/intl/table-of-geographical-locations - if (kernel32 != null && kernel32.GetUserGeoID(WinConstants.GEOCLASS_NATION) == 45) // China - return true; - } - } - - return false; - } - private LocaleUtils() { } } diff --git a/HMCLCore/src/main/resources/assets/lang/default_script.csv b/HMCLCore/src/main/resources/assets/lang/default_script.csv new file mode 100644 index 000000000..9b90629fa --- /dev/null +++ b/HMCLCore/src/main/resources/assets/lang/default_script.csv @@ -0,0 +1,29 @@ +Arab,ar,fa,ps,ur +Armn,hy +Beng,as,bn +Blis,zbl +Cyrl,ab,be,bg,kk,mk,ru,uk +Deva,hi,mr,ne,kok,mai +Ethi,am,ti +Geor,ka +Grek,el +Gujr,gu +Guru,pa +Hant,lzh +Hebr,he,yi +Jpan,ja +Khmr,km +Knda,kn +Kore,ko +Laoo,lo 
+Latn,af,ay,bs,ca,ch,cs,cy,da,de,en,eo,es,et,eu,fi,fj,fo,fr,fy,ga,gl,gn,gv,hr,ht,hu,id,is,it,kl,la,lb,ln,lt,lv,mg,mh,ms,mt,na,nb,nd,nl,nn,no,nr,ny,om,pl,pt,qu,rm,rn,ro,rw,sg,sk,sl,sm,so,sq,ss,st,sv,sw,tl,tn,to,tr,ts,ve,vi,xh,zu,dsb,frr,frs,gsw,hsb,men,nds,niu,nso,tem,tkl,tmh,tpi,tvl,tailo,pinyin,hepburn,pehoeji,tongyong,wadegile +Mlym,ml +Mymr,my +Nkoo,nqo +Orya,or +Sinh,si +Taml,ta +Telu,te +Thaa,dv +Thai,th +Tibt,dz diff --git a/HMCLCore/src/main/resources/assets/lang/iso_languages.csv b/HMCLCore/src/main/resources/assets/lang/language_aliases.csv similarity index 99% rename from HMCLCore/src/main/resources/assets/lang/iso_languages.csv rename to HMCLCore/src/main/resources/assets/lang/language_aliases.csv index 732d28d7e..4e828a319 100644 --- a/HMCLCore/src/main/resources/assets/lang/iso_languages.csv +++ b/HMCLCore/src/main/resources/assets/lang/language_aliases.csv @@ -73,7 +73,6 @@ io,ido is,isl it,ita iu,iku -iw,heb ja,jpn ji,yid jv,jav diff --git a/HMCLCore/src/main/resources/assets/lang/rtl.txt b/HMCLCore/src/main/resources/assets/lang/rtl.txt deleted file mode 100644 index cbd40fc72..000000000 --- a/HMCLCore/src/main/resources/assets/lang/rtl.txt +++ /dev/null @@ -1,6 +0,0 @@ -ar -fa -he -ps -ur -yi \ No newline at end of file diff --git a/HMCLCore/src/main/resources/assets/lang/sublanguages.csv b/HMCLCore/src/main/resources/assets/lang/sublanguages.csv index 5c8217c0e..4a4a3e362 100644 --- a/HMCLCore/src/main/resources/assets/lang/sublanguages.csv +++ b/HMCLCore/src/main/resources/assets/lang/sublanguages.csv @@ -1 +1,63 @@ -zh,cdo,cjy,cmn,cnp,cpx,csp,czh,czo,gan,hak,hnm,hsn,luh,lzh,mnp,nan,sjc,wuu,yue \ No newline at end of file +ak,tw,fat +ar,aao,abh,abv,acm,acq,acw,acx,acy,adf,aeb,aec,afb,apc,apd,arb,arq,ars,ary,arz,auz,avl,ayh,ayl,ayn,ayp,pga,shu,ssh +ay,ayc,ayr +az,azb,azj +cr,crj,crk,crl,crm,csw,cwd +et,ekk,vro +fa,pes,prs +ff,ffm,fub,fuc,fue,fuf,fuh,fui,fuq,fuv +gn,gnw,gug,gui,gun,nhd +ik,esi,esk +iu,ike,ikt +kg,kng,kwy,ldi +kr,kby,knc,krt 
+ku,ckb,kmr,sdh +kv,koi,kpv +lv,ltg,lvs +mg,bhr,bmm,bzc,msh,plt,skg,tdx,tkg,txy,xmv,xmw +mn,khk,mvf +ms,id,bjn,btj,bve,bvu,coa,dup,hji,jak,jax,kvb,kvr,kxd,lce,lcf,liw,max,meo,mfa,mfb,min,mqg,msi,mui,orn,ors,pel,pse,tmw,urk,vkk,vkt,xmm,zlm,zmi,zsm +ne,dty,npi +no,nb,nn +oj,ciw,ojb,ojc,ojg,ojs,ojw,otw +om,gax,gaz,hae,orc +or,ory,spv +ps,pbt,pbu,pst +qu,qub,qud,quf,qug,quh,quk,qul,qup,qur,qus,quw,qux,quy,quz,qva,qvc,qve,qvh,qvi,qvj,qvl,qvm,qvn,qvo,qvp,qvs,qvw,qvz,qwa,qwc,qwh,qws,qxa,qxc,qxh,qxl,qxn,qxo,qxp,qxr,qxt,qxu,qxw +sa,cls,vsn +sc,sdc,sdn,src,sro +sh,bs,hr,sr,cnr +sq,aae,aat,aln,als +sw,swc,swh +uz,uzn,uzs +yi,ydd,yih +za,zch,zeh,zgb,zgm,zgn,zhd,zhn,zlj,zln,zlq,zqe,zyb,zyg,zyj,zyn,zzj +zh,cdo,cjy,cmn,cnp,cpx,csp,czh,czo,gan,hak,hnm,hsn,luh,lzh,mnp,nan,sjc,wuu,yue +bal,bcc,bgn,bgp +bik,bcl,bln,bto,cts,fbl,lbl,rbl,ubl +bnc,ebk,lbk,obk,rbk,vbk +bua,bxm,bxr,bxu +chm,mhr,mrj +del,umu,unm +den,scs,xsl +din,dib,dik,dip,diw,dks +doi,dgo,xnr +gba,bdt,gbp,gbq,gmm,gso,gya +gon,esg,gno,wsg +grb,gbo,gec,grj,grv,gry +hai,hax,hdn +hmn,cqd,hea,hma,hmc,hmd,hme,hmg,hmh,hmi,hmj,hml,hmm,hmp,hmq,hms,hmw,hmy,hmz,hnj,hrm,huj,mmr,muq,mww,sfm +jrb,aju,jye,yhd,yud +kln,enb,eyo,niq,oki,pko,sgc,spy,tec,tuy +kok,gom,knn +kpe,gkp,xpe +lah,hnd,hno,jat,phr,pnb,skr,xhe +luy,bxk,ida,lkb,lko,lks,lri,lrm,lsm,lto,lts,lwg,nle,nyd,rag +man,emk,mku,mlq,mnk,msc,mwk +mwr,dhd,mtr,mve,rwr,swv,wry +raj,bgq,gda,gju,hoj,mup,wbr +rom,rmc,rmf,rml,rmn,rmo,rmw,rmy +syr,aii,cld +tmh,taq,thv,thz,ttq +zap,zaa,zab,zac,zad,zae,zaf,zai,zam,zao,zaq,zar,zas,zat,zav,zaw,zax,zca,zcd,zoo,zpa,zpb,zpc,zpd,zpe,zpf,zpg,zph,zpi,zpj,zpk,zpl,zpm,zpn,zpo,zpp,zpq,zpr,zps,zpt,zpu,zpv,zpw,zpx,zpy,zpz,zsr,zte,ztg,ztl,ztm,ztn,ztp,ztq,zts,ztt,ztu,ztx,zty +zza,diq,kiu diff --git a/HMCLCore/src/test/java/org/jackhuang/hmcl/util/i18n/LocaleUtilsTest.java b/HMCLCore/src/test/java/org/jackhuang/hmcl/util/i18n/LocaleUtilsTest.java index 64bce19f4..6918ed2ab 100644 --- a/HMCLCore/src/test/java/org/jackhuang/hmcl/util/i18n/LocaleUtilsTest.java 
+++ b/HMCLCore/src/test/java/org/jackhuang/hmcl/util/i18n/LocaleUtilsTest.java @@ -28,7 +28,6 @@ import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Set; -import java.util.stream.Collectors; import static org.junit.jupiter.api.Assertions.*; @@ -41,55 +40,76 @@ public final class LocaleUtilsTest { LocaleUtils.getCandidateLocales(Locale.forLanguageTag(languageTag)) .stream() .map(Locale::toLanguageTag) - .collect(Collectors.toList())); + .toList()); + } + + private static void assertCandidateLocalesEquals(String l1, String l2) { + assertEquals( + LocaleUtils.getCandidateLocales(Locale.forLanguageTag(l1)), + LocaleUtils.getCandidateLocales(Locale.forLanguageTag(l2)) + ); } @Test public void testGetCandidateLocales() { - assertCandidateLocales("zh", List.of("zh-Hans", "zh-CN", "zh", "und")); - assertCandidateLocales("zh-CN", List.of("zh-Hans-CN", "zh-Hans", "zh-CN", "zh", "und")); - assertCandidateLocales("zh-SG", List.of("zh-Hans-SG", "zh-Hans", "zh-SG", "zh-CN", "zh", "und")); - assertCandidateLocales("zh-MY", List.of("zh-Hans-MY", "zh-Hans", "zh-MY", "zh-CN", "zh", "und")); - assertCandidateLocales("zh-US", List.of("zh-Hans-US", "zh-Hans", "zh-US", "zh-CN", "zh", "und")); - assertCandidateLocales("zh-TW", List.of("zh-Hant-TW", "zh-Hant", "zh-TW", "zh", "zh-CN", "und")); - assertCandidateLocales("zh-HK", List.of("zh-Hant-HK", "zh-Hant", "zh-HK", "zh-TW", "zh", "zh-CN", "und")); - assertCandidateLocales("zh-MO", List.of("zh-Hant-MO", "zh-Hant", "zh-MO", "zh-TW", "zh", "zh-CN", "und")); - assertCandidateLocales("zh-Hans", List.of("zh-Hans", "zh-CN", "zh", "und")); - assertCandidateLocales("zh-Hant", List.of("zh-Hant", "zh-TW", "zh", "zh-CN", "und")); - assertCandidateLocales("zh-Hans-US", List.of("zh-Hans-US", "zh-Hans", "zh-US", "zh-CN", "zh", "und")); - assertCandidateLocales("zh-Hant-CN", List.of("zh-Hant-CN", "zh-Hant", "zh-CN", "zh-TW", "zh", "und")); - assertCandidateLocales("zh-Hans-TW", List.of("zh-Hans-TW", "zh-Hans", 
"zh-TW", "zh-CN", "zh", "und")); - assertCandidateLocales("zh-Latn", List.of("zh-Latn", "zh", "zh-CN", "und")); - assertCandidateLocales("zh-Latn-CN", List.of("zh-Latn-CN", "zh-Latn", "zh-CN", "zh", "und")); - assertCandidateLocales("zh-pinyin", List.of("zh-Latn-pinyin", "zh-Latn", "zh-pinyin", "zh", "zh-CN", "und")); - assertCandidateLocales("zho", List.of("zh-Hans", "zh-CN", "zh", "und")); + // English + + assertCandidateLocales("en", List.of("en-Latn", "en", "und")); + assertCandidateLocales("en-US", List.of("en-Latn-US", "en-Latn", "en-US", "en", "und")); + assertCandidateLocalesEquals("en", "eng"); + assertCandidateLocalesEquals("en-US", "eng-US"); + assertCandidateLocalesEquals("und", "en"); + + // Spanish + + assertCandidateLocales("es", List.of("es-Latn", "es", "und")); + assertCandidateLocalesEquals("es", "spa"); + + // Japanese + + assertCandidateLocales("ja", List.of("ja-Jpan", "ja", "und")); + assertCandidateLocales("ja-JP", List.of("ja-Jpan-JP", "ja-Jpan", "ja-JP", "ja", "und")); + assertCandidateLocalesEquals("ja", "jpn"); + assertCandidateLocalesEquals("ja-JP", "jpn-JP"); + + // Russian + + assertCandidateLocales("ru", List.of("ru-Cyrl", "ru", "und")); + assertCandidateLocalesEquals("ru", "rus"); + + // Ukrainian + + assertCandidateLocales("uk", List.of("uk-Cyrl", "uk", "und")); + assertCandidateLocalesEquals("uk", "ukr"); + + // Chinese + + assertCandidateLocales("zh", List.of("cmn-Hans", "cmn", "zh-Hans", "zh-CN", "zh", "und")); + assertCandidateLocales("zh-CN", List.of("cmn-Hans-CN", "cmn-Hans", "cmn-CN", "cmn", "zh-Hans-CN", "zh-Hans", "zh-CN", "zh", "und")); + assertCandidateLocales("zh-SG", List.of("cmn-Hans-SG", "cmn-Hans", "cmn-SG", "cmn", "zh-Hans-SG", "zh-Hans", "zh-SG", "zh-CN", "zh", "und")); + + assertCandidateLocales("zh-TW", List.of("cmn-Hant-TW", "cmn-Hant", "cmn-TW", "cmn", "zh-Hant-TW", "zh-Hant", "zh-TW", "zh", "zh-CN", "und")); + assertCandidateLocales("zh-HK", List.of("cmn-Hant-HK", "cmn-Hant", "cmn-HK", "cmn", "zh-Hant-HK", 
"zh-Hant", "zh-HK", "zh-TW", "zh", "zh-CN", "und")); + assertCandidateLocales("zh-Hant-CN", List.of("cmn-Hant-CN", "cmn-Hant", "cmn-CN", "cmn", "zh-Hant-CN", "zh-Hant", "zh-CN", "zh-TW", "zh", "und")); + + assertCandidateLocales("zh-pinyin", List.of("cmn-Latn-pinyin", "cmn-Latn", "cmn-pinyin", "cmn", "zh-Latn-pinyin", "zh-Latn", "zh-pinyin", "zh", "zh-CN", "und")); + assertCandidateLocales("lzh", List.of("lzh-Hant", "lzh", "zh-Hant", "zh-TW", "zh", "zh-CN", "und")); - assertCandidateLocales("lzh-Hant", List.of("lzh-Hant", "lzh", "zh-Hant", "zh-TW", "zh", "zh-CN", "und")); - assertCandidateLocales("lzh-Hans", List.of("lzh-Hans", "lzh", "zh-Hans", "zh-CN", "zh", "und")); - assertCandidateLocales("cmn", List.of("cmn-Hans", "cmn", "zh-Hans", "zh-CN", "zh", "und")); - assertCandidateLocales("cmn-Hans", List.of("cmn-Hans", "cmn", "zh-Hans", "zh-CN", "zh", "und")); assertCandidateLocales("yue", List.of("yue-Hans", "yue", "zh-Hans", "zh-CN", "zh", "und")); - assertCandidateLocales("ja", List.of("ja", "und")); - assertCandidateLocales("jpn", List.of("ja", "und")); - assertCandidateLocales("ja-JP", List.of("ja-JP", "ja", "und")); - assertCandidateLocales("jpn-JP", List.of("ja-JP", "ja", "und")); - - assertCandidateLocales("en", List.of("en", "und")); - assertCandidateLocales("eng", List.of("en", "und")); - assertCandidateLocales("en-US", List.of("en-US", "en", "und")); - assertCandidateLocales("eng-US", List.of("en-US", "en", "und")); - - assertCandidateLocales("es", List.of("es", "und")); - assertCandidateLocales("spa", List.of("es", "und")); - - assertCandidateLocales("ru", List.of("ru", "und")); - assertCandidateLocales("rus", List.of("ru", "und")); - - assertCandidateLocales("uk", List.of("uk", "und")); - assertCandidateLocales("ukr", List.of("uk", "und")); - - assertCandidateLocales("und", List.of("en", "und")); + assertCandidateLocalesEquals("zh", "cmn-Hans"); + assertCandidateLocalesEquals("zh-CN", "cmn-Hans-CN"); + assertCandidateLocalesEquals("zh-SG", 
"cmn-Hans-SG"); + assertCandidateLocalesEquals("zh-MY", "cmn-Hans-MY"); + assertCandidateLocalesEquals("zh-TW", "cmn-Hant-TW"); + assertCandidateLocalesEquals("zh-HK", "cmn-Hant-HK"); + assertCandidateLocalesEquals("zh-Hans", "cmn-Hans"); + assertCandidateLocalesEquals("zh-Hant", "cmn-Hant"); + assertCandidateLocalesEquals("zh-Hant-CN", "cmn-Hant-CN"); + assertCandidateLocalesEquals("zh-Hant-SG", "cmn-Hant-SG"); + assertCandidateLocalesEquals("zh-Latn", "cmn-Latn"); + assertCandidateLocalesEquals("zh-pinyin", "cmn-Latn-pinyin"); + assertCandidateLocalesEquals("zho", "zh"); } @Test @@ -134,7 +154,9 @@ public final class LocaleUtilsTest { assertEquals("Hant", LocaleUtils.getScript(Locale.forLanguageTag("lzh-Hant"))); assertEquals("Hant", LocaleUtils.getScript(Locale.forLanguageTag("lzh-CN"))); + assertEquals("Latn", LocaleUtils.getScript(Locale.forLanguageTag("en"))); assertEquals("Latn", LocaleUtils.getScript(Locale.forLanguageTag("zh-pinyin"))); + assertEquals("Latn", LocaleUtils.getScript(Locale.forLanguageTag("ja-hepburn"))); } @Test @@ -192,20 +214,17 @@ public final class LocaleUtilsTest { } @Test - public void testMapToISO2Language() { - assertEquals("en", LocaleUtils.mapToISO2Language("eng")); - assertEquals("es", LocaleUtils.mapToISO2Language("spa")); - assertEquals("ja", LocaleUtils.mapToISO2Language("jpn")); - assertEquals("ru", LocaleUtils.mapToISO2Language("rus")); - assertEquals("uk", LocaleUtils.mapToISO2Language("ukr")); - assertEquals("zh", LocaleUtils.mapToISO2Language("zho")); - assertEquals("zu", LocaleUtils.mapToISO2Language("zul")); - - assertNull(LocaleUtils.mapToISO2Language(null)); - assertNull(LocaleUtils.mapToISO2Language("")); - assertNull(LocaleUtils.mapToISO2Language("cmn")); - assertNull(LocaleUtils.mapToISO2Language("lzh")); - assertNull(LocaleUtils.mapToISO2Language("tlh")); + public void testNormalizeLanguage() { + assertEquals("en", LocaleUtils.normalizeLanguage("")); + assertEquals("en", LocaleUtils.normalizeLanguage("eng")); + 
assertEquals("es", LocaleUtils.normalizeLanguage("spa")); + assertEquals("ja", LocaleUtils.normalizeLanguage("jpn")); + assertEquals("ru", LocaleUtils.normalizeLanguage("rus")); + assertEquals("uk", LocaleUtils.normalizeLanguage("ukr")); + assertEquals("zh", LocaleUtils.normalizeLanguage("zho")); + assertEquals("zu", LocaleUtils.normalizeLanguage("zul")); + assertEquals("en", LocaleUtils.normalizeLanguage("")); + assertEquals("cmn", LocaleUtils.normalizeLanguage("cmn")); } @Test @@ -228,10 +247,11 @@ public final class LocaleUtilsTest { assertEquals(TextDirection.LEFT_TO_RIGHT, LocaleUtils.getTextDirection(Locale.forLanguageTag("zh"))); assertEquals(TextDirection.LEFT_TO_RIGHT, LocaleUtils.getTextDirection(Locale.forLanguageTag("zh-Hans"))); assertEquals(TextDirection.LEFT_TO_RIGHT, LocaleUtils.getTextDirection(Locale.forLanguageTag("zh-CN"))); - assertEquals(TextDirection.LEFT_TO_RIGHT, LocaleUtils.getTextDirection(Locale.forLanguageTag("ar-Qabs"))); assertEquals(TextDirection.RIGHT_TO_LEFT, LocaleUtils.getTextDirection(Locale.forLanguageTag("en-Qabs"))); assertEquals(TextDirection.RIGHT_TO_LEFT, LocaleUtils.getTextDirection(Locale.forLanguageTag("ar"))); assertEquals(TextDirection.RIGHT_TO_LEFT, LocaleUtils.getTextDirection(Locale.forLanguageTag("ara"))); + assertEquals(TextDirection.RIGHT_TO_LEFT, LocaleUtils.getTextDirection(Locale.forLanguageTag("he"))); + assertEquals(TextDirection.RIGHT_TO_LEFT, LocaleUtils.getTextDirection(Locale.forLanguageTag("heb"))); } } diff --git a/build.gradle.kts b/build.gradle.kts index b3b3fe7ca..2c72a13ce 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -1,5 +1,6 @@ import org.jackhuang.hmcl.gradle.ci.CheckUpdate import org.jackhuang.hmcl.gradle.docs.UpdateDocuments +import org.jackhuang.hmcl.gradle.l10n.ParseLanguageSubtagRegistry plugins { id("checkstyle") @@ -76,6 +77,12 @@ org.jackhuang.hmcl.gradle.javafx.JavaFXUtils.register(rootProject) defaultTasks("clean", "build") +tasks.register("parseLanguageSubtagRegistry") { + 
languageSubtagRegistryFile.set(layout.projectDirectory.file("language-subtag-registry")) + + sublanguagesFile.set(layout.projectDirectory.file("HMCLCore/src/main/resources/assets/lang/sublanguages.csv")) + defaultScriptFile.set(layout.projectDirectory.file("HMCLCore/src/main/resources/assets/lang/default_script.csv")) +} tasks.register("updateDocuments") { documentsDir.set(layout.projectDirectory.dir("docs")) diff --git a/buildSrc/src/main/java/org/jackhuang/hmcl/gradle/l10n/LocalizationUtils.java b/buildSrc/src/main/java/org/jackhuang/hmcl/gradle/l10n/LocalizationUtils.java index 805c5f167..f0f4da4b8 100644 --- a/buildSrc/src/main/java/org/jackhuang/hmcl/gradle/l10n/LocalizationUtils.java +++ b/buildSrc/src/main/java/org/jackhuang/hmcl/gradle/l10n/LocalizationUtils.java @@ -19,36 +19,42 @@ package org.jackhuang.hmcl.gradle.l10n; import org.gradle.api.GradleException; -import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.*; final class LocalizationUtils { - public static final Map subLanguageToParent; + public static final Map subLanguageToParent = loadCSV("sublanguages.csv"); - static { - InputStream input = LocalizationUtils.class.getResourceAsStream("sublanguages.csv"); - if (input == null) - throw new GradleException("Missing sublanguages.csv file"); - - Map map = new HashMap<>(); - try (input) { - new String(input.readAllBytes()).lines() - .filter(line -> !line.startsWith("#") && !line.isBlank()) - .forEach(line -> { - String[] languages = line.split(","); - if (languages.length < 2) - throw new GradleException("Invalid line in sublanguages.csv: " + line); - - String parent = languages[0]; - for (int i = 1; i < languages.length; i++) { - map.put(languages[i], parent); - } - }); - } catch (IOException e) { - throw new GradleException("Failed to read sublanguages.csv", e); + private static Map loadCSV(String fileName) { + InputStream resource = LocalizationUtils.class.getResourceAsStream(fileName); + 
if (resource == null) { + throw new GradleException("Resource not found: " + fileName); } - subLanguageToParent = Collections.unmodifiableMap(map); + + HashMap result = new HashMap<>(); + try (resource) { + new String(resource.readAllBytes(), StandardCharsets.UTF_8).lines().forEach(line -> { + if (line.startsWith("#") || line.isBlank()) + return; + + String[] items = line.split(","); + if (items.length < 2) { + throw new GradleException("Invalid line in sublanguages.csv: " + line); + } + + String parent = items[0]; + for (int i = 1; i < items.length; i++) { + result.put(items[i], parent); + } + }); + } catch (RuntimeException | Error e) { + throw e; + } catch (Throwable e) { + throw new GradleException("Failed to load " + fileName, e); + } + + return Map.copyOf(result); } private static List resolveLanguage(String language) { diff --git a/buildSrc/src/main/java/org/jackhuang/hmcl/gradle/l10n/ParseLanguageSubtagRegistry.java b/buildSrc/src/main/java/org/jackhuang/hmcl/gradle/l10n/ParseLanguageSubtagRegistry.java new file mode 100644 index 000000000..75ada0854 --- /dev/null +++ b/buildSrc/src/main/java/org/jackhuang/hmcl/gradle/l10n/ParseLanguageSubtagRegistry.java @@ -0,0 +1,254 @@ +/* + * Hello Minecraft! Launcher + * Copyright (C) 2025 huangyuhui and contributors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */
+package org.jackhuang.hmcl.gradle.l10n;
+
+import org.gradle.api.DefaultTask;
+import org.gradle.api.GradleException;
+import org.gradle.api.file.RegularFileProperty;
+import org.gradle.api.tasks.InputFile;
+import org.gradle.api.tasks.OutputFile;
+import org.gradle.api.tasks.TaskAction;
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.StandardOpenOption;
+import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/// Gradle task that parses the IANA Language Subtag Registry and derives two CSV tables from it:
+///
+/// - the sublanguages file: one row per macrolanguage, followed by every subtag that names it
+///   in its `Macrolanguage` field;
+/// - the default-script file: one row per script, followed by every subtag that implies it
+///   (via `Suppress-Script`, or via a script shared by all `Prefix` fields of a variant).
+///
+/// Records whose `Type` is `grandfathered` or `redundant`, and records that carry a
+/// `Deprecated` field, are ignored.
+///
+/// @author Glavo
+/// @see [language-subtag-registry](https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry)
+public abstract class ParseLanguageSubtagRegistry extends DefaultTask {
+
+    /// Orders tags by length first and lexicographically second.
+    private static final Comparator<String> TAG_COMPARATOR = (lang1, lang2) -> {
+        if (lang1.length() != lang2.length())
+            return Integer.compare(lang1.length(), lang2.length());
+        else
+            return lang1.compareTo(lang2);
+    };
+
+    /// The registry file to parse.
+    @InputFile
+    public abstract RegularFileProperty getLanguageSubtagRegistryFile();
+
+    /// CSV file storing, per macrolanguage, the subtags that declare it as their macrolanguage.
+    @OutputFile
+    public abstract RegularFileProperty getSublanguagesFile();
+
+    /// CSV file storing the mapping from subtag to their default scripts.
+    /// Each row starts with the script and lists the subtags that default to it.
+    @OutputFile
+    public abstract RegularFileProperty getDefaultScriptFile();
+
+    /// Reads the registry, collects both mappings and writes them to the two output files.
+    ///
+    /// @throws IOException     if the registry cannot be read or an output file cannot be written
+    /// @throws GradleException if the registry is malformed or contains an unknown record type
+    @TaskAction
+    public void run() throws IOException {
+        List<Item> items;
+
+        try (var reader = Files.newBufferedReader(getLanguageSubtagRegistryFile().getAsFile().get().toPath())) {
+            var builder = new ItemsBuilder();
+            builder.parse(reader);
+            items = builder.items;
+        }
+
+        MultiMap scriptToSubtag = new MultiMap();
+        MultiMap languageToSub = new MultiMap();
+
+        // Classical Chinese should use Traditional Chinese characters by default
+        scriptToSubtag.add("Hant", "lzh");
+
+        for (Item item : items) {
+            String type = item.firstValueOrThrow("Type");
+            if (type.equals("grandfathered") || type.equals("redundant")
+                    || !item.allValues("Deprecated").isEmpty())
+                continue;
+
+            String subtag = item.firstValueOrThrow("Subtag");
+
+            switch (type) {
+                case "language", "extlang" -> {
+                    item.firstValue("Macrolanguage")
+                            .ifPresent(macroLang -> languageToSub.add(macroLang, subtag));
+
+                    item.firstValue("Suppress-Script")
+                            .ifPresent(script -> scriptToSubtag.add(script, subtag));
+                }
+                case "variant" -> {
+                    String defaultScript = commonScript(item.allValues("Prefix"));
+                    if (defaultScript != null) {
+                        scriptToSubtag.add(defaultScript, subtag);
+                    }
+                }
+                case "region", "script" -> {
+                    // ignored
+                }
+                default -> throw new GradleException(String.format("Unknown subtag type: %s", type));
+            }
+        }
+
+        languageToSub.saveToCSV(getSublanguagesFile());
+        scriptToSubtag.saveToCSV(getDefaultScriptFile());
+    }
+
+    /// Returns the script shared by all the given prefixes.
+    ///
+    /// @param prefixes language tags taken from the `Prefix` fields of a variant record
+    /// @return the common script, or `null` if there are no prefixes, if any prefix
+    ///         has no script, or if two prefixes disagree
+    private static @Nullable String commonScript(List<String> prefixes) {
+        String common = null;
+        for (String prefix : prefixes) {
+            String script = Locale.forLanguageTag(prefix).getScript();
+            if (script.isEmpty() || (common != null && !common.equals(script))) {
+                return null;
+            }
+            common = script;
+        }
+        return common;
+    }
+
+    /// Sorted key-to-values table; keys and values both use [#TAG_COMPARATOR] so the output is stable.
+    private static final class MultiMap {
+        private final TreeMap<String, Set<String>> allValues = new TreeMap<>(TAG_COMPARATOR);
+
+        /// Adds `value` to the set stored under `key`, creating the set on first use.
+        void add(String key, String value) {
+            allValues.computeIfAbsent(key, k -> new TreeSet<>(TAG_COMPARATOR)).add(value);
+        }
+
+        /// Writes one line per key in the form `key,value1,value2,...`, replacing any existing file.
+        ///
+        /// @throws IOException if the file cannot be written
+        void saveToCSV(RegularFileProperty csvFile) throws IOException {
+            try (var writer = Files.newBufferedWriter(csvFile.getAsFile().get().toPath(),
+                    StandardOpenOption.CREATE,
+                    StandardOpenOption.TRUNCATE_EXISTING)) {
+
+                for (Map.Entry<String, Set<String>> entry : allValues.entrySet()) {
+                    String key = entry.getKey();
+                    Set<String> values = entry.getValue();
+
+                    writer.write(key);
+
+                    for (String value : values) {
+                        writer.write(',');
+                        writer.write(value);
+                    }
+
+                    writer.newLine();
+                }
+            }
+        }
+    }
+
+    /// One registry record: field names mapped to their values in file order.
+    /// A field that occurs several times keeps all of its values.
+    private static final class Item {
+        final Map<String, List<String>> values = new LinkedHashMap<>();
+
+        /// Returns every value recorded for `name`, or an empty list if the field is absent.
+        public @NotNull List<String> allValues(String name) {
+            return values.getOrDefault(name, List.of());
+        }
+
+        /// Returns the first value recorded for `name`, if any.
+        public @NotNull Optional<String> firstValue(String name) {
+            // Lists are only ever created by put(), so a present list always has an element.
+            return Optional.ofNullable(values.get(name)).map(it -> it.get(0));
+        }
+
+        /// Returns the first value recorded for `name`, or `null` if the field is absent.
+        public @Nullable String firstValueOrNull(String name) {
+            return firstValue(name).orElse(null);
+        }
+
+        /// Returns the first value recorded for `name`.
+        ///
+        /// @throws GradleException if the field is absent
+        public @NotNull String firstValueOrThrow(String name) {
+            return firstValue(name).orElseThrow(() -> new GradleException("No value found for " + name + " in " + this));
+        }
+
+        /// Appends `value` to the values recorded for `name`.
+        public void put(String name, String value) {
+            values.computeIfAbsent(name, ignored -> new ArrayList<>(1)).add(value);
+        }
+
+        @Override
+        public String toString() {
+            StringJoiner joiner = new StringJoiner("\n");
+
+            values.forEach((name, fieldValues) -> {
+                for (String value : fieldValues) {
+                    joiner.add(name + ": " + value);
+                }
+            });
+
+            return joiner.toString();
+        }
+    }
+
+    /// Line-oriented parser for the registry: records are separated by `%%` lines,
+    /// fields have the form `Name: value`, and a line starting with a space continues
+    /// the previous field.
+    private static final class ItemsBuilder {
+        /// Matches a `Name: value` field line; the named groups are read back in [#parse(BufferedReader)].
+        private static final Pattern LINE_PATTERN = Pattern.compile("^(?<name>[A-Za-z\\-]+): (?<value>.*)$");
+
+        private final List<Item> items = new ArrayList<>(1024);
+        private Item current = new Item();
+        private String currentName = null;
+        private String currentValue = null;
+
+        /// Stores the pending field, if any, into the current record.
+        private void updateCurrent() {
+            if (currentName != null) {
+                current.put(currentName, currentValue);
+                currentName = null;
+                currentValue = null;
+            }
+        }
+
+        /// Finishes the current record and starts a new one.
+        ///
+        /// A record without `Type` is accepted only if it has `File-Date` (the file header),
+        /// in which case it is discarded instead of being added to the result.
+        ///
+        /// @throws GradleException if the record has neither `Type` nor `File-Date`
+        private void updateItems() {
+            updateCurrent();
+
+            if (current.values.isEmpty())
+                return;
+
+            if (current.firstValue("Type").isEmpty()) {
+                if (current.firstValue("File-Date").isPresent()) {
+                    current.values.clear();
+                    return;
+                } else {
+                    throw new GradleException("Invalid item: " + current);
+                }
+            }
+
+            items.add(current);
+            current = new Item();
+        }
+
+        /// Reads the whole registry from `reader` and fills the list of records.
+        ///
+        /// @throws IOException     if reading fails
+        /// @throws GradleException if a line is neither a field, a continuation nor a separator
+        void parse(BufferedReader reader) throws IOException {
+            String line;
+            while ((line = reader.readLine()) != null) {
+                if (line.isBlank()) {
+                    continue;
+                } else if (line.equals("%%")) {
+                    updateItems();
+                } else if (line.startsWith(" ")) {
+                    if (currentValue != null) {
+                        // Unfold: the indentation only marks the continuation and is not part of the value.
+                        currentValue = currentValue + " " + line.strip();
+                    } else {
+                        throw new GradleException("Invalid line: " + line);
+                    }
+                } else {
+                    updateCurrent();
+
+                    Matcher matcher = LINE_PATTERN.matcher(line);
+                    if (matcher.matches()) {
+                        currentName = matcher.group("name");
+                        currentValue = matcher.group("value");
+                    } else {
+                        throw new GradleException("Invalid line: " + line);
+                    }
+                }
+            }
+
+            // The last record is not followed by a separator.
+            updateItems();
+        }
+    }
+}
diff --git a/docs/Localization_zh.md b/docs/Localization_zh.md index 1df599346..f57d23c85 100644 --- a/docs/Localization_zh.md +++ b/docs/Localization_zh.md @@ -27,19 +27,17 @@ HMCL 为多种语言提供本地化支持。 HMCL 使用符合 IETF BCP 47 规范的语言标签。 -对于 ISO 639 标准中定义的语言,如果同时存在两字母语言代码和三字母语言代码,那么应当优先选择两字母语言代码。 +在选择语言标签时,我们会遵循以下原则: -例如,对于英语,我们使用 `en` 而不是 `eng` 作为语言代码。 +1. 
对于 ISO 639 标准中定义的语言,如果已经在 [IANA 语言子标签注册表](https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry)中注册,我们总是使用经过注册的标签。 -对于 Minecraft 所定义的非标准语言,应当优先使用语言文件的 `language.code` 中定义的代码,而非游戏语言文件的名称 -(但对于存在两字母代码的语言,应当将三字母语言代码替换为对应的两字母语言代码)。 -这是因为 Minecraft 有时候会用现实中实际存在的国家/地区代码来表示虚构语言 (比如说海盗英语的语言文件为 `en_pt`,但 `PT` 其实是葡萄牙的国家代码)。 + 例如,对于英语,我们使用 `en` 而不是 `eng` 作为语言代码。 -例如,对于颠倒的英语,我们使用 `en-Qabs` 作为语言代码,而不是 `en-UD`。 +2. 对于 Minecraft 所定义的非标准语言,应当优先使用语言文件的 `language.code` 中定义的代码,而非游戏语言文件的名称。 -此外,语言代码中应当尽可能选择地区中立的语言标签。 + 这是因为 Minecraft 有时候会用现实中实际存在的国家/地区代码来表示虚构语言 (比如说海盗英语的语言文件为 `en_pt`,但 `PT` 其实是葡萄牙的国家代码)。 -例如,对于简体中文和繁体中文,我们使用 `zh-Hans`和 `zh-Hant` 作为语言代码,而不是 `zh-CN` 和 `zh-TW`。 + 例如,对于颠倒的英语,我们使用 `en-Qabs` 作为语言代码,而不是 `en-UD`。 @@ -106,7 +104,7 @@ HMCL 的绝大多数文本都位于这个文件中,翻译此文件就能翻译 这是一个 Java Properties 文件,格式非常简单。 在翻译前请先阅读该格式的介绍: [Properties 文件](https://en.wikipedia.org/wiki/.properties)。 -作为翻译的第一步,请从[这张表格](https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes)中查询这个语言对应的两字母或三字母语言标签。 +作为翻译的第一步,请从[这张表格](https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry)中查询这个语言对应的两字母或三字母语言标签。 例如,英语的语言标签为 `en`。 在确定了语言标签后,请在 [`I18N.properties` 文件旁](../HMCL/src/main/resources/assets/lang)创建 `I18N_<语言标签>.properites` (例如 `I18N_en.properties`) 文件。 @@ -169,12 +167,62 @@ HMCL 的维护者会替你完成其他步骤。 对于某个语言下的缺失的资源,HMCL 支持一套资源回退机制,会根据不同的语言标签推导出一个搜索列表, 根据该列表依次搜索资源。 -例如,如果当前环境的语言标签为 `en-US`,那么 HMCL 会根据以下列表的顺序搜索对应的本地化资源: +在搜索前,我们会先通过以下步骤对语言标签进行细化推导。 -1. `en-US` +1. 归一化语言代码 + + 如果当前语言标签中的语言代码子标签未在 IANA 语言子标签注册表中进行注册,HMCL 会先尝试将其映射为注册表中已注册的标签。 + + 例如,HMCL 会将语言代码 `eng` 替换为 `en`。 + +2. 映射宏语言至子语言 + + 如果当前语言代码是一个 [ISO 639 宏语言](https://en.wikipedia.org/wiki/ISO_639_macrolanguage), + 且该宏语言通常指代某个个体语言,HMCL 会将其替换为该个体语言。 + + 例如 `zh` (中文) 通常实际指代 `cmn` (官话),所以我们会将语言代码 `zh` 替换为 `cmn`。 + +3. 推导拼写脚本 + + 如果当前语言标签中未指定拼写脚本,HMCL 会依次根据以下规则尝试推导拼写脚本: + + 1. 
如果当前语言标签指定了语言变体,该语言变体已在 IANA 语言子标签注册表中, + 且注册表中其所有 `Prefix` 都包含相同的拼写脚本,则将当前拼写脚本指定为该脚本。 + + 例如,如果当前语言变体为 `pinyin` (汉语拼音),则当前拼写脚本会被指定为 `Latn` (拉丁文)。 + + 2. 如果当前语言代码在 IANA 语言子标签注册表中被指定了 `Suppress-Script`,则将当前拼写脚本指定为该脚本。 + + 例如,如果当前语言代码为 `en` (英语),则当前拼写脚本会被指定为 `Latn` (拉丁文); + 如果当前语言代码为 `ru` (俄语),则当前拼写脚本会被指定为 `Cyrl` (西里尔文)。 + + 3. 如果当前语言代码是 `lzh` (文言),则将当前拼写脚本指定为 `Hant` (繁体汉字)。 + + 4. 如果当前语言代码是 `zh` 或 `zh` 的子语言,则检查当前国家/地区代码是否为 `TW`、`HK`、`MO` 之一。 + 如果结果为真,则将当前拼写脚本指定为 `Hant` (繁体汉字);否则将当前拼写脚本指定为 `Hans` (简体汉字)。 + +在对语言标签细化推导完成后,HMCL 会开始根据此语言标签推导出一个语言标签列表。 + +例如,对于语言标签 `en-US`,HMCL 会将其细化为 `en-Latn-US`,并据此推导出以下搜索列表: + +1. `en-Latn-US` +2. `en-Latn` +3. `en-US` 2. `en` 3. `und` +对于语言标签 `zh-CN`,HMCL 会将其细化为 `cmn-Hans-CN`,并据此推导出以下搜索列表: + +1. `cmn-Hans-CN` +2. `cmn-Hans` +3. `cmn-CN` +4. `cmn` +5. `zh-Hans-CN` +6. `zh-Hans` +7. `zh-CN` +8. `zh` +9. `und` + 对于能够混合的资源 (例如 `.properties` 文件),HMCL 会根据此列表的优先级混合资源; 对于难以混合的资源 (例如字体文件),HMCL 会根据此列表加载找到的最高优先级的资源。 @@ -182,35 +230,33 @@ HMCL 的维护者会替你完成其他步骤。 例如,如果当前环境的语言标签为 `eng-US`,那么 HMCL 会将其映射至 `en-US` 后再根据上述规则搜索本地化资源。 -如果当前语言是一个 [ISO 639 宏语言](https://en.wikipedia.org/wiki/ISO_639_macrolanguage)的子语言,那么 HMCL 也会搜索宏语言对应的资源。 - ### 对于中文的额外规则 -对于中文 (以及其子语言标签,例如文言文 (`lzh`)、普通话 (`cmn`)、粤语 (`yue`) 等等),HMCL 有着额外的支持。 - -如果当前环境的语言为中文 (及其子语言),且未指定书写脚本,那么 HMCL 会根据语言和地区标签推导出默认的书写脚本。 - -对于语言为 `lzh` 或地区为 `TW`、`HK`、`MO` 的情况,默认书写脚本为繁体中文 (`Hant`); -而对于其他语言和地区,默认书写脚本为简体中文 (`Hans`)。 - -此外,HMCL 会将 `zh-CN` 加入所有中文环境的搜索列表中,将 `zh-TW` 加入所有繁体中文环境的搜索列表中, -从而适应更多场景。 +HMCL 总是会将 `zh-CN` 加入所有中文环境的搜索列表中,将 `zh-TW` 加入所有繁体中文环境的搜索列表中。 以下是几个常见中文环境对应的本地化资源搜索列表。 - `zh-CN`: - 1. `zh-Hans-CN` - 2. `zh-Hans` - 3. `zh-CN` - 4. `zh` - 5. `und` + 1. `cmn-Hans-CN` + 2. `cmn-Hans` + 3. `cmn-CN` + 4. `cmn` + 5. `zh-Hans-CN` + 6. `zh-Hans` + 7. `zh-CN` + 8. `zh` + 9. `und` - `zh-SG`: - 1. `zh-Hans-SG` - 2. `zh-Hans` - 3. `zh-SG` - 4. `zh-CN` - 5. `zh` - 6. `und` + 1. `cmn-Hans-SG` + 2. `cmn-Hans` + 3. `cmn-SG` + 4. `cmn` + 5. `zh-Hans-SG` + 6. `zh-Hans` + 7. `zh-SG` + 8. `zh-CN` + 9. 
`zh` + 10. `und` - `zh-TW`: 1. `zh-Hant-TW` 2. `zh-Hant` @@ -219,19 +265,25 @@ HMCL 的维护者会替你完成其他步骤。 5. `zh-CN` 6. `und` - `zh-HK`: - 1. `zh-Hant-HK` - 2. `zh-Hant` - 3. `zh-HK` - 4. `zh-TW` - 5. `zh` - 6. `zh-CN` - 7. `und` + 1. `cmn-Hant-HK` + 2. `cmn-Hant` + 3. `cmn-HK` + 4. `cmn` + 5. `zh-Hant-HK` + 6. `zh-Hant` + 7. `zh-HK` + 8. `zh-TW` + 9. `zh` + 10. `zh-CN` + 11. `und` - `lzh`: 1. `lzh-Hant` 2. `lzh` 3. `zh-Hant` - 4. `zh` - 5. `und` + 4. `zh-TW` + 5. `zh` + 6. `zh-CN` + 7. `und` ## 自动同步文档内容