2022-12-29 05:22:40 +01:00
|
|
|
|
#define LOCALIZATION_CPP
|
|
|
|
|
#include "Localization.h"
|
|
|
|
|
#include "LocalizationStorage.h"
|
|
|
|
|
|
2022-12-31 04:37:45 +01:00
|
|
|
|
#include "Alloc.h"
|
2022-12-29 05:22:40 +01:00
|
|
|
|
#include "Game.h"
|
2023-02-23 04:14:38 +01:00
|
|
|
|
#include "UTF8.h"
|
2022-12-29 05:22:40 +01:00
|
|
|
|
#include "UtilityClass.h"
|
|
|
|
|
#include "VFormat.h"
|
|
|
|
|
|
|
|
|
|
namespace loc
|
|
|
|
|
{
|
|
|
|
|
|
2024-02-08 03:35:05 +01:00
|
|
|
|
/* Whether the player has chosen a language yet. While this is not 1, the
 * language screen is shown before the title screen (that startup logic
 * lives outside this part of the file - TODO confirm where). */
int lang_set = 0;
|
Fix "no language files found" title screen bug
If you've never set a language before (<lang_set> is not 1), then the
language screen will show up before the title screen. Selecting the
language will then make the title screen show up.
If no language files are present, the old logic for handling this was
to simply show the language screen at startup anyway, and let it
display the error message that language files are missing, as a warning
that the game is not packaged correctly. However, this logic has two
flaws:
- If the user has ever had language files and set a language before
(in a VVVVVV on that computer), the warning element is gone because
the language screen is not shown in that case - the game is simply in
English
- If the user has never set a language before, and then goes to the
language screen later via the menu, they will be sent to the title
screen, even if they were in-game. The main menu will also be broken.
The new way is to not show the language screen at startup if language
files are missing, and to change the logic so that you will only be
sent to the title screen if you actually haven't seen the title screen
yet.
I will also add a proper warning that fonts or language files are
missing by adding a message in the bottom left corner (in place of the
MMMMMM installed message).
2023-08-29 23:03:08 +02:00
|
|
|
|
/* NOTE(review): presumably true while the language menu is being shown at
 * startup, before the title screen has been seen, so that choosing a
 * language only continues to the title screen in that situation - confirm
 * against the menu code. */
bool pre_title_lang_menu = false;
|
|
|
|
|
|
2022-12-29 05:22:40 +01:00
|
|
|
|
/* Code of the active language. "en" is special-cased: gettext(),
 * gettext_case(), gettext_plural() and gettext_roomname_special() hand back
 * the English text without consulting any translation map. */
std::string lang = "en";
|
|
|
|
|
std::string lang_custom = "";
|
2023-10-02 02:44:13 +02:00
|
|
|
|
bool english_sprites = false;
|
2023-01-21 19:06:30 +01:00
|
|
|
|
std::string new_level_font = "";
|
2022-12-29 05:22:40 +01:00
|
|
|
|
/* Metadata that get_langmeta() returns whenever the language menu is not
 * open on a valid entry of languagelist. */
LangMeta langmeta;
|
|
|
|
|
|
|
|
|
|
// language screen list
|
|
|
|
|
/* Entries of the language screen. get_langmeta() indexes this with
 * game.currentmenuoption while the language menu is open. */
std::vector<LangMeta> languagelist;
|
|
|
|
|
int languagelist_curlang;
|
|
|
|
|
bool show_translator_menu = false;
|
|
|
|
|
size_t limitscheck_current_overflow;
|
2022-12-24 04:16:56 +01:00
|
|
|
|
std::vector<std::string> testable_script_ids;
|
2022-12-29 05:22:40 +01:00
|
|
|
|
|
|
|
|
|
int n_untranslated_roomnames = 0;
|
|
|
|
|
int n_unexplained_roomnames = 0;
|
|
|
|
|
int n_untranslated_roomnames_custom = 0;
|
|
|
|
|
int n_unexplained_roomnames_custom = 0;
|
2022-11-27 20:06:08 +01:00
|
|
|
|
int n_untranslated_roomnames_area[9];
|
2022-12-29 05:22:40 +01:00
|
|
|
|
|
|
|
|
|
int n_untranslated[COUNT_UNTRANSLATED_INDEX] = {0};
|
|
|
|
|
|
|
|
|
|
/* Returns the language metadata that should currently be in effect.
 *
 * While the language menu is open and the selected option is a valid index
 * into languagelist, that highlighted entry is returned; in every other
 * situation the global langmeta is returned. Never returns NULL. */
const LangMeta* get_langmeta(void)
{
    const bool on_language_menu = game.currentmenuname == Menu::language;

    /* The unsigned cast presumably exists so that a negative option index
     * compares as out of range instead of indexing before the list. */
    const bool option_in_list = (unsigned)game.currentmenuoption < languagelist.size();

    return (on_language_menu && option_in_list)
        ? &languagelist[game.currentmenuoption]
        : &langmeta;
}
|
|
|
|
|
|
|
|
|
|
const char* gettext(const char* eng)
|
|
|
|
|
{
|
|
|
|
|
if (lang == "en")
|
|
|
|
|
{
|
|
|
|
|
return eng;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return map_lookup_text(map_translation, eng, eng);
|
|
|
|
|
}
|
|
|
|
|
|
Add support for string cases in strings.xml (gendered Rescued/Missing)
I wanted to not complicate the system with different string cases (like
cgettext) if possible, and I have been able to keep the main strings a
simple English=Translation mapping thus far, but apparently strings
like "Rescued!" (which are one string in English), have to be
translated for the correct gender in some languages. So this was a good
time to add support for string cases anyway.
It's a number that can be given to a string to specify the specific
case it's used, to disambiguate identical English keys. In the case of
"Rescued!" and "Missing...", male versions of the string are case 1,
female versions are case 2, and Viridian being missing is case 3. Of
course, if a language doesn't need to use different variants, it can
simply fill in the same string for the different cases.
If any other string needs to switch to different cases: distinguish
them in the English strings.xml with the case="N" attribute (N=1 and
higher), sync language files from the translator menu (existing
translations for the uncased string will simply be copied to all cases)
and change loc::gettext("...") to loc::gettext_case("...", 1),
loc::gettext_case("...", 2), etc.
2022-12-01 01:27:30 +01:00
|
|
|
|
const char* gettext_case(const char* eng, char textcase)
|
|
|
|
|
{
|
|
|
|
|
if (lang == "en")
|
|
|
|
|
{
|
|
|
|
|
return eng;
|
|
|
|
|
}
|
|
|
|
|
if (textcase == 0)
|
|
|
|
|
{
|
|
|
|
|
return gettext(eng);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
char* eng_prefixed = add_disambiguator(textcase, eng, NULL);
|
|
|
|
|
if (eng_prefixed == NULL)
|
|
|
|
|
{
|
|
|
|
|
return eng;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const char* tra = map_lookup_text(map_translation, eng_prefixed, eng);
|
2022-12-31 04:37:45 +01:00
|
|
|
|
VVV_free(eng_prefixed);
|
Add support for string cases in strings.xml (gendered Rescued/Missing)
I wanted to not complicate the system with different string cases (like
cgettext) if possible, and I have been able to keep the main strings a
simple English=Translation mapping thus far, but apparently strings
like "Rescued!" (which are one string in English), have to be
translated for the correct gender in some languages. So this was a good
time to add support for string cases anyway.
It's a number that can be given to a string to specify the specific
case it's used, to disambiguate identical English keys. In the case of
"Rescued!" and "Missing...", male versions of the string are case 1,
female versions are case 2, and Viridian being missing is case 3. Of
course, if a language doesn't need to use different variants, it can
simply fill in the same string for the different cases.
If any other string needs to switch to different cases: distinguish
them in the English strings.xml with the case="N" attribute (N=1 and
higher), sync language files from the translator menu (existing
translations for the uncased string will simply be copied to all cases)
and change loc::gettext("...") to loc::gettext_case("...", 1),
loc::gettext_case("...", 2), etc.
2022-12-01 01:27:30 +01:00
|
|
|
|
return tra;
|
|
|
|
|
}
|
|
|
|
|
|
2022-12-29 05:22:40 +01:00
|
|
|
|
/* English plural rule: picks between the singular and the plural string.
 *
 * Returns eng_singular for n == 1 and, to stay consistent with the way
 * negative numbers are handled for other languages, for n == -1 as well.
 * Every other count yields eng_plural. */
static const char* gettext_plural_english(const char* eng_plural, const char* eng_singular, int n)
{
    const bool is_singular = (n == 1 || n == -1);

    return is_singular ? eng_singular : eng_plural;
}
|
|
|
|
|
|
|
|
|
|
const char* gettext_plural(const char* eng_plural, const char* eng_singular, int n)
|
|
|
|
|
{
|
|
|
|
|
if (lang != "en")
|
|
|
|
|
{
|
|
|
|
|
unsigned char form = form_for_count(n);
|
|
|
|
|
char* key = add_disambiguator(form+1, eng_plural, NULL);
|
|
|
|
|
if (key != NULL)
|
|
|
|
|
{
|
|
|
|
|
const char* tra = map_lookup_text(map_translation_plural, key, NULL);
|
|
|
|
|
|
2022-12-31 04:37:45 +01:00
|
|
|
|
VVV_free(key);
|
2022-12-29 05:22:40 +01:00
|
|
|
|
|
|
|
|
|
if (tra != NULL)
|
|
|
|
|
{
|
|
|
|
|
return tra;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return gettext_plural_english(eng_plural, eng_singular, n);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void gettext_plural_fill(char* buf, size_t buf_len, const char* eng_plural, const char* eng_singular, const char* args_index, ...)
|
|
|
|
|
{
|
|
|
|
|
/* Choose the right plural string based on a number, and then vformat that string.
|
|
|
|
|
* The first vararg determines the specific plural form. */
|
|
|
|
|
|
|
|
|
|
va_list args;
|
|
|
|
|
va_start(args, args_index);
|
|
|
|
|
int count = va_arg(args, int);
|
|
|
|
|
va_end(args);
|
|
|
|
|
|
|
|
|
|
const char* tra = gettext_plural(eng_plural, eng_singular, count);
|
|
|
|
|
|
|
|
|
|
va_start(args, args_index);
|
|
|
|
|
vformat_buf_valist(buf, buf_len, tra, args_index, args);
|
|
|
|
|
va_end(args);
|
|
|
|
|
}
|
|
|
|
|
|
Add a system for selecting between wordy/wordy2
Some languages have different spellings of wordy numbers based on the
gender of the things they're counting (uno crewmate versus una trinket)
or what a number's role is in the sentence (e.g. twenta out of twentu).
We've always had the idea we couldn't support such complex differences
though, because the game can't be adapted to know what gender each
object will have and what word classes might exist in other languages,
so translators would in those cases just have to forgo the wordy
numbers and just let the game use "20 out of 20".
A solution we came up with semi-recently though (after all translations were
finished except for Arabic) was to allow the translator to define
however many classes of wordy numbers they need, and fill them all out.
This would not need the game to be *adapted* for every language's
specific grammar and word genders/classes. Instead, the translator
would just choose their correct self-defined class at the time they use
`wordy` in the VFormat placeholder. Something like
{n|wordy|class=feminine}, or {n|wordy_feminine}.
So this would benefit several languages, but we came up with the
solution a little late for all languages to benefit from it. The Arabic
translators asked for two separate classes of wordy numbers though, so
my plan is to first just have a second list of wordy numbers
(translation2 in numbers.xml), which can be accessed by passing the
`wordy2` flag to VFormat, instead of `wordy`.
Once 2.4 is released, we can take our time to do it properly. This
would involve the ability for translators to define however many
classes they need, to name them what they want, and this name would
then be useable in VFormat placeholders. We can convert all existing
translations to have one class defined by default, such as "wordy", or
"translation" depending on implementation, but there's not so much
concern for maintaining backwards compatibility here, so we can do a
mass-switchover for all language files. That said, it wouldn't be too
hard to add a special case for "translation" being "wordy" either.
We can then ask translators if they would like to change anything with
the new system in place.
For now, we can use this system for Arabic, maybe Spanish since there
were complaints about uno/una, and *maybe* Dutch (it has a thing where
the number "one" is often capitalized differently, but it's not
mandatory per se)
2024-01-06 04:15:06 +01:00
|
|
|
|
std::string getnumber(int n, const char* number_class)
|
2022-12-29 05:22:40 +01:00
|
|
|
|
{
|
|
|
|
|
if (n < 0 || n > 100)
|
|
|
|
|
{
|
|
|
|
|
return help.String(n);
|
|
|
|
|
}
|
|
|
|
|
|
Add a system for selecting between wordy/wordy2
Some languages have different spellings of wordy numbers based on the
gender of the things they're counting (uno crewmate versus una trinket)
or what a number's role is in the sentence (e.g. twenta out of twentu).
We've always had the idea we couldn't support such complex differences
though, because the game can't be adapted to know what gender each
object will have and what word classes might exist in other languages,
so translators would in those cases just have to forgo the wordy
numbers and just let the game use "20 out of 20".
A solution we came up semi-recently though (after all translations were
finished except for Arabic), was to allow the translator to define
however many classes of wordy numbers they need, and fill them all out.
This would not need the game to be *adapted* for every language's
specific grammar and word genders/classes. Instead, the translator
would just choose their correct self-defined class at the time they use
`wordy` in the VFormat placeholder. Something like
{n|wordy|class=feminine}, or {n|wordy_feminine}.
So this would benefit several languages, but we came up with the
solution a little late for all languages to benefit from it. The Arabic
translators asked for two separate classes of wordy numbers though, so
my plan is to first just have a second list of wordy numbers
(translation2 in numbers.xml), which can be accessed by passing the
`wordy2` flag to VFormat, instead of `wordy`.
Once 2.4 is released, we can take our time to do it properly. This
would involve the ability for translators to define however many
classes they need, to name them what they want, and this name would
then be useable in VFormat placeholders. We can convert all existing
translations to have one class defined by default, such as "wordy", or
"translation" depending on implementation, but there's not so much
concern for maintaining backwards compatibility here, so we can do a
mass-switchover for all language files. That said, it wouldn't be too
hard to add a special case for "translation" being "wordy" either.
We can then ask translators if they would like to change anything with
the new system in place.
For now, we can use this system for Arabic, maybe Spanish since there
were complaints about uno/una, and *maybe* Dutch (it has a thing where
the number "one" is often capitalized differently, but it's not
mandatory per se)
2024-01-06 04:15:06 +01:00
|
|
|
|
// FIXME: implement a more flexible system later, where translators define the classes
|
|
|
|
|
std::string (*number_ptr)[101];
|
|
|
|
|
if (SDL_strcmp(number_class, "wordy2") == 0)
|
|
|
|
|
{
|
|
|
|
|
number_ptr = &number2;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
number_ptr = &number;
|
|
|
|
|
}
|
|
|
|
|
if ((*number_ptr)[n].empty())
|
2022-12-29 05:22:40 +01:00
|
|
|
|
{
|
|
|
|
|
return help.String(n);
|
|
|
|
|
}
|
Add a system for selecting between wordy/wordy2
Some languages have different spellings of wordy numbers based on the
gender of the things they're counting (uno crewmate versus una trinket)
or what a number's role is in the sentence (e.g. twenta out of twentu).
We've always had the idea we couldn't support such complex differences
though, because the game can't be adapted to know what gender each
object will have and what word classes might exist in other languages,
so translators would in those cases just have to forgo the wordy
numbers and just let the game use "20 out of 20".
A solution we came up semi-recently though (after all translations were
finished except for Arabic), was to allow the translator to define
however many classes of wordy numbers they need, and fill them all out.
This would not need the game to be *adapted* for every language's
specific grammar and word genders/classes. Instead, the translator
would just choose their correct self-defined class at the time they use
`wordy` in the VFormat placeholder. Something like
{n|wordy|class=feminine}, or {n|wordy_feminine}.
So this would benefit several languages, but we came up with the
solution a little late for all languages to benefit from it. The Arabic
translators asked for two separate classes of wordy numbers though, so
my plan is to first just have a second list of wordy numbers
(translation2 in numbers.xml), which can be accessed by passing the
`wordy2` flag to VFormat, instead of `wordy`.
Once 2.4 is released, we can take our time to do it properly. This
would involve the ability for translators to define however many
classes they need, to name them what they want, and this name would
then be useable in VFormat placeholders. We can convert all existing
translations to have one class defined by default, such as "wordy", or
"translation" depending on implementation, but there's not so much
concern for maintaining backwards compatibility here, so we can do a
mass-switchover for all language files. That said, it wouldn't be too
hard to add a special case for "translation" being "wordy" either.
We can then ask translators if they would like to change anything with
the new system in place.
For now, we can use this system for Arabic, maybe Spanish since there
were complaints about uno/una, and *maybe* Dutch (it has a thing where
the number "one" is often capitalized differently, but it's not
mandatory per se)
2024-01-06 04:15:06 +01:00
|
|
|
|
return (*number_ptr)[n];
|
2022-12-29 05:22:40 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool is_script_custom(const char* script_id)
|
|
|
|
|
{
|
|
|
|
|
return SDL_strncmp(script_id, "custom_", 7) == 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Looks up the translated textbox for one line of a cutscene script.
 *
 * script_id: ID of the script. IDs starting with "custom_" are looked up in
 *            map_translation_cutscene_custom with that 7-character prefix
 *            skipped; all others in map_translation_cutscene.
 * eng:       the English text of the textbox.
 * textcase:  case number used to disambiguate identical English texts.
 *
 * Returns NULL when
 *  - the script is not custom and the active language is "en",
 *  - the script has no (non-NULL) entry in the selected map,
 *  - the lookup key could not be built, or
 *  - the text has no entry in the script's own map.
 * Otherwise returns the stored TextboxFormat. */
const TextboxFormat* gettext_cutscene(const std::string& script_id, const std::string& eng, char textcase)
{
    hashmap* map;
    const char* map_script_key;
    if (is_script_custom(script_id.c_str()))
    {
        map = map_translation_cutscene_custom;
        /* Skip the "custom_" prefix. */
        map_script_key = &script_id.c_str()[7];
    }
    else
    {
        if (lang == "en")
        {
            return NULL;
        }

        map = map_translation_cutscene;
        map_script_key = script_id.c_str();
    }

    /* Zero-initialized, and only interpreted after the lookup reported
     * success, so that an out-value the lookup may have left untouched is
     * never read. */
    uintptr_t ptr_cutscene_map = 0;
    if (!hashmap_get(map, map_script_key, SDL_strlen(map_script_key), &ptr_cutscene_map))
    {
        return NULL;
    }

    hashmap* cutscene_map = (hashmap*) ptr_cutscene_map;
    if (cutscene_map == NULL)
    {
        return NULL;
    }

    size_t alloc_len;
    char* key = add_disambiguator(textcase, eng.c_str(), &alloc_len);
    if (key == NULL)
    {
        return NULL;
    }

    /* alloc_len-1: presumably the allocation size minus the terminating
     * NUL, i.e. the key's string length - TODO confirm in add_disambiguator. */
    uintptr_t ptr_format = 0;
    const bool found = hashmap_get(cutscene_map, key, alloc_len-1, &ptr_format);

    VVV_free(key);

    if (!found)
    {
        return NULL;
    }

    return (const TextboxFormat*) ptr_format;
}
|
|
|
|
|
|
|
|
|
|
const char* get_roomname_explanation(bool custom_level, int roomx, int roomy)
|
|
|
|
|
{
|
|
|
|
|
/* Never returns NULL. */
|
|
|
|
|
|
|
|
|
|
if (!fix_room_coords(custom_level, &roomx, &roomy))
|
|
|
|
|
{
|
|
|
|
|
return "";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const char* explanation;
|
|
|
|
|
if (custom_level)
|
|
|
|
|
{
|
|
|
|
|
explanation = explanation_roomnames_custom[roomy][roomx];
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
explanation = explanation_roomnames[roomy][roomx];
|
|
|
|
|
}
|
|
|
|
|
if (explanation == NULL)
|
|
|
|
|
{
|
|
|
|
|
return "";
|
|
|
|
|
}
|
|
|
|
|
return explanation;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const char* get_roomname_translation(bool custom_level, int roomx, int roomy)
|
|
|
|
|
{
|
|
|
|
|
/* Only looks for the translation, doesn't return English fallback.
|
|
|
|
|
* Never returns NULL.
|
|
|
|
|
* Also used for room name translation mode. */
|
|
|
|
|
|
|
|
|
|
if (!fix_room_coords(custom_level, &roomx, &roomy))
|
|
|
|
|
{
|
|
|
|
|
return "";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const char* tra;
|
|
|
|
|
if (custom_level)
|
|
|
|
|
{
|
|
|
|
|
tra = translation_roomnames_custom[roomy][roomx];
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
tra = translation_roomnames[roomy][roomx];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (tra == NULL)
|
|
|
|
|
{
|
|
|
|
|
return "";
|
|
|
|
|
}
|
|
|
|
|
return tra;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const char* gettext_roomname(bool custom_level, int roomx, int roomy, const char* eng, bool special)
|
|
|
|
|
{
|
|
|
|
|
if (!custom_level && lang == "en")
|
|
|
|
|
{
|
|
|
|
|
return eng;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (special)
|
|
|
|
|
{
|
|
|
|
|
return gettext_roomname_special(eng);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const char* tra = get_roomname_translation(custom_level, roomx, roomy);
|
|
|
|
|
if (tra[0] == '\0')
|
|
|
|
|
{
|
|
|
|
|
return eng;
|
|
|
|
|
}
|
|
|
|
|
return tra;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const char* gettext_roomname_special(const char* eng)
|
|
|
|
|
{
|
|
|
|
|
if (lang == "en")
|
|
|
|
|
{
|
|
|
|
|
return eng;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return map_lookup_text(map_translation_roomnames_special, eng, eng);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool is_cutscene_translated(const std::string& script_id)
|
|
|
|
|
{
|
|
|
|
|
hashmap* map;
|
|
|
|
|
const char* map_script_key;
|
|
|
|
|
if (is_script_custom(script_id.c_str()))
|
|
|
|
|
{
|
|
|
|
|
map = map_translation_cutscene_custom;
|
|
|
|
|
map_script_key = &script_id.c_str()[7];
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
if (lang == "en")
|
|
|
|
|
{
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
map = map_translation_cutscene;
|
|
|
|
|
map_script_key = script_id.c_str();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uintptr_t ptr_unused;
|
2023-03-05 21:42:21 +01:00
|
|
|
|
return hashmap_get(map, map_script_key, SDL_strlen(map_script_key), &ptr_unused);
|
2022-12-29 05:22:40 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint32_t toupper_ch(uint32_t ch)
|
|
|
|
|
{
|
|
|
|
|
// Convert a single Unicode codepoint to its uppercase variant
|
|
|
|
|
// Supports important Latin (1 and A), Cyrillic and Greek
|
|
|
|
|
|
|
|
|
|
// Turkish i?
|
|
|
|
|
if (get_langmeta()->toupper_i_dot && ch == 'i') return 0x130;
|
|
|
|
|
|
|
|
|
|
// a-z?
|
|
|
|
|
if ('a' <= ch && ch <= 'z') return ch - 0x20;
|
|
|
|
|
|
|
|
|
|
// Latin-1 Supplement? But not the division sign
|
|
|
|
|
if (0xE0 <= ch && ch <= 0xFE && ch != 0xF7) return ch - 0x20;
|
|
|
|
|
|
|
|
|
|
// ß? Yes, we do have this! And otherwise we could only replace it with SS later on.
|
|
|
|
|
if (ch == 0xDF) return 0x1E9E;
|
|
|
|
|
|
|
|
|
|
// ÿ?
|
|
|
|
|
if (ch == 0xFF) return 0x178;
|
|
|
|
|
|
|
|
|
|
// Let's get some exceptions for Latin Extended-A out of the way, starting with ı
|
|
|
|
|
if (ch == 0x131) return 'I';
|
|
|
|
|
|
|
|
|
|
// This range between two obscure exceptions...
|
|
|
|
|
if (0x139 <= ch && ch <= 0x148 && ch % 2 == 0) return ch - 1;
|
|
|
|
|
|
|
|
|
|
// The rest of Latin Extended-A?
|
|
|
|
|
if (0x100 <= ch && ch <= 0x177 && ch % 2 == 1) return ch - 1;
|
|
|
|
|
|
|
|
|
|
// Okay, Ÿ also pushed some aside...
|
|
|
|
|
if (0x179 <= ch && ch <= 0x17E && ch % 2 == 0) return ch - 1;
|
|
|
|
|
|
|
|
|
|
// Can't hurt to support Romanian properly...
|
|
|
|
|
if (ch == 0x219 || ch == 0x21B) return ch - 1;
|
|
|
|
|
|
|
|
|
|
// Cyrillic а-я?
|
|
|
|
|
if (0x430 <= ch && ch <= 0x44F) return ch - 0x20;
|
|
|
|
|
|
|
|
|
|
// There's probably a good reason Cyrillic upper and lower accents are wrapped around the alphabet...
|
|
|
|
|
if (0x450 <= ch && ch <= 0x45F) return ch - 0x50;
|
|
|
|
|
|
|
|
|
|
// Apparently a Ukrainian letter is all the way over there, why not.
|
|
|
|
|
if (ch == 0x491) return ch - 1;
|
|
|
|
|
|
|
|
|
|
// Time for Greek, thankfully we're not making a lowercasing function with that double sigma!
|
|
|
|
|
if (ch == 0x3C2) return 0x3A3;
|
|
|
|
|
|
|
|
|
|
// The entire Greek alphabet then, along with two accented letters
|
|
|
|
|
if (0x3B1 <= ch && ch <= 0x3CB) return ch - 0x20;
|
|
|
|
|
|
|
|
|
|
// Unfortunately Greek accented letters are all over the place.
|
|
|
|
|
if (ch == 0x3AC) return 0x386;
|
|
|
|
|
if (0x3AD <= ch && ch <= 0x3AF) return ch - 0x25;
|
|
|
|
|
if (ch == 0x3CC) return 0x38C;
|
|
|
|
|
if (ch == 0x3CD || ch == 0x3CE) return ch - 0x3F;
|
|
|
|
|
|
|
|
|
|
// Nothing matched! Just leave it as is
|
|
|
|
|
return ch;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::string toupper(const std::string& lower)
|
|
|
|
|
{
|
|
|
|
|
// Convert a UTF-8 string to uppercase
|
|
|
|
|
if (!get_langmeta()->toupper)
|
|
|
|
|
{
|
|
|
|
|
return lower;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::string upper;
|
2023-02-23 04:14:38 +01:00
|
|
|
|
/* Capacity is not final, but some uppercase is more bytes than the
|
|
|
|
|
* lowercase equivalent, so some extra breathing room couldn't hurt... */
|
|
|
|
|
upper.reserve(lower.length() + 6);
|
|
|
|
|
const char* lower_c = lower.c_str();
|
|
|
|
|
uint32_t ch;
|
2022-12-29 05:22:40 +01:00
|
|
|
|
bool ignorenext = false;
|
2023-02-23 04:14:38 +01:00
|
|
|
|
while ((ch = UTF8_next(&lower_c)))
|
2022-12-29 05:22:40 +01:00
|
|
|
|
{
|
|
|
|
|
if (get_langmeta()->toupper_lower_escape_char && ch == '~')
|
|
|
|
|
{
|
|
|
|
|
ignorenext = true;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!ignorenext)
|
|
|
|
|
{
|
|
|
|
|
ch = toupper_ch(ch);
|
|
|
|
|
}
|
2023-02-23 04:14:38 +01:00
|
|
|
|
upper.append(UTF8_encode(ch).bytes);
|
2022-12-29 05:22:40 +01:00
|
|
|
|
|
|
|
|
|
ignorenext = false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return upper;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::string remove_toupper_escape_chars(const std::string& _s)
|
|
|
|
|
{
|
|
|
|
|
// No-op, except if langmeta.toupper_lower_escape_char, to remove the ~ escape character
|
|
|
|
|
|
|
|
|
|
if (!get_langmeta()->toupper_lower_escape_char)
|
|
|
|
|
{
|
|
|
|
|
return _s;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::string s = std::string(_s);
|
|
|
|
|
for (signed int i = s.size()-1; i >= 0; i--)
|
|
|
|
|
{
|
|
|
|
|
if (s[i] == '~')
|
|
|
|
|
{
|
|
|
|
|
s.erase(i, 1);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return s;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} /* namespace loc */
|