From 1f929b25f8254d64d3e44e58a9d426a0d35899af Mon Sep 17 00:00:00 2001
From: Dav999
Date: Tue, 2 Jan 2024 03:57:26 +0100
Subject: [PATCH] Implement bidi reordering at display time

I'm now using SheenBidi to reorder RTL and bidirectional text properly
at text rendering time! For Arabic this is still missing reshaping, but
everything's looking really promising now!

The code changes are really non-invasive. The changes to Font.cpp are
absolutely minimal:

    1305+    if (bidi_should_transform(text))
    1306+    {
    1307+        text = bidi_transform(text);
    1308+    }

There's now a FontBidi.cpp, which implements these two functions,
notably bidi_transform(), which takes a UTF-8 encoded string and
returns another UTF-8 encoded string that has bidi reorderings and
reshapings applied.

In that function, SheenBidi gives us information about where in the
input string runs start and end, and on a basic level, all we need to
do there is to concatenate the parts together in the order that we're
given them, and to reverse the RTL runs (recognizable by odd levels).

As this is a proof-of-concept, bidi_should_transform() still always
returns true, applying the bidi algorithm to all languages and all
strings. I'm thinking of enabling bidi only when the language/font
metadata enables RTL (which could be for the interface or for a custom
level), or outside of that, at least when RTL characters are detected
(such as Arabic or Hebrew Unicode blocks).
---
 desktop_version/CMakeLists.txt   |   1 +
 desktop_version/src/Font.cpp     |   6 ++
 desktop_version/src/FontBidi.cpp | 153 +++++++++++++++++++++++++++++++
 desktop_version/src/FontBidi.h   |  16 ++++
 4 files changed, 176 insertions(+)
 create mode 100644 desktop_version/src/FontBidi.cpp
 create mode 100644 desktop_version/src/FontBidi.h

diff --git a/desktop_version/CMakeLists.txt b/desktop_version/CMakeLists.txt
index 9551b50b..0d4b8a7e 100644
--- a/desktop_version/CMakeLists.txt
+++ b/desktop_version/CMakeLists.txt
@@ -77,6 +77,7 @@ set(VVV_SRC
     src/FileSystemUtils.cpp
     src/Finalclass.cpp
     src/Font.cpp
+    src/FontBidi.cpp
     src/Game.cpp
     src/Graphics.cpp
     src/GraphicsResources.cpp
diff --git a/desktop_version/src/Font.cpp b/desktop_version/src/Font.cpp
index 642ff95a..3024bd87 100644
--- a/desktop_version/src/Font.cpp
+++ b/desktop_version/src/Font.cpp
@@ -6,6 +6,7 @@
 #include "Constants.h"
 #include "CustomLevels.h"
 #include "FileSystemUtils.h"
+#include "FontBidi.h"
 #include "Graphics.h"
 #include "GraphicsUtil.h"
 #include "Localization.h"
@@ -1301,6 +1302,11 @@ void print(
         y -= h_diff_8/2;
     }
 
+    if (bidi_should_transform(text))
+    {
+        text = bidi_transform(text);
+    }
+
     int position = 0;
     uint32_t codepoint;
     while ((codepoint = UTF8_next(&text)))
diff --git a/desktop_version/src/FontBidi.cpp b/desktop_version/src/FontBidi.cpp
new file mode 100644
index 00000000..d5df4cfd
--- /dev/null
+++ b/desktop_version/src/FontBidi.cpp
@@ -0,0 +1,153 @@
+#include "FontBidi.h"
+
+#include <SDL.h>
+
+#include "Alloc.h"
+#include "UTF8.h"
+
+extern "C"
+{
+#include <SheenBidi.h>
+}
+
+namespace font
+{
+
+/* Reports whether `text` must go through bidi_transform() before drawing.
+ * Proof of concept: the argument is not inspected yet, every string is
+ * reported as needing the transform. */
+bool bidi_should_transform(const char* text)
+{
+    // TODO
+    return true;
+}
+
+/* Returns `text` in visual order: the UTF-8 input is decoded, SheenBidi
+ * splits it into directional runs, and the runs are re-encoded in the order
+ * given, with RTL runs (odd level) written back to front.
+ *
+ * The result is either `text` itself (empty input, or a SheenBidi object
+ * could not be created) or a static buffer that the next call overwrites.
+ * At most 1023 codepoints are read and at most 1023 bytes are written;
+ * output is only ever cut between characters, never inside one. */
+const char* bidi_transform(const char* text)
+{
+    uint32_t utf32_in[1024];
+    int n_codepoints = 0;
+
+    const char* text_ptr = text;
+    while ((utf32_in[n_codepoints] = UTF8_next(&text_ptr)))
+    {
+        n_codepoints++;
+
+        // Leave the last slot free for the terminator written below
+        if (n_codepoints >= 1023)
+        {
+            break;
+        }
+    }
+    utf32_in[n_codepoints] = 0;
+
+    if (n_codepoints == 0)
+    {
+        return text;
+    }
+
+    static char utf8_out[1024];
+    size_t utf8_out_cur = 0;
+
+    SBCodepointSequence codepoint_sequence = {SBStringEncodingUTF32, (void*) utf32_in, (SBUInteger) n_codepoints};
+
+    SBAlgorithmRef algorithm = SBAlgorithmCreate(&codepoint_sequence);
+    if (algorithm == NULL)
+    {
+        return text;
+    }
+
+    /* NOTE(review): a single paragraph is created, so anything after a
+     * paragraph separator in `text` would presumably be left out - confirm
+     * that callers only pass single lines. */
+    SBParagraphRef paragraph = SBAlgorithmCreateParagraph(algorithm, 0, INT32_MAX, SBLevelDefaultRTL);
+    SDL_assert(paragraph != NULL);
+    if (paragraph == NULL)
+    {
+        // Asserts can be compiled out, so fall back to the input as well
+        VVV_freefunc(SBAlgorithmRelease, algorithm);
+        return text;
+    }
+
+    SBUInteger paragraph_len = SBParagraphGetLength(paragraph);
+    SBLineRef paragraph_line = SBParagraphCreateLine(paragraph, 0, paragraph_len);
+    SDL_assert(paragraph_line != NULL);
+    if (paragraph_line == NULL)
+    {
+        VVV_freefunc(SBParagraphRelease, paragraph);
+        VVV_freefunc(SBAlgorithmRelease, algorithm);
+        return text;
+    }
+
+    // Make sure )brackets( are mirrored correctly...
+    SBMirrorLocatorRef mirror_locator = SBMirrorLocatorCreate();
+    if (mirror_locator != NULL)
+    {
+        SBMirrorLocatorLoadLine(mirror_locator, paragraph_line, (void*) utf32_in);
+        const SBMirrorAgent *mirror_agent = SBMirrorLocatorGetAgent(mirror_locator);
+        while (SBMirrorLocatorMoveNext(mirror_locator))
+        {
+            utf32_in[mirror_agent->index] = mirror_agent->mirror;
+        }
+        VVV_freefunc(SBMirrorLocatorRelease, mirror_locator);
+    }
+
+    SBUInteger n_runs = SBLineGetRunCount(paragraph_line);
+    const SBRun *runs = SBLineGetRunsPtr(paragraph_line);
+
+    for (SBUInteger i = 0; i < n_runs; i++)
+    {
+        bool is_ltr = runs[i].level % 2 == 0;
+        for (size_t c = 0; c < runs[i].length; c++)
+        {
+            size_t ix;
+            if (is_ltr)
+            {
+                ix = runs[i].offset + c;
+            }
+            else
+            {
+                ix = runs[i].offset + runs[i].length - 1 - c;
+            }
+
+            // TODO prolly do something with reshaping here
+
+            UTF8_encoding enc = UTF8_encode(utf32_in[ix]);
+            size_t n_copy = (size_t) enc.nbytes;
+
+            /* Stop at the first character that does not fit completely:
+             * copying only some of its bytes would end the output in an
+             * invalid UTF-8 sequence. One byte stays reserved for '\0'. */
+            size_t out_room_left = sizeof(utf8_out) - 1 - utf8_out_cur;
+            if (out_room_left == 0 || n_copy > out_room_left)
+            {
+                goto no_more_runs;
+            }
+
+            SDL_memcpy(
+                &utf8_out[utf8_out_cur],
+                enc.bytes,
+                n_copy
+            );
+
+            utf8_out_cur += n_copy;
+        }
+    }
+    no_more_runs:
+    utf8_out[utf8_out_cur] = '\0';
+
+    VVV_freefunc(SBLineRelease, paragraph_line);
+    VVV_freefunc(SBParagraphRelease, paragraph);
+    VVV_freefunc(SBAlgorithmRelease, algorithm);
+
+    return utf8_out;
+}
+
+} // namespace font
diff --git a/desktop_version/src/FontBidi.h b/desktop_version/src/FontBidi.h
new file mode 100644
index 00000000..f2064775
--- /dev/null
+++ b/desktop_version/src/FontBidi.h
@@ -0,0 +1,16 @@
+#ifndef FONTBIDI_H
+#define FONTBIDI_H
+
+namespace font
+{
+
+/* Whether `text` needs bidi_transform() before it is drawn */
+bool bidi_should_transform(const char* text);
+
+/* Returns `text` with bidi reordering applied, as UTF-8. The result may
+ * point to a static buffer that the next call overwrites; do not free it. */
+const char* bidi_transform(const char* text);
+
+} // namespace font
+
+#endif // FONTBIDI_H