Implement bidi reordering at display time

I'm now using SheenBidi to reorder RTL and bidirectional text properly
at text rendering time! For Arabic this is still missing reshaping, but
everything's looking really promising now!

The code changes are really non-invasive. The changes to Font.cpp are
absolutely minimal:

     1305+    if (bidi_should_transform(text))
     1306+    {
     1307+        text = bidi_transform(text);
     1308+    }

There's now a FontBidi.cpp, which implements these two functions,
notably bidi_transform(), which takes a UTF-8 encoded string and
returns another UTF-8 encoded string that has bidi reordering applied
(reshaping is not implemented yet, but is meant to happen there too).

In that function, SheenBidi gives us information about where in the
input string runs start and end, and on a basic level, all we need to
do there is to concatenate the parts together in the order that we're
given them, and to reverse the RTL runs (recognizable by odd levels).

As this is a proof-of-concept, bidi_should_transform() still always
returns true, applying the bidi algorithm to all languages and all
strings. I'm thinking of enabling bidi only when the language/font
metadata enables RTL (which could be for the interface or for a custom
level), or outside of that, at least when RTL characters are detected
(such as Arabic or Hebrew Unicode blocks).
This commit is contained in:
Dav999 2024-01-02 03:57:26 +01:00 committed by Misa Elizabeth Kai
parent 5766e6c426
commit 59ccdbea00
4 changed files with 139 additions and 0 deletions

View File

@ -77,6 +77,7 @@ set(VVV_SRC
src/FileSystemUtils.cpp
src/Finalclass.cpp
src/Font.cpp
src/FontBidi.cpp
src/Game.cpp
src/Graphics.cpp
src/GraphicsResources.cpp

View File

@ -6,6 +6,7 @@
#include "Constants.h"
#include "CustomLevels.h"
#include "FileSystemUtils.h"
#include "FontBidi.h"
#include "Graphics.h"
#include "GraphicsUtil.h"
#include "Localization.h"
@ -1301,6 +1302,11 @@ void print(
y -= h_diff_8/2;
}
if (bidi_should_transform(text))
{
text = bidi_transform(text);
}
int position = 0;
uint32_t codepoint;
while ((codepoint = UTF8_next(&text)))

View File

@ -0,0 +1,120 @@
#include "FontBidi.h"
#include <SDL.h>
#include "Alloc.h"
#include "UTF8.h"
extern "C"
{
#include <SheenBidi.h>
}
namespace font
{
// Tells the caller whether `text` should be passed through bidi_transform()
// before it is drawn. For now this is a stub: the argument is not inspected
// and every string is reported as needing the transform.
bool bidi_should_transform(const char* text)
{
    // TODO: decide based on the text/language instead of always saying yes
    (void) text;
    const bool needs_bidi = true;
    return needs_bidi;
}
// Reorders the UTF-8 string `text` for display using SheenBidi and returns
// the result as UTF-8.
//
// Returns `text` itself when it contains no codepoints or when SheenBidi
// fails to create one of its objects. Otherwise returns a pointer to a
// function-local static buffer, so the result is overwritten by the next
// call and must be used (or copied) before calling this function again.
//
// At most 1023 codepoints of input are considered and at most 1023 bytes of
// output are produced; the output is always cut at a codepoint boundary.
const char* bidi_transform(const char* text)
{
    // SheenBidi is handed a UTF-32 sequence, so decode the input first
    uint32_t utf32_in[1024];
    int n_codepoints = 0;
    const char* text_ptr = text;
    while ((utf32_in[n_codepoints] = UTF8_next(&text_ptr)))
    {
        n_codepoints++;
        if (n_codepoints >= 1023)
        {
            // Leave the last slot free for the terminator
            break;
        }
    }
    utf32_in[n_codepoints] = 0;

    if (n_codepoints == 0)
    {
        return text;
    }

    static char utf8_out[1024];
    size_t utf8_out_cur = 0;

    SBCodepointSequence codepoint_sequence = {SBStringEncodingUTF32, (void*) utf32_in, (SBUInteger) n_codepoints};

    SBAlgorithmRef algorithm = SBAlgorithmCreate(&codepoint_sequence);
    if (algorithm == NULL)
    {
        return text;
    }

    // NOTE(review): only one paragraph is created here; presumably it ends at
    // the first paragraph separator, so callers are expected to pass a single
    // line of text - confirm against the SheenBidi documentation.
    SBParagraphRef paragraph = SBAlgorithmCreateParagraph(algorithm, 0, INT32_MAX, SBLevelDefaultRTL);
    SDL_assert(paragraph != NULL);
    if (paragraph == NULL)
    {
        // SDL_assert may be compiled out, so fall back to the untransformed
        // text instead of dereferencing NULL, and don't leak the algorithm
        VVV_freefunc(SBAlgorithmRelease, algorithm);
        return text;
    }

    SBUInteger paragraph_len = SBParagraphGetLength(paragraph);
    SBLineRef paragraph_line = SBParagraphCreateLine(paragraph, 0, paragraph_len);
    SDL_assert(paragraph_line != NULL);
    if (paragraph_line == NULL)
    {
        VVV_freefunc(SBParagraphRelease, paragraph);
        VVV_freefunc(SBAlgorithmRelease, algorithm);
        return text;
    }

    // Make sure )brackets( are mirrored correctly...
    // The mirrored codepoints are written straight back into utf32_in, so
    // the run loop below picks them up without special handling.
    SBMirrorLocatorRef mirror_locator = SBMirrorLocatorCreate();
    if (mirror_locator != NULL)
    {
        SBMirrorLocatorLoadLine(mirror_locator, paragraph_line, (void*) utf32_in);
        const SBMirrorAgent *mirror_agent = SBMirrorLocatorGetAgent(mirror_locator);
        while (SBMirrorLocatorMoveNext(mirror_locator))
        {
            utf32_in[mirror_agent->index] = mirror_agent->mirror;
        }
        VVV_freefunc(SBMirrorLocatorRelease, mirror_locator);
    }

    // Concatenate the runs in the order SheenBidi gives them to us,
    // emitting RTL runs (odd levels) back to front.
    SBUInteger n_runs = SBLineGetRunCount(paragraph_line);
    const SBRun *runs = SBLineGetRunsPtr(paragraph_line);
    bool out_full = false;
    for (SBUInteger i = 0; i < n_runs && !out_full; i++)
    {
        const bool is_ltr = runs[i].level % 2 == 0;
        for (size_t c = 0; c < runs[i].length; c++)
        {
            size_t ix;
            if (is_ltr)
            {
                ix = runs[i].offset + c;
            }
            else
            {
                ix = runs[i].offset + runs[i].length - 1 - c;
            }

            // TODO prolly do something with reshaping here

            UTF8_encoding enc = UTF8_encode(utf32_in[ix]);

            // utf8_out_cur never exceeds sizeof(utf8_out) - 1, so this
            // cannot wrap around
            const size_t out_room_left = sizeof(utf8_out) - 1 - utf8_out_cur;
            if ((size_t) enc.nbytes > out_room_left)
            {
                // Stop before this codepoint rather than writing only part
                // of its bytes, which would leave invalid UTF-8 at the end
                out_full = true;
                break;
            }

            SDL_memcpy(
                &utf8_out[utf8_out_cur],
                enc.bytes,
                enc.nbytes
            );
            utf8_out_cur += enc.nbytes;
        }
    }

    utf8_out[utf8_out_cur] = '\0';

    VVV_freefunc(SBLineRelease, paragraph_line);
    VVV_freefunc(SBParagraphRelease, paragraph);
    VVV_freefunc(SBAlgorithmRelease, algorithm);

    return utf8_out;
}
} // namespace font

View File

@ -0,0 +1,12 @@
#ifndef FONTBIDI_H
#define FONTBIDI_H
// Bidirectional (RTL/LTR) text reordering helpers used at text rendering time.
namespace font
{
// Returns whether `text` should be passed through bidi_transform() before
// being drawn. The current implementation always returns true.
bool bidi_should_transform(const char* text);
// Takes a UTF-8 encoded string and returns a UTF-8 encoded string with bidi
// reordering applied. The return value is either `text` itself or a pointer
// to an internal static buffer that is overwritten by the next call, so the
// caller must not free it and must use it before calling this again.
const char* bidi_transform(const char* text);
} // namespace font
#endif // FONTBIDI_H