1
0
Fork 0
mirror of https://github.com/TerryCavanagh/VVVVVV.git synced 2024-12-22 17:49:43 +01:00

Implement new string formatting system (VFormat)

This commit adds a new string formatting system to replace uses of
`SDL_snprintf` and string concatenation.

Making our own string formatting system has been briefly discussed
during the review of the localization branch, and on the VVVVVV
Discord. It's inspired by Python's format strings, but simpler.

This is primarily to benefit localization - strings will be easier to
understand (`Now using %s Tileset` → `Now using {area} Tileset`,
`"%s remain"` → `"{n_crewmates|wordy} remain"`), translators can change
the word order for their language's grammar (`%1$s` is a POSIX
extension), and this system is also less error-prone (making the format
string not align with the actual arguments won't result in a crash or
UB).

It also integrates our needs better - particularly the "wordy" numbers
without having to have a `help.number_words(n).c_str()` at the
callsite, translators can opt in and out of wordy numbers per string,
and this should also make it easier to solve #859.

This commit adds the formatting system itself, and changes one
`SDL_snprintf` in the code to use it as a small demo (the rest should
probably be done in the localization branch to avoid more unneeded
work).

The system is described in full detail in VFormat.h and in the pull
request description.
This commit is contained in:
Dav999-v 2022-04-30 21:09:11 +02:00 committed by Misa Elizabeth Kai
parent eb46143098
commit ea4302b41e
6 changed files with 629 additions and 1 deletions

View file

@ -68,6 +68,7 @@ endif()
set(VVV_SRC
src/BinaryBlob.cpp
src/BlockV.cpp
src/CWrappers.cpp
src/Ent.cpp
src/Entity.cpp
src/FileSystemUtils.cpp
@ -101,6 +102,7 @@ set(VVV_SRC
src/GlitchrunnerMode.c
src/Network.c
src/ThirdPartyDeps.c
src/VFormat.c
src/Vlogging.c
src/Xoshiro.c
../third_party/physfs/extras/physfsrwops.c

View file

@ -0,0 +1,17 @@
#include <SDL.h>
#include "UtilityClass.h"
extern "C" char* HELP_number_words(int _t)
{
/* C wrapper for UtilityClass::number_words.
* Caller must SDL_free. */
std::string str = help.number_words(_t);
char* buffer = (char*) SDL_malloc(str.size() + 1);
str.copy(buffer, str.size());
buffer[str.size()] = '\0';
return buffer;
}

View file

@ -0,0 +1,6 @@
#ifndef CWRAPPERS_H
#define CWRAPPERS_H
char* HELP_number_words(int _t);
#endif /* CWRAPPERS_H */

View file

@ -19,6 +19,7 @@
#include "Screen.h"
#include "Script.h"
#include "UtilityClass.h"
#include "VFormat.h"
editorclass::editorclass(void)
{
@ -4182,7 +4183,7 @@ void editorclass::switch_tileset(const bool reversed)
clamp_tilecol(levx, levy, false);
char buffer[64];
SDL_snprintf(buffer, sizeof(buffer), "Now using %s Tileset", tilesets[tiles]);
vformat_buf(buffer, sizeof(buffer), "Now using {area} Tileset", "area:str", tilesets[tiles]);
note = buffer;
notedelay = 45;

View file

@ -0,0 +1,471 @@
#include "VFormat.h"
#include <physfs.h>
#include <SDL.h>
#include <stdbool.h>
#include "CWrappers.h"
static inline bool is_whitespace(char ch)
{
return ch == ' ' || ch == '\t';
}
static inline void trim_whitespace(const char** string, size_t* bytes)
{
/* Strips leading and trailing whitespace. */
while (*bytes > 0 && is_whitespace((*string)[0]))
{
(*string)++;
(*bytes)--;
}
while (*bytes > 0 && is_whitespace((*string)[*bytes - 1]))
{
(*bytes)--;
}
}
static inline void call_with_button(format_callback callback, void* userdata, int button_value)
{
/* Call the given callback with the specified button character (from
* Unicode Private Use Area) so the text renderer can display it. */
if (button_value < 0 || button_value > 0xFF)
{
button_value = 0xFF;
}
uint32_t utf32[2];
utf32[0] = 0xE000 + button_value;
utf32[1] = 0x0000;
char utf8[5];
PHYSFS_utf8FromUcs4(utf32, utf8, sizeof(utf8));
callback(userdata, utf8, SDL_strlen(utf8));
}
static inline void call_with_upper(format_callback callback, void* userdata, const char* string, size_t bytes)
{
/* Call the given callback with the specified string, where the first
* letter is changed to uppercase.
* First reserve some more space for the (in UTF-8) possibly larger
* uppercase character, and the currently missing null terminator.
* (Theoretically 1->4 bytes, + 1 for '\0') */
size_t conv_bytes = bytes + 4;
char* utf8 = SDL_malloc(conv_bytes);
uint32_t* utf32 = SDL_malloc(conv_bytes*4);
if (utf8 == NULL || utf32 == NULL)
{
/* Never mind the capitalization then! Better than nothing. */
callback(userdata, string, bytes);
SDL_free(utf32);
SDL_free(utf8);
return;
}
SDL_memcpy(utf8, string, bytes);
utf8[bytes] = '\0';
PHYSFS_utf8ToUcs4(utf8, utf32, conv_bytes*4);
utf32[0] = SDL_toupper(utf32[0]); /* FIXME: use loc::toupper_ch in localization branch */
PHYSFS_utf8FromUcs4(utf32, utf8, conv_bytes);
callback(userdata, utf8, SDL_strlen(utf8));
SDL_free(utf32);
SDL_free(utf8);
}
void vformat_cb_valist(
format_callback callback,
void* userdata,
const char* format_string,
const char* args_index,
va_list args
)
{
/* Variant of vformat_cb which takes a va_list instead of `...`
*
* Also the core formatting function where everything comes together.
* The callback receives each part of the resulting string.
* These parts are not null-terminated. */
const char* cursor = format_string;
while (true)
{
/* Fast path: maybe we're at the end? */
if (cursor[0] == '\0')
{
return;
}
/* Find the next { or }. The } is only needed now for escaping. */
const char* first_a = SDL_strchr(cursor, '{');
const char* first_b = SDL_strchr(cursor, '}');
const char* next_stop = first_a;
if (next_stop == NULL || first_b < next_stop)
{
next_stop = first_b;
}
if (next_stop == NULL)
{
/* No more placeholders or escapes in this string, run it to the end */
callback(userdata, cursor, SDL_strlen(cursor));
return;
}
/* Output text until the placeholder... */
callback(userdata, cursor, next_stop - cursor);
cursor = next_stop;
if (cursor[0] == '}')
{
/* Just handle }}, for symmetry with {{ */
callback(userdata, cursor, 1);
cursor++;
if (*cursor == '}')
{
cursor++;
}
}
else if (cursor[0] == '{' && cursor[1] == '{')
{
/* Just a {{ */
callback(userdata, cursor, 1);
cursor += 2;
}
else if (cursor[0] == '{')
{
/* Start of a placeholder!
* A placeholder can consist of one or more parts separated by |,
* the first part always being the key, all others being flags. */
const char* placeholder_start = cursor;
cursor++;
const char* name = cursor;
size_t name_len = 0;
bool flag_wordy = false;
int flag_digits = 0;
bool flag_spaces = false;
bool flag_upper = false;
bool first_iter = true;
do
{
first_a = SDL_strchr(cursor, '|');
first_b = SDL_strchr(cursor, '}');
next_stop = first_a;
if (next_stop == NULL || first_b < next_stop)
{
next_stop = first_b;
}
if (next_stop == NULL)
{
/* Unterminated placeholder */
callback(userdata, placeholder_start, SDL_strlen(placeholder_start));
return;
}
size_t flag_len = next_stop - cursor;
if (first_iter)
{
name_len = flag_len;
first_iter = false;
}
else if (flag_len == 5 && SDL_memcmp(cursor, "wordy", 5) == 0)
{
flag_wordy = true;
}
else if (flag_len >= 8 && SDL_memcmp(cursor, "digits=", 7) == 0)
{
/* strtol stops on the first non-digit anyway, so... */
flag_digits = SDL_strtol(cursor + 7, NULL, 10);
}
else if (flag_len == 6 && SDL_memcmp(cursor, "spaces", 6) == 0)
{
flag_spaces = true;
}
else if (flag_len == 5 && SDL_memcmp(cursor, "upper", 5) == 0)
{
flag_upper = true;
}
cursor = next_stop + 1;
}
while (next_stop[0] != '}');
/* Now look up the name in the arguments.
* The arguments index string is comma-separated,
* each argument is name:type. */
va_list args_copy;
va_copy(args_copy, args);
const char* args_index_cursor = args_index;
bool match = false;
do
{
while (args_index_cursor[0] == ' ')
{
/* Skip spaces between args */
args_index_cursor++;
}
const char* next_comma = SDL_strchr(args_index_cursor, ',');
const char* next_colon = SDL_strchr(args_index_cursor, ':');
if (next_comma == NULL)
{
next_comma = SDL_strchr(args_index_cursor, '\0');
}
if (next_colon == NULL || next_colon > next_comma)
{
break;
}
const char* arg_name = args_index_cursor;
size_t arg_name_len = next_colon - arg_name;
const char* arg_type = next_colon + 1;
size_t arg_type_len = next_comma - arg_type;
trim_whitespace(&arg_name, &arg_name_len);
trim_whitespace(&arg_type, &arg_type_len);
match = (arg_name_len == name_len && SDL_memcmp(arg_name, name, name_len) == 0);
if (arg_type_len == 3 && SDL_memcmp(arg_type, "int", 3) == 0)
{
int value = va_arg(args_copy, int);
if (match)
{
if (flag_wordy)
{
char* number = HELP_number_words(value);
if (flag_upper)
{
call_with_upper(callback, userdata, number, SDL_strlen(number));
}
else
{
callback(userdata, number, SDL_strlen(number));
}
SDL_free(number);
}
else
{
const char* format = flag_spaces ? "%*d" : "%0*d";
char buffer[24];
SDL_snprintf(buffer, sizeof(buffer), format, flag_digits, value);
callback(userdata, buffer, SDL_strlen(buffer));
}
}
}
else if (arg_type_len == 3 && SDL_memcmp(arg_type, "str", 3) == 0)
{
const char* value = va_arg(args_copy, const char*);
if (match)
{
if (value == NULL)
{
callback(userdata, "[null]", 6);
}
else if (flag_upper)
{
call_with_upper(callback, userdata, value, SDL_strlen(value));
}
else
{
callback(userdata, value, SDL_strlen(value));
}
}
}
else if (arg_type_len == 3 && SDL_memcmp(arg_type, "but", 3) == 0)
{
int button_value = va_arg(args_copy, int);
if (match)
{
call_with_button(callback, userdata, button_value);
}
}
else
{
/* Unknown type, now what type do we give va_arg? */
match = false;
break;
}
if (match)
{
break;
}
/* Remember: next_comma can also be the final '\0' */
args_index_cursor = next_comma + 1;
}
while (args_index_cursor[-1] != '\0');
va_end(args_copy);
if (!match)
{
callback(userdata, "[", 1);
callback(userdata, name, name_len);
callback(userdata, "?]", 2);
}
}
}
}
void vformat_cb(
format_callback callback,
void* userdata,
const char* format_string,
const char* args_index,
...
)
{
/* Format with a user-supplied callback.
* The callback receives each part of the resulting string.
* These parts are not null-terminated. */
va_list args;
va_start(args, args_index);
vformat_cb_valist(callback, userdata, format_string, args_index, args);
va_end(args);
}
typedef struct _buffer_info
{
size_t total_needed;
char* buffer_cursor;
size_t buffer_left;
} buffer_info;
static void callback_buffer_append(void* userdata, const char* string, size_t bytes)
{
/* Callback for vformat_buf. */
buffer_info* buf = (buffer_info*) userdata;
buf->total_needed += bytes;
if (buf->buffer_cursor != NULL && buf->buffer_left > 0)
{
size_t copy_len = SDL_min(bytes, buf->buffer_left);
SDL_memcpy(buf->buffer_cursor, string, copy_len);
buf->buffer_cursor += copy_len;
buf->buffer_left -= copy_len;
}
}
size_t vformat_buf_valist(
char* buffer,
size_t buffer_len,
const char* format_string,
const char* args_index,
va_list args
)
{
/* Variant of vformat_buf which takes a va_list instead of `...` */
buffer_info buf = {
.total_needed = 1,
.buffer_cursor = buffer,
.buffer_left = buffer_len
};
if (buf.buffer_left != 0)
{
/* We do still need to write the null terminator */
buf.buffer_left--;
}
vformat_cb_valist(callback_buffer_append, (void*) &buf, format_string, args_index, args);
if (buffer != NULL && buffer_len > 0)
{
*buf.buffer_cursor = '\0';
}
return buf.total_needed;
}
size_t vformat_buf(
char* buffer,
size_t buffer_len,
const char* format_string,
const char* args_index,
...
)
{
/* Format to the specified buffer.
* Ensures buffer is not overrun, and that it is null-terminated.
* Returns total number of bytes that were needed, or that would have been needed.
* You may pass a NULL buffer and/or size 0 if you only need this needed length. */
va_list args;
va_start(args, args_index);
size_t total_needed = vformat_buf_valist(buffer, buffer_len, format_string, args_index, args);
va_end(args);
return total_needed;
}
char* vformat_alloc_valist(
const char* format_string,
const char* args_index,
va_list args
)
{
/* Variant of vformat_alloc which takes a va_list instead of `...` */
size_t needed = vformat_buf_valist(NULL, 0, format_string, args_index, args);
char* buffer = SDL_malloc(needed);
if (buffer == NULL)
{
return NULL;
}
vformat_buf_valist(buffer, needed, format_string, args_index, args);
return buffer;
}
char* vformat_alloc(
const char* format_string,
const char* args_index,
...
)
{
/* Format to an automatically allocated and resized buffer.
* Caller must SDL_free. */
va_list args;
va_start(args, args_index);
char* buffer = vformat_alloc_valist(format_string, args_index, args);
va_end(args);
return buffer;
}

View file

@ -0,0 +1,131 @@
/*
* == VFormat ==
*
* VVVVVV's format strings. See the function declarations below (vformat_*).
* - vformat_cb Calls a user-supplied callback function for each part of
* the resulting string.
* - vformat_buf Fills a user-supplied buffer with the result.
* - vformat_alloc Allocates a buffer with the result (caller must SDL_free).
*
* All include the following parameters:
* - format_string The string which needs placeholders to be filled in
* - args_index A string describing the varargs
* - ... The varargs (like printf-family functions)
*
* Variants ending in _valist are equivalent, but take a va_list instead of a variable
* number of arguments. These use va_copy (they have to) and thus leave the given
* va_list untouched.
*
* The FORMAT STRING can include placeholders, which look like {name} or {name|flags}.
* The name can be user-defined, as long as it appears in the ARGS INDEX.
* Flags are separated by |, invalid flags are ignored.
*
* The valid flags are:
* - wordy [ints only] use number words (Twenty) instead of digits (20)
* - digits=n [ints only] force minimum n digits, like n=5 --> 00031
* - spaces [only if using digits=n] use leading spaces instead of 0s
* - upper uppercase the first character with loc::toupper_ch
*
* For example, {aa|digits=3|spaces} fills in the argument with the name "aa".
* Space is reserved for 3 digits, and unused columns are padded with spaces.
* This is equivalent to %3d in printf-family functions.
*
* Literal { and } characters can be included in the text by doubling them,
* like {{ and }}. Including curly braces of any kind inside placeholder tags,
* (as part of the name or flags), is unsupported. Included string arguments
* aren't subject to further parsing, so including curly braces in string
* arguments poses no problems.
*
* The ARGS INDEX defines the names and types of the arguments you pass.
* In the args index string, arguments are separated by commas, and each argument is
* in the format name:type. Whitespace around the : and , separators is ignored.
*
* The valid types are:
* - int Signed integer
* - str const char*
* - but Controller button icon
*
* Full example:
*
* char buffer[100];
* vformat_buf(buffer, sizeof(buffer),
* "{crewmate} got {number} out of {total} trinkets in {m}:{s|digits=2}.{ms|digits=3}",
* "number:int, total:int, crewmate:str, m:int, s:int, ms:int",
* 2, 20, "Vermilion", 2, 3, 1
* );
*
* => "Vermilion got 2 out of 20 trinkets in 2:03.001"
*
* ERROR CONDITIONS: These functions should not cause any crashes or UB unless you
* mislead them on the programming side of things. For example, if your args index doesn't
* align with the varargs you actually pass, or you pass the wrong buffer size into
* vformat_buf. That means it should be safe to use user-generated content as a format
* string. If the format string refers to an argument name that is not found in the index,
* that placeholder will be filled in as [name?] (where `name` is filled in with the name).
* If you pass a NULL string argument, it will be rendered as [null].
*/
#ifndef VFORMAT_H
#define VFORMAT_H
#include <stdarg.h>
#include <stddef.h>
#ifdef __cplusplus
extern "C"
{
#endif
typedef void (*format_callback)(void* userdata, const char* string, size_t bytes);
void vformat_cb_valist(
format_callback callback,
void* userdata,
const char* format_string,
const char* args_index,
va_list args
);
void vformat_cb(
format_callback callback,
void* userdata,
const char* format_string,
const char* args_index,
...
);
size_t vformat_buf_valist(
char* buffer,
size_t buffer_len,
const char* format_string,
const char* args_index,
va_list args
);
size_t vformat_buf(
char* buffer,
size_t buffer_len,
const char* format_string,
const char* args_index,
...
);
char* vformat_alloc_valist(
const char* format_string,
const char* args_index,
va_list args
);
char* vformat_alloc(
const char* format_string,
const char* args_index,
...
);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* VFORMAT_H */