diff --git a/desktop_version/CMakeLists.txt b/desktop_version/CMakeLists.txt index 3edafd17..010a09d5 100644 --- a/desktop_version/CMakeLists.txt +++ b/desktop_version/CMakeLists.txt @@ -56,6 +56,7 @@ INCLUDE_DIRECTORIES( ../third_party/tinyxml ../third_party/physfs ../third_party/lodepng + ../third_party/utfcpp/source ) # Source Lists diff --git a/desktop_version/src/FileSystemUtils.cpp b/desktop_version/src/FileSystemUtils.cpp index 2a8f7f22..2a1d2df5 100644 --- a/desktop_version/src/FileSystemUtils.cpp +++ b/desktop_version/src/FileSystemUtils.cpp @@ -179,7 +179,7 @@ bool FILESYSTEM_loadTiXmlDocument(const char *name, TiXmlDocument *doc) { return false; } - doc->Parse((const char*)mem); + doc->Parse((const char*)mem, NULL, TIXML_ENCODING_UTF8); FILESYSTEM_freeMemory(&mem); return true; } diff --git a/desktop_version/src/Graphics.cpp b/desktop_version/src/Graphics.cpp index bc5270c3..678fa29d 100644 --- a/desktop_version/src/Graphics.cpp +++ b/desktop_version/src/Graphics.cpp @@ -3,6 +3,10 @@ #include "Entity.h" #include "Map.h" #include "Screen.h" +#include "FileSystemUtils.h" +#include +#include +#include void Graphics::init() { @@ -129,6 +133,22 @@ void Graphics::init() showmousecursor = true; } +int Graphics::font_idx(char32_t ch) { + if (font_positions.size() > 0) { + std::map::iterator iter = font_positions.find(ch); + if (iter == font_positions.end()) { + iter = font_positions.find('?'); + if (iter == font_positions.end()) { + puts("font.txt missing fallback character!"); + exit(1); + } + } + return iter->second; + } else { + return ch; + } +} + Graphics::~Graphics() { @@ -146,7 +166,7 @@ void Graphics::drawspritesetcol(int x, int y, int t, int c, UtilityClass& help) void Graphics::Makebfont() { - for (int j = 0; j < 16; j++) + for (int j = 0; j < (grphx.im_bfont->h / 8); j++) { for (int i = 0; i < 16; i++) { @@ -154,25 +174,31 @@ void Graphics::Makebfont() SDL_Surface* temp = GetSubSurface(grphx.im_bfont,i*8,j*8,8,8); bfont.push_back(temp); - temp = GetSubSurface(grphx.im_bfont,i*8,j*8,8,8); - SDL_Surface* TempFlipped = FlipSurfaceVerticle(temp); - + SDL_Surface* TempFlipped = FlipSurfaceVerticle(temp); flipbfont.push_back(TempFlipped); - SDL_FreeSurface(temp); - } } - //Ok, now we work out the lengths (this data string cortesy of a program I wrote!) - for (int i = 0; i < 256; i++) - { - bfontlen.push_back(6); + unsigned char* charmap = NULL; + size_t length; + FILESYSTEM_loadFileToMemory("graphics/font.txt", &charmap, &length); + if (charmap != NULL) { + unsigned char* current = charmap; + unsigned char* end = charmap + length; + int pos = 0; + while (current != end) { + int codepoint = utf8::unchecked::next(current); + font_positions[codepoint] = pos; + ++pos; + } } +} - - for(int k = 0; k < 96; k++) - { - bfontlen[k + 32] = 8;// int(maprow[k]); +int Graphics::bfontlen(char32_t ch) { + if (ch < 32) { + return 6; + } else { + return 8; } } @@ -250,13 +276,9 @@ void Graphics::Print( int _x, int _y, std::string _s, int r, int g, int b, bool _x = ((160 ) - ((len(_s)) / 2)); int bfontpos = 0; int curr; - for (unsigned int i = 0; i < _s.length(); i++) - { - curr = (_s.c_str())[i]; - if (curr > 255 || curr < 0) - { - curr = '?'; - } + std::string::iterator iter = _s.begin(); + while (iter != _s.end()) { + curr = utf8::unchecked::next(iter); point tpoint; tpoint.x = _x + bfontpos; tpoint.y = _y; @@ -267,13 +289,13 @@ void Graphics::Print( int _x, int _y, std::string _s, int r, int g, int b, bool if (flipmode) { - BlitSurfaceColoured( flipbfont[curr], NULL, backBuffer, &fontRect , ct); + BlitSurfaceColoured( flipbfont[font_idx(curr)], NULL, backBuffer, &fontRect , ct); } else { - BlitSurfaceColoured( bfont[curr], NULL, backBuffer, &fontRect , ct); + BlitSurfaceColoured( bfont[font_idx(curr)], NULL, backBuffer, &fontRect , ct); } - bfontpos+=bfontlen[curr] ; + bfontpos+=bfontlen(curr) ; } } @@ -293,13 +315,9 @@ void Graphics::bigprint( int _x, int _y, std::string _s, int r, int g, int b, b int bfontpos = 0; int curr; - for (unsigned int i = 0; i < _s.length(); i++) - { - curr = (_s.c_str())[i]; - if (curr > 255 || curr < 0) - { - curr = '?'; - } + std::string::iterator iter = _s.begin(); + while (iter != _s.end()) { + curr = utf8::unchecked::next(iter); /* point tpoint; @@ -313,29 +331,29 @@ void Graphics::bigprint( int _x, int _y, std::string _s, int r, int g, int b, b if (flipmode) { - SDL_Surface* tempPrint = ScaleSurfaceSlow(flipbfont[curr], bfont[curr]->w *sc,bfont[curr]->h *sc); + SDL_Surface* tempPrint = ScaleSurfaceSlow(flipbfont[font_idx(curr)], bfont[font_idx(curr)]->w *sc,bfont[font_idx(curr)]->h *sc); SDL_Rect printrect = { Sint16((_x) + bfontpos), Sint16(_y) , Sint16(bfont_rect.w*sc), Sint16(bfont_rect.h * sc)}; BlitSurfaceColoured(tempPrint, NULL, backBuffer, &printrect, ct); SDL_FreeSurface(tempPrint); } else { - SDL_Surface* tempPrint = ScaleSurfaceSlow(bfont[curr], bfont[curr]->w *sc,bfont[curr]->h *sc); + SDL_Surface* tempPrint = ScaleSurfaceSlow(bfont[font_idx(curr)], bfont[font_idx(curr)]->w *sc,bfont[font_idx(curr)]->h *sc); SDL_Rect printrect = { static_cast((_x) + bfontpos), static_cast(_y) , static_cast((bfont_rect.w*sc)+1), static_cast((bfont_rect.h * sc)+1)}; BlitSurfaceColoured(tempPrint, NULL, backBuffer, &printrect, ct); SDL_FreeSurface(tempPrint); } - bfontpos+=bfontlen[curr] *sc; + bfontpos+=bfontlen(curr) *sc; } } int Graphics::len(std::string t) { int bfontpos = 0; - for (unsigned int i = 0; i < t.length(); i++) - { - int cur = (t.c_str())[i]; - bfontpos+= bfontlen[cur] ; + std::string::iterator iter = t.begin(); + while (iter != t.end()) { + int cur = utf8::unchecked::next(iter); + bfontpos += bfontlen(cur); } return bfontpos; } @@ -351,14 +369,9 @@ void Graphics::PrintOff( int _x, int _y, std::string _s, int r, int g, int b, bo if (cen) _x = ((160) - (len(_s) / 2))+_x; int bfontpos = 0; - int curr; - for (unsigned int i = 0; i < _s.length(); i++) - { - curr = (_s.c_str())[i]; - if (curr > 255 || curr < 0) - { - curr = '?'; - } + std::string::iterator iter = _s.begin(); + while (iter != _s.end()) { + int curr = utf8::unchecked::next(iter); point tpoint; tpoint.x = _x + bfontpos; tpoint.y = _y; @@ -369,16 +382,16 @@ void Graphics::PrintOff( int _x, int _y, std::string _s, int r, int g, int b, bo if (flipmode) { - //flipbfont[cur].colorTransform(bfont_rect, ct); - BlitSurfaceColoured( bfont[curr], NULL, backBuffer, &fontRect , ct); + //flipbfont[font_idx(cur)].colorTransform(bfont_rect, ct); + BlitSurfaceColoured( bfont[font_idx(curr)], NULL, backBuffer, &fontRect , ct); } else { - //bfont[cur].colorTransform(bfont_rect, ct); - //backBuffer.copyPixels(bfont[cur], bfont_rect, tpoint); - BlitSurfaceColoured( bfont[curr], NULL, backBuffer, &fontRect , ct); + //bfont[font_idx(cur)].colorTransform(bfont_rect, ct); + //backBuffer.copyPixels(bfont[font_idx(cur)], bfont_rect, tpoint); + BlitSurfaceColoured( bfont[font_idx(curr)], NULL, backBuffer, &fontRect , ct); } - bfontpos+=bfontlen[curr] ; + bfontpos+=bfontlen(curr) ; } } @@ -417,13 +430,9 @@ void Graphics::RPrint( int _x, int _y, std::string _s, int r, int g, int b, bool _x = ((308) - (_s.length() / 2)); int bfontpos = 0; int curr; - for (unsigned int i = 0; i < _s.length(); i++) - { - curr = (_s.c_str())[i]; - if (curr > 255 || curr < 0) - { - curr = '?'; - } + std::string::iterator iter = _s.begin(); + while (iter != _s.end()) { + curr = utf8::unchecked::next(iter); point tpoint; tpoint.x = _x + bfontpos; tpoint.y = _y; @@ -434,16 +443,16 @@ void Graphics::RPrint( int _x, int _y, std::string _s, int r, int g, int b, bool if (flipmode) { - //flipbfont[cur].colorTransform(bfont_rect, ct); - BlitSurfaceColoured( flipbfont[curr], NULL, backBuffer, &fontRect , ct); + //flipbfont[font_idx(cur)].colorTransform(bfont_rect, ct); + BlitSurfaceColoured( flipbfont[font_idx(curr)], NULL, backBuffer, &fontRect , ct); } else { - //bfont[cur].colorTransform(bfont_rect, ct); - //backBuffer.copyPixels(bfont[cur], bfont_rect, tpoint); - BlitSurfaceColoured( bfont[curr], NULL, backBuffer, &fontRect , ct); + //bfont[font_idx(cur)].colorTransform(bfont_rect, ct); + //backBuffer.copyPixels(bfont[font_idx(cur)], bfont_rect, tpoint); + BlitSurfaceColoured( bfont[font_idx(curr)], NULL, backBuffer, &fontRect , ct); } - bfontpos+=bfontlen[curr] ; + bfontpos+=bfontlen(curr) ; } } @@ -1035,7 +1044,8 @@ void Graphics::createtextbox( std::string t, int xp, int yp, int r/*= 255*/, int textbox[m].clear(); textbox[m].line[0] = t; textbox[m].xp = xp; - if (xp == -1) textbox[m].xp = 160 - (((t.length() / 2) + 1) * 8); + int length = utf8::unchecked::distance(t.begin(), t.end()); + if (xp == -1) textbox[m].xp = 160 - (((length / 2) + 1) * 8); textbox[m].yp = yp; textbox[m].initcol(r, g, b); textbox[m].resize(); @@ -3100,28 +3110,24 @@ void Graphics::bigrprint(int x, int y, std::string& t, int r, int g, int b, bool int bfontpos = 0; int cur; - for (size_t i = 0; i < t.length(); i++) - { - cur = (t.c_str())[i]; - if (cur > 255 || cur < 0) - { - cur = '?'; - } + std::string::iterator iter = t.begin(); + while (iter != t.end()) { + cur = utf8::unchecked::next(iter); if (flipmode) { - SDL_Surface* tempPrint = ScaleSurfaceSlow(flipbfont[cur], bfont[cur]->w *sc,bfont[cur]->h *sc); + SDL_Surface* tempPrint = ScaleSurfaceSlow(flipbfont[font_idx(cur)], bfont[font_idx(cur)]->w *sc,bfont[font_idx(cur)]->h *sc); SDL_Rect printrect = { Sint16(x + bfontpos), Sint16(y) , Sint16(bfont_rect.w*sc), Sint16(bfont_rect.h * sc)}; BlitSurfaceColoured(tempPrint, NULL, backBuffer, &printrect ,ct); SDL_FreeSurface(tempPrint); } else { - SDL_Surface* tempPrint = ScaleSurfaceSlow(bfont[cur], bfont[cur]->w *sc,bfont[cur]->h *sc); + SDL_Surface* tempPrint = ScaleSurfaceSlow(bfont[font_idx(cur)], bfont[font_idx(cur)]->w *sc,bfont[font_idx(cur)]->h *sc); SDL_Rect printrect = { Sint16((x) + bfontpos), Sint16(y) , Sint16(bfont_rect.w*sc), Sint16(bfont_rect.h * sc)}; BlitSurfaceColoured(tempPrint, NULL, backBuffer, &printrect, ct); SDL_FreeSurface(tempPrint); } - bfontpos+=bfontlen[cur]* sc; + bfontpos+=bfontlen(cur)* sc; } } diff --git a/desktop_version/src/Graphics.h b/desktop_version/src/Graphics.h index 2aa7c2ac..05445d5c 100644 --- a/desktop_version/src/Graphics.h +++ b/desktop_version/src/Graphics.h @@ -3,6 +3,7 @@ #include "GraphicsResources.h" #include +#include @@ -28,6 +29,9 @@ public: GraphicsResources grphx; + int bfontlen(char32_t ch); + int font_idx(char32_t ch); + void Makebfont(); void drawhuetile(int x, int y, int t, int c); @@ -212,7 +216,6 @@ public: std::vector bfontmask; std::vector flipbfont; std::vector flipbfontmask; - std::vector bfontlen; bool flipmode; bool setflipmode; @@ -280,6 +283,8 @@ public: bool translucentroomname; bool showmousecursor; + + std::map font_positions; }; extern Graphics graphics; diff --git a/desktop_version/src/KeyPoll.cpp b/desktop_version/src/KeyPoll.cpp index 1c3e6c6e..75992cb0 100644 --- a/desktop_version/src/KeyPoll.cpp +++ b/desktop_version/src/KeyPoll.cpp @@ -1,6 +1,7 @@ #include "KeyPoll.h" #include #include +#include void KeyPoll::setSensitivity(int _value) { @@ -94,11 +95,12 @@ void KeyPoll::Poll() if (textentrymode) { - if (evt.key.keysym.sym == SDLK_BACKSPACE) + if (evt.key.keysym.sym == SDLK_BACKSPACE && !keybuffer.empty()) { - bool kbemptybefore = keybuffer.empty(); - keybuffer = keybuffer.substr(0, keybuffer.length() - 1); - if (!kbemptybefore && keybuffer.empty()) + std::string::iterator iter = keybuffer.end(); + utf8::unchecked::prior(iter); + keybuffer = keybuffer.substr(0, iter - keybuffer.begin()); + if (keybuffer.empty()) { linealreadyemptykludge = true; } diff --git a/desktop_version/src/Textbox.cpp b/desktop_version/src/Textbox.cpp index a33512d2..75f93ae1 100644 --- a/desktop_version/src/Textbox.cpp +++ b/desktop_version/src/Textbox.cpp @@ -1,4 +1,5 @@ #include "Textbox.h" +#include textboxclass::textboxclass() { @@ -133,7 +134,8 @@ void textboxclass::resize() max = 0; for (int iter = 0; iter < numlines; iter++) { - if (line[iter].length() > (unsigned int)max) max = line[iter].length(); + unsigned int len = utf8::unchecked::distance(line[iter].begin(), line[iter].end()); + if (len > (unsigned int)max) max = len; } lw = max; diff --git a/desktop_version/src/editor.cpp b/desktop_version/src/editor.cpp index b5c3774f..20d587df 100644 --- a/desktop_version/src/editor.cpp +++ b/desktop_version/src/editor.cpp @@ -16,6 +16,7 @@ #include "FileSystemUtils.h" #include +#include edlevelclass::edlevelclass() { @@ -2625,7 +2626,8 @@ void editorrender( KeyPoll& key, Graphics& dwgfx, Game& game, mapclass& map, ent } else { - fillboxabs(dwgfx, (edentity[i].x*8)- (ed.levx*40*8),(edentity[i].y*8)- (ed.levy*30*8),edentity[i].scriptname.length()*8,8,dwgfx.getRGB(96,96,96)); + int length = utf8::unchecked::distance(edentity[i].scriptname.begin(), edentity[i].scriptname.end()); + fillboxabs(dwgfx, (edentity[i].x*8)- (ed.levx*40*8),(edentity[i].y*8)- (ed.levy*30*8),length*8,8,dwgfx.getRGB(96,96,96)); } dwgfx.bprint((edentity[i].x*8)- (ed.levx*40*8),(edentity[i].y*8)- (ed.levy*30*8), edentity[i].scriptname, 196, 196, 255 - help.glow); break; @@ -3796,7 +3798,7 @@ void editorinput( KeyPoll& key, Graphics& dwgfx, Game& game, mapclass& map, enti ed.sby--; } key.keybuffer=ed.sb[ed.pagey+ed.sby]; - ed.sbx = ed.sb[ed.pagey+ed.sby].length(); + ed.sbx = utf8::unchecked::distance(ed.sb[ed.pagey+ed.sby].begin(), ed.sb[ed.pagey+ed.sby].end()); } if (key.isDown(27)) @@ -3881,7 +3883,7 @@ void editorinput( KeyPoll& key, Graphics& dwgfx, Game& game, mapclass& map, enti } ed.sb[ed.pagey+ed.sby]=key.keybuffer; - ed.sbx = ed.sb[ed.pagey+ed.sby].length(); + ed.sbx = utf8::unchecked::distance(ed.sb[ed.pagey+ed.sby].begin(), ed.sb[ed.pagey+ed.sby].end()); if(!game.press_map && !key.isDown(27)) game.mapheld=false; if (!game.mapheld) @@ -3900,7 +3902,7 @@ void editorinput( KeyPoll& key, Graphics& dwgfx, Game& game, mapclass& map, enti } if(ed.sby+ed.pagey>=ed.sblength) ed.sblength=ed.sby+ed.pagey; key.keybuffer=ed.sb[ed.pagey+ed.sby]; - ed.sbx = ed.sb[ed.pagey+ed.sby].length(); + ed.sbx = utf8::unchecked::distance(ed.sb[ed.pagey+ed.sby].begin(), ed.sb[ed.pagey+ed.sby].end()); } else { diff --git a/third_party/utfcpp/LICENSE b/third_party/utfcpp/LICENSE new file mode 100644 index 00000000..36b7cd93 --- /dev/null +++ b/third_party/utfcpp/LICENSE @@ -0,0 +1,23 @@ +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/third_party/utfcpp/source/utf8/core.h b/third_party/utfcpp/source/utf8/core.h new file mode 100644 index 00000000..36581614 --- /dev/null +++ b/third_party/utfcpp/source/utf8/core.h @@ -0,0 +1,287 @@ +// Copyright 2006 Nemanja Trifunovic + +/* +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. +*/ + + +#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 +#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 + +#include + +// Determine the C++ standard version. +// If the user defines UTF_CPP_CPLUSPLUS, use that. +// Otherwise, trust the unreliable predefined macro __cplusplus + +#if !defined UTF_CPP_CPLUSPLUS + #define UTF_CPP_CPLUSPLUS __cplusplus +#endif + +#if UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later + #define OVERRIDE override + #define NOEXCEPT noexcept +#else // C++ 98/03 + #define OVERRIDE + #define NOEXCEPT throw() +#endif // C++ 11 or later + + +namespace utf8 +{ + // The typedefs for 8-bit, 16-bit and 32-bit unsigned integers + // You may need to change them to match your system. + // These typedefs have the same names as ones from cstdint, or boost/cstdint + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; + +// Helper code - not intended to be directly called by the library users. May be changed at any time +namespace internal +{ + // Unicode constants + // Leading (high) surrogates: 0xd800 - 0xdbff + // Trailing (low) surrogates: 0xdc00 - 0xdfff + const uint16_t LEAD_SURROGATE_MIN = 0xd800u; + const uint16_t LEAD_SURROGATE_MAX = 0xdbffu; + const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u; + const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu; + const uint16_t LEAD_OFFSET = 0xd7c0u; // LEAD_SURROGATE_MIN - (0x10000 >> 10) + const uint32_t SURROGATE_OFFSET = 0xfca02400u; // 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN + + // Maximum valid value for a Unicode code point + const uint32_t CODE_POINT_MAX = 0x0010ffffu; + + template + inline uint8_t mask8(octet_type oc) + { + return static_cast(0xff & oc); + } + template + inline bool is_trail(octet_type oc) + { + return ((utf8::internal::mask8(oc) >> 6) == 0x2); + } + + template + inline bool is_surrogate(u16 cp) + { + return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); + } + + template + inline bool is_code_point_valid(u32 cp) + { + return (cp <= CODE_POINT_MAX && !utf8::internal::is_surrogate(cp)); + } + + template + inline typename std::iterator_traits::difference_type + sequence_length(octet_iterator lead_it) + { + uint8_t lead = utf8::internal::mask8(*lead_it); + if (lead < 0x80) + return 1; + else if ((lead >> 5) == 0x6) + return 2; + else if ((lead >> 4) == 0xe) + return 3; + else if ((lead >> 3) == 0x1e) + return 4; + else + return 0; + } + + template + inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length) + { + if (cp < 0x80) { + if (length != 1) + return true; + } + else if (cp < 0x800) { + if (length != 2) + return true; + } + else if (cp < 0x10000) { + if (length != 3) + return true; + } + + return false; + } + + enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT}; + + /// Helper for get_sequence_x + template + utf_error increase_safely(octet_iterator& it, octet_iterator end) + { + if (++it == end) + return NOT_ENOUGH_ROOM; + + if (!utf8::internal::is_trail(*it)) + return INCOMPLETE_SEQUENCE; + + return UTF8_OK; + } + + #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;} + + /// get_sequence_x functions decode utf-8 sequences of the length x + template + utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t& code_point) + { + if (it == end) + return NOT_ENOUGH_ROOM; + + code_point = utf8::internal::mask8(*it); + + return UTF8_OK; + } + + template + utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t& code_point) + { + if (it == end) + return NOT_ENOUGH_ROOM; + + code_point = utf8::internal::mask8(*it); + + UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) + + code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f); + + return UTF8_OK; + } + + template + utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t& code_point) + { + if (it == end) + return NOT_ENOUGH_ROOM; + + code_point = utf8::internal::mask8(*it); + + UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) + + code_point = ((code_point << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff); + + UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) + + code_point += (*it) & 0x3f; + + return UTF8_OK; + } + + template + utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t& code_point) + { + if (it == end) + return NOT_ENOUGH_ROOM; + + code_point = utf8::internal::mask8(*it); + + UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) + + code_point = ((code_point << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff); + + UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) + + code_point += (utf8::internal::mask8(*it) << 6) & 0xfff; + + UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) + + code_point += (*it) & 0x3f; + + return UTF8_OK; + } + + #undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR + + template + utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point) + { + if (it == end) + return NOT_ENOUGH_ROOM; + + // Save the original value of it so we can go back in case of failure + // Of course, it does not make much sense with i.e. stream iterators + octet_iterator original_it = it; + + uint32_t cp = 0; + // Determine the sequence length based on the lead octet + typedef typename std::iterator_traits::difference_type octet_difference_type; + const octet_difference_type length = utf8::internal::sequence_length(it); + + // Get trail octets and calculate the code point + utf_error err = UTF8_OK; + switch (length) { + case 0: + return INVALID_LEAD; + case 1: + err = utf8::internal::get_sequence_1(it, end, cp); + break; + case 2: + err = utf8::internal::get_sequence_2(it, end, cp); + break; + case 3: + err = utf8::internal::get_sequence_3(it, end, cp); + break; + case 4: + err = utf8::internal::get_sequence_4(it, end, cp); + break; + } + + if (err == UTF8_OK) { + // Decoding succeeded. Now, security checks... + if (utf8::internal::is_code_point_valid(cp)) { + if (!utf8::internal::is_overlong_sequence(cp, length)){ + // Passed! Return here. + code_point = cp; + ++it; + return UTF8_OK; + } + else + err = OVERLONG_SEQUENCE; + } + else + err = INVALID_CODE_POINT; + } + + // Failure branch - restore the original value of the iterator + it = original_it; + return err; + } + + template + inline utf_error validate_next(octet_iterator& it, octet_iterator end) { + uint32_t ignored; + return utf8::internal::validate_next(it, end, ignored); + } + +} // namespace internal +} + +#endif // header guard + diff --git a/third_party/utfcpp/source/utf8/unchecked.h b/third_party/utfcpp/source/utf8/unchecked.h new file mode 100644 index 00000000..1accbd4e --- /dev/null +++ b/third_party/utfcpp/source/utf8/unchecked.h @@ -0,0 +1,90 @@ +// Copyright 2006 Nemanja Trifunovic + +/* +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. +*/ + + +#ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 +#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 + +#include "core.h" + +namespace utf8 +{ + namespace unchecked + { + template + uint32_t next(octet_iterator& it) + { + uint32_t cp = utf8::internal::mask8(*it); + typename std::iterator_traits::difference_type length = utf8::internal::sequence_length(it); + switch (length) { + case 1: + break; + case 2: + it++; + cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f); + break; + case 3: + ++it; + cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff); + ++it; + cp += (*it) & 0x3f; + break; + case 4: + ++it; + cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff); + ++it; + cp += (utf8::internal::mask8(*it) << 6) & 0xfff; + ++it; + cp += (*it) & 0x3f; + break; + } + ++it; + return cp; + } + + template + uint32_t prior(octet_iterator& it) + { + while (utf8::internal::is_trail(*(--it))) ; + octet_iterator temp = it; + return utf8::unchecked::next(temp); + } + + template + typename std::iterator_traits::difference_type + distance (octet_iterator first, octet_iterator last) + { + typename std::iterator_traits::difference_type dist; + for (dist = 0; first < last; ++dist) + utf8::unchecked::next(first); + return dist; + } + } // namespace utf8::unchecked +} // namespace utf8 + + +#endif // header guard +