18 return (b & 0x80) == 0x80;
25 return (b & 0xC0) == 0x80;
32 const char* next = cur;
51 sequenceLength =
static_cast<int>(next - cur);
58 const char* cur = start;
59 int sequenceLength = 0;
61 for (
int i = 0; i < numCharacters; ++i)
66 return static_cast<int>(cur - start);
82 inline bool IsValidUtf8(
const char* str,
bool& hasFourByteChars)
106 bool expectStartByte =
true;
108 uint8_t curNumBytes = 0;
109 uint8_t curIndex = 0;
110 const char* curCharStartByte = str;
111 hasFourByteChars =
false;
121 curCharStartByte = c;
124 if ((*c & 0b10000000) == 0b00000000)
128 expectStartByte =
true;
130 else if ((*c & 0b11100000) == 0b11000000)
132 if ((*c & 0b11111110) == 0b11000000)
139 expectStartByte =
false;
141 else if ((*c & 0b11110000) == 0b11100000)
144 expectStartByte =
false;
146 else if ((*c & 0b11111000) == 0b11110000)
148 if ((*c & 0b11111111) == 0b11110101)
154 if ((*c & 0b11111110) == 0b11110110)
161 expectStartByte =
false;
164 hasFourByteChars =
true;
174 bool isValidContinuationCodeUnit =
false;
177 if (curIndex == 1 && ((*curCharStartByte & 0b11111111) == 0b11100000))
180 if ((*c & 0b11100000) == 0b10100000)
182 isValidContinuationCodeUnit =
true;
185 else if (curIndex == 1 && ((*curCharStartByte & 0b11111111) == 0b11101101))
188 if ((*c & 0b11100000) == 0b10000000)
190 isValidContinuationCodeUnit =
true;
193 else if (curIndex == 1 && ((*curCharStartByte & 0b11111111) == 0b11110000))
196 if (((*c & 0b11000000) == 0b10000000) && ((*c & 0b11110000) != 0b10000000))
198 isValidContinuationCodeUnit =
true;
201 else if (curIndex == 1 && ((*curCharStartByte & 0b11111111) == 0b11110100))
204 if ((*c & 0b11110000) == 0b10000000)
206 isValidContinuationCodeUnit =
true;
212 if ((*c & 0b11000000) == 0b10000000)
214 isValidContinuationCodeUnit =
true;
218 if (isValidContinuationCodeUnit)
221 if (curIndex == (curNumBytes - 1))
223 expectStartByte =
true;
237 return expectStartByte;
bool IsUtf8SequenceByte(char b)
Definition: utf8.h:22
int CountUtf8Bytes(const char *start, int numCharacters)
Definition: utf8.h:56
bool IsUtf8StartByte(char b)
Definition: utf8.h:15
JSON (JavaScript Object Notation).
Definition: adsapi.h:16
bool IsValidUtf8(const char *str, bool &hasFourByteChars)
Definition: utf8.h:82
const char * AdvanceToNextUtf8Character(const char *cur, int &sequenceLength)
Definition: utf8.h:28