OpenShot Audio Library | OpenShotAudio  0.6.0
juce_CharPointer_UTF8.h
/*
  ==============================================================================

   This file is part of the JUCE library.
   Copyright (c) 2022 - Raw Material Software Limited

   JUCE is an open source library subject to commercial or open-source
   licensing.

   The code included in this file is provided under the terms of the ISC license
   http://www.isc.org/downloads/software-support-policy/isc-license. Permission
   To use, copy, modify, and/or distribute this software for any purpose with or
   without fee is hereby granted provided that the above copyright notice and
   this permission notice appear in all copies.

   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
   DISCLAIMED.

  ==============================================================================
*/

23 namespace juce
24 {
25 
26 //==============================================================================
34 class CharPointer_UTF8 final
35 {
36 public:
37  using CharType = char;
38 
39  explicit CharPointer_UTF8 (const CharType* rawPointer) noexcept
40  : data (const_cast<CharType*> (rawPointer))
41  {
42  }
43 
44  CharPointer_UTF8 (const CharPointer_UTF8& other) = default;
45 
46  CharPointer_UTF8 operator= (CharPointer_UTF8 other) noexcept
47  {
48  data = other.data;
49  return *this;
50  }
51 
52  CharPointer_UTF8 operator= (const CharType* text) noexcept
53  {
54  data = const_cast<CharType*> (text);
55  return *this;
56  }
57 
59  bool operator== (CharPointer_UTF8 other) const noexcept { return data == other.data; }
60  bool operator!= (CharPointer_UTF8 other) const noexcept { return data != other.data; }
61  bool operator<= (CharPointer_UTF8 other) const noexcept { return data <= other.data; }
62  bool operator< (CharPointer_UTF8 other) const noexcept { return data < other.data; }
63  bool operator>= (CharPointer_UTF8 other) const noexcept { return data >= other.data; }
64  bool operator> (CharPointer_UTF8 other) const noexcept { return data > other.data; }
65 
67  CharType* getAddress() const noexcept { return data; }
68 
70  operator const CharType*() const noexcept { return data; }
71 
73  bool isEmpty() const noexcept { return *data == 0; }
74 
76  bool isNotEmpty() const noexcept { return *data != 0; }
77 
79  juce_wchar operator*() const noexcept
80  {
81  auto byte = (signed char) *data;
82 
83  if (byte >= 0)
84  return (juce_wchar) (uint8) byte;
85 
86  uint32 n = (uint32) (uint8) byte;
87  uint32 mask = 0x7f;
88  uint32 bit = 0x40;
89  int numExtraValues = 0;
90 
91  while ((n & bit) != 0 && bit > 0x8)
92  {
93  mask >>= 1;
94  ++numExtraValues;
95  bit >>= 1;
96  }
97 
98  n &= mask;
99 
100  for (int i = 1; i <= numExtraValues; ++i)
101  {
102  auto nextByte = (uint32) (uint8) data[i];
103 
104  if ((nextByte & 0xc0) != 0x80)
105  break;
106 
107  n <<= 6;
108  n |= (nextByte & 0x3f);
109  }
110 
111  return (juce_wchar) n;
112  }
113 
116  {
117  jassert (*data != 0); // trying to advance past the end of the string?
118  auto n = (signed char) *data++;
119 
120  if (n < 0)
121  {
122  uint8 bit = 0x40;
123 
124  while ((static_cast<uint8> (n) & bit) != 0 && bit > 0x8)
125  {
126  ++data;
127  bit = static_cast<uint8> (bit >> 1);
128  }
129  }
130 
131  return *this;
132  }
133 
136  {
137  int count = 0;
138 
139  while ((*--data & 0xc0) == 0x80 && ++count < 4)
140  {}
141 
142  return *this;
143  }
144 
147  juce_wchar getAndAdvance() noexcept
148  {
149  auto byte = (signed char) *data++;
150 
151  if (byte >= 0)
152  return (juce_wchar) (uint8) byte;
153 
154  uint32 n = (uint32) (uint8) byte;
155  uint32 mask = 0x7f;
156  uint32 bit = 0x40;
157  int numExtraValues = 0;
158 
159  while ((n & bit) != 0 && bit > 0x8)
160  {
161  mask >>= 1;
162  ++numExtraValues;
163  bit >>= 1;
164  }
165 
166  n &= mask;
167 
168  while (--numExtraValues >= 0)
169  {
170  auto nextByte = (uint32) (uint8) *data;
171 
172  if ((nextByte & 0xc0) != 0x80)
173  break;
174 
175  ++data;
176  n <<= 6;
177  n |= (nextByte & 0x3f);
178  }
179 
180  return (juce_wchar) n;
181  }
182 
185  {
186  CharPointer_UTF8 temp (*this);
187  ++*this;
188  return temp;
189  }
190 
192  void operator+= (int numToSkip) noexcept
193  {
194  if (numToSkip < 0)
195  {
196  while (++numToSkip <= 0)
197  --*this;
198  }
199  else
200  {
201  while (--numToSkip >= 0)
202  ++*this;
203  }
204  }
205 
207  void operator-= (int numToSkip) noexcept
208  {
209  operator+= (-numToSkip);
210  }
211 
213  juce_wchar operator[] (int characterIndex) const noexcept
214  {
215  auto p (*this);
216  p += characterIndex;
217  return *p;
218  }
219 
221  CharPointer_UTF8 operator+ (int numToSkip) const noexcept
222  {
223  auto p (*this);
224  p += numToSkip;
225  return p;
226  }
227 
229  CharPointer_UTF8 operator- (int numToSkip) const noexcept
230  {
231  auto p (*this);
232  p += -numToSkip;
233  return p;
234  }
235 
237  size_t length() const noexcept
238  {
239  auto* d = data;
240  size_t count = 0;
241 
242  for (;;)
243  {
244  auto n = (uint32) (uint8) *d++;
245 
246  if ((n & 0x80) != 0)
247  {
248  while ((*d & 0xc0) == 0x80)
249  ++d;
250  }
251  else if (n == 0)
252  break;
253 
254  ++count;
255  }
256 
257  return count;
258  }
259 
261  size_t lengthUpTo (const size_t maxCharsToCount) const noexcept
262  {
263  return CharacterFunctions::lengthUpTo (*this, maxCharsToCount);
264  }
265 
267  size_t lengthUpTo (const CharPointer_UTF8 end) const noexcept
268  {
269  return CharacterFunctions::lengthUpTo (*this, end);
270  }
271 
275  size_t sizeInBytes() const noexcept
276  {
277  JUCE_BEGIN_IGNORE_WARNINGS_MSVC (6387)
278  jassert (data != nullptr);
279  return strlen (data) + 1;
280  JUCE_END_IGNORE_WARNINGS_MSVC
281  }
282 
286  static size_t getBytesRequiredFor (const juce_wchar charToWrite) noexcept
287  {
288  size_t num = 1;
289  auto c = (uint32) charToWrite;
290 
291  if (c >= 0x80)
292  {
293  ++num;
294  if (c >= 0x800)
295  {
296  ++num;
297  if (c >= 0x10000)
298  ++num;
299  }
300  }
301 
302  return num;
303  }
304 
309  template <class CharPointer>
310  static size_t getBytesRequiredFor (CharPointer text) noexcept
311  {
312  size_t count = 0;
313 
314  while (auto n = text.getAndAdvance())
315  count += getBytesRequiredFor (n);
316 
317  return count;
318  }
319 
322  {
323  return CharPointer_UTF8 (data + strlen (data));
324  }
325 
327  void write (const juce_wchar charToWrite) noexcept
328  {
329  auto c = (uint32) charToWrite;
330 
331  if (c >= 0x80)
332  {
333  int numExtraBytes = 1;
334  if (c >= 0x800)
335  {
336  ++numExtraBytes;
337  if (c >= 0x10000)
338  ++numExtraBytes;
339  }
340 
341  *data++ = (CharType) ((uint32) (0xff << (7 - numExtraBytes)) | (c >> (numExtraBytes * 6)));
342 
343  while (--numExtraBytes >= 0)
344  *data++ = (CharType) (0x80 | (0x3f & (c >> (numExtraBytes * 6))));
345  }
346  else
347  {
348  *data++ = (CharType) c;
349  }
350  }
351 
353  void writeNull() const noexcept
354  {
355  *data = 0;
356  }
357 
359  template <typename CharPointer>
360  void writeAll (const CharPointer src) noexcept
361  {
362  CharacterFunctions::copyAll (*this, src);
363  }
364 
366  void writeAll (const CharPointer_UTF8 src) noexcept
367  {
368  auto* s = src.data;
369 
370  while ((*data = *s) != 0)
371  {
372  ++data;
373  ++s;
374  }
375  }
376 
381  template <typename CharPointer>
382  size_t writeWithDestByteLimit (const CharPointer src, const size_t maxDestBytes) noexcept
383  {
384  return CharacterFunctions::copyWithDestByteLimit (*this, src, maxDestBytes);
385  }
386 
391  template <typename CharPointer>
392  void writeWithCharLimit (const CharPointer src, const int maxChars) noexcept
393  {
394  CharacterFunctions::copyWithCharLimit (*this, src, maxChars);
395  }
396 
398  template <typename CharPointer>
399  int compare (const CharPointer other) const noexcept
400  {
401  return CharacterFunctions::compare (*this, other);
402  }
403 
405  template <typename CharPointer>
406  int compareUpTo (const CharPointer other, const int maxChars) const noexcept
407  {
408  return CharacterFunctions::compareUpTo (*this, other, maxChars);
409  }
410 
412  template <typename CharPointer>
413  int compareIgnoreCase (const CharPointer other) const noexcept
414  {
415  return CharacterFunctions::compareIgnoreCase (*this, other);
416  }
417 
419  int compareIgnoreCase (const CharPointer_UTF8 other) const noexcept
420  {
421  return CharacterFunctions::compareIgnoreCase (*this, other);
422  }
423 
425  template <typename CharPointer>
426  int compareIgnoreCaseUpTo (const CharPointer other, const int maxChars) const noexcept
427  {
428  return CharacterFunctions::compareIgnoreCaseUpTo (*this, other, maxChars);
429  }
430 
432  template <typename CharPointer>
433  int indexOf (const CharPointer stringToFind) const noexcept
434  {
435  return CharacterFunctions::indexOf (*this, stringToFind);
436  }
437 
439  int indexOf (const juce_wchar charToFind) const noexcept
440  {
441  return CharacterFunctions::indexOfChar (*this, charToFind);
442  }
443 
445  int indexOf (const juce_wchar charToFind, const bool ignoreCase) const noexcept
446  {
447  return ignoreCase ? CharacterFunctions::indexOfCharIgnoreCase (*this, charToFind)
448  : CharacterFunctions::indexOfChar (*this, charToFind);
449  }
450 
452  bool isWhitespace() const noexcept { return CharacterFunctions::isWhitespace ((juce_wchar) *(*this)); }
454  bool isDigit() const noexcept { const CharType c = *data; return c >= '0' && c <= '9'; }
456  bool isLetter() const noexcept { return CharacterFunctions::isLetter (operator*()) != 0; }
458  bool isLetterOrDigit() const noexcept { return CharacterFunctions::isLetterOrDigit (operator*()) != 0; }
460  bool isUpperCase() const noexcept { return CharacterFunctions::isUpperCase (operator*()) != 0; }
462  bool isLowerCase() const noexcept { return CharacterFunctions::isLowerCase (operator*()) != 0; }
463 
465  juce_wchar toUpperCase() const noexcept { return CharacterFunctions::toUpperCase (operator*()); }
467  juce_wchar toLowerCase() const noexcept { return CharacterFunctions::toLowerCase (operator*()); }
468 
470  int getIntValue32() const noexcept { return atoi (data); }
471 
473  int64 getIntValue64() const noexcept
474  {
475  #if JUCE_WINDOWS && ! JUCE_MINGW
476  return _atoi64 (data);
477  #else
478  return atoll (data);
479  #endif
480  }
481 
483  double getDoubleValue() const noexcept { return CharacterFunctions::getDoubleValue (*this); }
484 
487 
490 
492  static bool canRepresent (juce_wchar character) noexcept
493  {
494  return ((uint32) character) < (uint32) 0x10ffff;
495  }
496 
498  static bool isValidString (const CharType* dataToTest, int maxBytesToRead)
499  {
500  while (--maxBytesToRead >= 0 && *dataToTest != 0)
501  {
502  auto byte = (signed char) *dataToTest++;
503 
504  if (byte < 0)
505  {
506  int bit = 0x40;
507  int numExtraValues = 0;
508 
509  while ((byte & bit) != 0)
510  {
511  if (bit < 8)
512  return false;
513 
514  ++numExtraValues;
515  bit >>= 1;
516 
517  if (bit == 8 && (numExtraValues > maxBytesToRead
518  || *CharPointer_UTF8 (dataToTest - 1) > 0x10ffff))
519  return false;
520  }
521 
522  if (numExtraValues == 0)
523  return false;
524 
525  maxBytesToRead -= numExtraValues;
526  if (maxBytesToRead < 0)
527  return false;
528 
529  while (--numExtraValues >= 0)
530  if ((*dataToTest++ & 0xc0) != 0x80)
531  return false;
532  }
533  }
534 
535  return true;
536  }
537 
540  {
541  return CharPointer_UTF8 (reinterpret_cast<Atomic<CharType*>&> (data).exchange (newValue.data));
542  }
543 
545  enum
546  {
547  byteOrderMark1 = 0xef,
548  byteOrderMark2 = 0xbb,
549  byteOrderMark3 = 0xbf
550  };
551 
555  static bool isByteOrderMark (const void* possibleByteOrder) noexcept
556  {
557  JUCE_BEGIN_IGNORE_WARNINGS_MSVC (28182)
558  jassert (possibleByteOrder != nullptr);
559  auto c = static_cast<const uint8*> (possibleByteOrder);
560 
561  return c[0] == (uint8) byteOrderMark1
562  && c[1] == (uint8) byteOrderMark2
563  && c[2] == (uint8) byteOrderMark3;
564  JUCE_END_IGNORE_WARNINGS_MSVC
565  }
566 
567 private:
568  CharType* data;
569 };
570 
571 } // namespace juce
int compareIgnoreCase(const CharPointer_UTF8 other) const noexcept
int indexOf(const juce_wchar charToFind) const noexcept
int compareUpTo(const CharPointer other, const int maxChars) const noexcept
void writeAll(const CharPointer src) noexcept
CharPointer_UTF8 findTerminatingNull() const noexcept
int compareIgnoreCaseUpTo(const CharPointer other, const int maxChars) const noexcept
static size_t getBytesRequiredFor(CharPointer text) noexcept
void operator+=(int numToSkip) noexcept
bool isLetterOrDigit() const noexcept
juce_wchar operator*() const noexcept
bool isLetter() const noexcept
void incrementToEndOfWhitespace() noexcept
size_t lengthUpTo(const CharPointer_UTF8 end) const noexcept
juce_wchar getAndAdvance() noexcept
size_t writeWithDestByteLimit(const CharPointer src, const size_t maxDestBytes) noexcept
bool isNotEmpty() const noexcept
size_t lengthUpTo(const size_t maxCharsToCount) const noexcept
CharPointer_UTF8 operator-(int numToSkip) const noexcept
void operator-=(int numToSkip) noexcept
double getDoubleValue() const noexcept
size_t sizeInBytes() const noexcept
CharPointer_UTF8 operator--() noexcept
void writeAll(const CharPointer_UTF8 src) noexcept
void writeNull() const noexcept
bool isEmpty() const noexcept
juce_wchar toUpperCase() const noexcept
static size_t getBytesRequiredFor(const juce_wchar charToWrite) noexcept
int indexOf(const CharPointer stringToFind) const noexcept
CharPointer_UTF8 & operator++() noexcept
int64 getIntValue64() const noexcept
static bool canRepresent(juce_wchar character) noexcept
int indexOf(const juce_wchar charToFind, const bool ignoreCase) const noexcept
int getIntValue32() const noexcept
bool isUpperCase() const noexcept
int compareIgnoreCase(const CharPointer other) const noexcept
CharType * getAddress() const noexcept
bool isLowerCase() const noexcept
static bool isByteOrderMark(const void *possibleByteOrder) noexcept
void writeWithCharLimit(const CharPointer src, const int maxChars) noexcept
CharPointer_UTF8 atomicSwap(const CharPointer_UTF8 newValue)
static bool isValidString(const CharType *dataToTest, int maxBytesToRead)
juce_wchar toLowerCase() const noexcept
juce_wchar operator[](int characterIndex) const noexcept
CharPointer_UTF8 operator+(int numToSkip) const noexcept
int compare(const CharPointer other) const noexcept
bool isWhitespace() const noexcept
bool operator==(CharPointer_UTF8 other) const noexcept
CharPointer_UTF8 findEndOfWhitespace() const noexcept
bool isDigit() const noexcept
void write(const juce_wchar charToWrite) noexcept
size_t length() const noexcept
static void incrementToEndOfWhitespace(Type &text) noexcept
static int compare(juce_wchar char1, juce_wchar char2) noexcept
static juce_wchar toLowerCase(juce_wchar character) noexcept
static size_t copyWithDestByteLimit(DestCharPointerType &dest, SrcCharPointerType src, size_t maxBytesToWrite) noexcept
static int indexOfCharIgnoreCase(Type text, juce_wchar charToFind) noexcept
static int compareIgnoreCaseUpTo(CharPointerType1 s1, CharPointerType2 s2, int maxChars) noexcept
static int indexOfChar(Type text, const juce_wchar charToFind) noexcept
static int compareIgnoreCase(juce_wchar char1, juce_wchar char2) noexcept
static bool isLowerCase(juce_wchar character) noexcept
static bool isLetter(char character) noexcept
static int indexOf(CharPointerType1 textToSearch, const CharPointerType2 substringToLookFor) noexcept
static bool isWhitespace(char character) noexcept
static size_t lengthUpTo(CharPointerType text, const size_t maxCharsToCount) noexcept
static Type findEndOfWhitespace(Type text) noexcept
static void copyWithCharLimit(DestCharPointerType &dest, SrcCharPointerType src, int maxChars) noexcept
static bool isLetterOrDigit(char character) noexcept
static juce_wchar toUpperCase(juce_wchar character) noexcept
static bool isUpperCase(juce_wchar character) noexcept
static double getDoubleValue(CharPointerType text) noexcept
static void copyAll(DestCharPointerType &dest, SrcCharPointerType src) noexcept
static int compareUpTo(CharPointerType1 s1, CharPointerType2 s2, int maxChars) noexcept