OpenShot Audio Library | OpenShotAudio  0.6.0
juce_CharacterFunctions.cpp
1 /*
2  ==============================================================================
3 
4  This file is part of the JUCE library.
5  Copyright (c) 2022 - Raw Material Software Limited
6 
7  JUCE is an open source library subject to commercial or open-source
8  licensing.
9 
10  The code included in this file is provided under the terms of the ISC license
11  http://www.isc.org/downloads/software-support-policy/isc-license. Permission
12  To use, copy, modify, and/or distribute this software for any purpose with or
13  without fee is hereby granted provided that the above copyright notice and
14  this permission notice appear in all copies.
15 
16  JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
17  EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
18  DISCLAIMED.
19 
20  ==============================================================================
21 */
22 
23 namespace juce
24 {
25 
26 JUCE_BEGIN_IGNORE_WARNINGS_MSVC (4514 4996)
27 
28 juce_wchar CharacterFunctions::toUpperCase (const juce_wchar character) noexcept
29 {
30  return (juce_wchar) towupper ((wint_t) character);
31 }
32 
33 juce_wchar CharacterFunctions::toLowerCase (const juce_wchar character) noexcept
34 {
35  return (juce_wchar) towlower ((wint_t) character);
36 }
37 
38 bool CharacterFunctions::isUpperCase (const juce_wchar character) noexcept
39 {
40  #if JUCE_WINDOWS
41  return iswupper ((wint_t) character) != 0;
42  #else
43  return toLowerCase (character) != character;
44  #endif
45 }
46 
47 bool CharacterFunctions::isLowerCase (const juce_wchar character) noexcept
48 {
49  #if JUCE_WINDOWS
50  return iswlower ((wint_t) character) != 0;
51  #else
52  return toUpperCase (character) != character;
53  #endif
54 }
55 
56 JUCE_END_IGNORE_WARNINGS_MSVC
57 
58 //==============================================================================
59 bool CharacterFunctions::isWhitespace (const char character) noexcept
60 {
61  return character == ' ' || (character <= 13 && character >= 9);
62 }
63 
64 bool CharacterFunctions::isWhitespace (const juce_wchar character) noexcept
65 {
66  return iswspace ((wint_t) character) != 0;
67 }
68 
69 bool CharacterFunctions::isDigit (const char character) noexcept
70 {
71  return (character >= '0' && character <= '9');
72 }
73 
74 bool CharacterFunctions::isDigit (const juce_wchar character) noexcept
75 {
76  return iswdigit ((wint_t) character) != 0;
77 }
78 
79 bool CharacterFunctions::isLetter (const char character) noexcept
80 {
81  return (character >= 'a' && character <= 'z')
82  || (character >= 'A' && character <= 'Z');
83 }
84 
85 bool CharacterFunctions::isLetter (const juce_wchar character) noexcept
86 {
87  return iswalpha ((wint_t) character) != 0;
88 }
89 
90 bool CharacterFunctions::isLetterOrDigit (const char character) noexcept
91 {
92  return (character >= 'a' && character <= 'z')
93  || (character >= 'A' && character <= 'Z')
94  || (character >= '0' && character <= '9');
95 }
96 
97 bool CharacterFunctions::isLetterOrDigit (const juce_wchar character) noexcept
98 {
99  return iswalnum ((wint_t) character) != 0;
100 }
101 
102 bool CharacterFunctions::isPrintable (const char character) noexcept
103 {
104  return (character >= ' ' && character <= '~');
105 }
106 
107 bool CharacterFunctions::isPrintable (const juce_wchar character) noexcept
108 {
109  return iswprint ((wint_t) character) != 0;
110 }
111 
112 int CharacterFunctions::getHexDigitValue (const juce_wchar digit) noexcept
113 {
114  auto d = (unsigned int) (digit - '0');
115 
116  if (d < (unsigned int) 10)
117  return (int) d;
118 
119  d += (unsigned int) ('0' - 'a');
120 
121  if (d < (unsigned int) 6)
122  return (int) d + 10;
123 
124  d += (unsigned int) ('a' - 'A');
125 
126  if (d < (unsigned int) 6)
127  return (int) d + 10;
128 
129  return -1;
130 }
131 
132 double CharacterFunctions::mulexp10 (const double value, int exponent) noexcept
133 {
134  if (exponent == 0)
135  return value;
136 
137  if (exactlyEqual (value, 0.0))
138  return 0;
139 
140  const bool negative = (exponent < 0);
141 
142  if (negative)
143  exponent = -exponent;
144 
145  double result = 1.0, power = 10.0;
146 
147  for (int bit = 1; exponent != 0; bit <<= 1)
148  {
149  if ((exponent & bit) != 0)
150  {
151  exponent ^= bit;
152  result *= power;
153 
154  if (exponent == 0)
155  break;
156  }
157 
158  power *= power;
159  }
160 
161  return negative ? (value / result) : (value * result);
162 }
163 
165 {
166  if (c < 0x80 || c >= 0xa0)
167  return (juce_wchar) c;
168 
169  static const uint16 lookup[] = { 0x20AC, 0x0007, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
170  0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0007, 0x017D, 0x0007,
171  0x0007, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
172  0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x0007, 0x017E, 0x0178 };
173 
174  return (juce_wchar) lookup[c - 0x80];
175 }
176 
177 
178 //==============================================================================
179 //==============================================================================
180 #if JUCE_UNIT_TESTS
181 
// Macros for building the { "literal text", literal value } entries of the
// readDoubleValue test table from a single numeric token.

// Turns the argument into a string literal exactly as written.
#define QUOTE(token) #token
// Stringifies the argument after any macro expansion has taken place.
#define STR(expr) QUOTE(expr)
// A pair whose text and expected value are given separately.
#define ASYM_CHARPTR_DOUBLE_PAIR(text, number) std::pair<const char*, double> (STR(text), number)
// A pair whose text is the spelling of the expected value itself.
#define CHARPTR_DOUBLE_PAIR(number) ASYM_CHARPTR_DOUBLE_PAIR(number, number)
// Six variations of one number: plain, negated, explicit '+', and three forms
// with leading zeros pasted on the front (unsigned, '+' and '-').
#define CHARPTR_DOUBLE_PAIR_COMBOS(number) \
    CHARPTR_DOUBLE_PAIR(number), \
    CHARPTR_DOUBLE_PAIR(-number), \
    ASYM_CHARPTR_DOUBLE_PAIR(+number, number), \
    ASYM_CHARPTR_DOUBLE_PAIR(000000 ## number, number), \
    ASYM_CHARPTR_DOUBLE_PAIR(+000 ## number, number), \
    ASYM_CHARPTR_DOUBLE_PAIR(-0 ## number, -number)
193 
194 namespace characterFunctionsTests
195 {
196 
197 template <typename CharPointerType>
198 MemoryBlock memoryBlockFromCharPtr (const typename CharPointerType::CharType* charPtr)
199 {
200  using CharType = typename CharPointerType::CharType;
201 
202  MemoryBlock result;
203  CharPointerType source (charPtr);
204 
205  result.setSize (CharPointerType::getBytesRequiredFor (source) + sizeof (CharType));
206  CharPointerType dest { (CharType*) result.getData() };
207  dest.writeAll (source);
208  return result;
209 }
210 
211 template <typename FromCharPointerType, typename ToCharPointerType>
212 MemoryBlock convert (const MemoryBlock& source, bool removeNullTerminator = false)
213 {
214  using ToCharType = typename ToCharPointerType ::CharType;
215  using FromCharType = typename FromCharPointerType::CharType;
216 
217  FromCharPointerType sourcePtr { (FromCharType*) source.getData() };
218 
219  std::vector<juce_wchar> sourceChars;
220  size_t requiredSize = 0;
221  juce_wchar c;
222 
223  while ((c = sourcePtr.getAndAdvance()) != '\0')
224  {
225  requiredSize += ToCharPointerType::getBytesRequiredFor (c);
226  sourceChars.push_back (c);
227  }
228 
229  if (! removeNullTerminator)
230  requiredSize += sizeof (ToCharType);
231 
232  MemoryBlock result;
233  result.setSize (requiredSize);
234 
235  ToCharPointerType dest { (ToCharType*) result.getData() };
236 
237  for (auto wc : sourceChars)
238  dest.write (wc);
239 
240  if (! removeNullTerminator)
241  dest.writeNull();
242 
243  return result;
244 }
245 
246 struct SeparatorStrings
247 {
248  std::vector<MemoryBlock> terminals, nulls;
249 };
250 
251 template <typename CharPointerType>
252 SeparatorStrings getSeparators()
253 {
254  jassertfalse;
255  return {};
256 }
257 
258 template <>
259 SeparatorStrings getSeparators<CharPointer_ASCII>()
260 {
261  SeparatorStrings result;
262 
263  const CharPointer_ASCII::CharType* terminalCharPtrs[] = {
264  "", "-", "+", "e", "e+", "E-", "f", " ", ",", ";", "<", "'", "\"", "_", "k",
265  " +", " -", " -e", "-In ", " +n", "n", " r"
266  };
267 
268  for (auto ptr : terminalCharPtrs)
269  result.terminals.push_back (memoryBlockFromCharPtr<CharPointer_ASCII> (ptr));
270 
271  const CharPointer_ASCII::CharType* nullCharPtrs[] = { "." };
272 
273  result.nulls = result.terminals;
274 
275  for (auto ptr : nullCharPtrs)
276  result.nulls.push_back (memoryBlockFromCharPtr<CharPointer_ASCII> (ptr));
277 
278  return result;
279 }
280 
281 template <>
282 SeparatorStrings getSeparators<CharPointer_UTF8>()
283 {
284  auto result = getSeparators<CharPointer_ASCII>();
285 
286  const CharPointer_UTF8::CharType* terminalCharPtrs[] = {
287  "\xe2\x82\xac", // €
288  "\xf0\x90\x90\xB7", // 𐐷
289  "\xf0\x9f\x98\x83", // 😃
290  "\xf0\x9f\x8f\x81\xF0\x9F\x9A\x97" // 🏁🚗
291  };
292 
293  for (auto ptr : terminalCharPtrs)
294  {
295  auto block = memoryBlockFromCharPtr<CharPointer_UTF8> (ptr);
296 
297  for (auto vec : { &result.terminals, &result.nulls })
298  vec->push_back (block);
299  }
300 
301  return result;
302 }
303 
304 template <typename CharPointerType, typename StorageType>
305 SeparatorStrings prefixWithAsciiSeparators (const std::vector<std::vector<StorageType>>& terminalCharPtrs)
306 {
307  auto asciiSeparators = getSeparators<CharPointer_ASCII>();
308 
309  SeparatorStrings result;
310 
311  for (const auto& block : asciiSeparators.terminals)
312  result.terminals.push_back (convert<CharPointer_ASCII, CharPointerType> (block));
313 
314  for (const auto& block : asciiSeparators.nulls)
315  result.nulls.push_back (convert<CharPointer_ASCII, CharPointerType> (block));
316 
317  for (auto& t : terminalCharPtrs)
318  {
319  const auto block = memoryBlockFromCharPtr<CharPointerType> ((typename CharPointerType::CharType*) t.data());
320 
321  for (auto vec : { &result.terminals, &result.nulls })
322  vec->push_back (block);
323  }
324 
325  return result;
326 }
327 
328 template <>
329 SeparatorStrings getSeparators<CharPointer_UTF16>()
330 {
331  const std::vector<std::vector<char16_t>> terminalCharPtrs {
332  { 0x0 },
333  { 0x0076, 0x0 }, // v
334  { 0x20ac, 0x0 }, // €
335  { 0xd801, 0xdc37, 0x0 }, // 𐐷
336  { 0x0065, 0xd83d, 0xde03, 0x0 }, // e😃
337  { 0xd83c, 0xdfc1, 0xd83d, 0xde97, 0x0 } // 🏁🚗
338  };
339 
340  return prefixWithAsciiSeparators<CharPointer_UTF16> (terminalCharPtrs);
341 }
342 
343 template <>
344 SeparatorStrings getSeparators<CharPointer_UTF32>()
345 {
346  const std::vector<std::vector<char32_t>> terminalCharPtrs = {
347  { 0x00000076, 0x0 }, // v
348  { 0x000020aC, 0x0 }, // €
349  { 0x00010437, 0x0 }, // 𐐷
350  { 0x00000065, 0x0001f603, 0x0 }, // e😃
351  { 0x0001f3c1, 0x0001f697, 0x0 } // 🏁🚗
352  };
353 
354  return prefixWithAsciiSeparators<CharPointer_UTF32> (terminalCharPtrs);
355 }
356 
357 template <typename TestFunction>
358 void withAllPrefixesAndSuffixes (const std::vector<MemoryBlock>& prefixes,
359  const std::vector<MemoryBlock>& suffixes,
360  const std::vector<MemoryBlock>& testValues,
361  TestFunction&& test)
362 {
363  for (const auto& prefix : prefixes)
364  {
365  for (const auto& testValue : testValues)
366  {
367  MemoryBlock testBlock = prefix;
368  testBlock.append (testValue.getData(), testValue.getSize());
369 
370  for (const auto& suffix : suffixes)
371  {
372  MemoryBlock data = testBlock;
373  data.append (suffix.getData(), suffix.getSize());
374 
375  test (data, suffix);
376  }
377  }
378  }
379 }
380 
381 template <typename CharPointerType>
382 class CharacterFunctionsTests final : public UnitTest
383 {
384 public:
385  using CharType = typename CharPointerType::CharType;
386 
387  CharacterFunctionsTests()
388  : UnitTest ("CharacterFunctions", UnitTestCategories::text)
389  {}
390 
391  void runTest() override
392  {
393  beginTest ("readDoubleValue");
394 
395  const std::pair<const char*, double> trials[] =
396  {
397  // Integers
398  CHARPTR_DOUBLE_PAIR_COMBOS (0),
399  CHARPTR_DOUBLE_PAIR_COMBOS (3),
400  CHARPTR_DOUBLE_PAIR_COMBOS (4931),
401  CHARPTR_DOUBLE_PAIR_COMBOS (5000),
402  CHARPTR_DOUBLE_PAIR_COMBOS (9862097),
403 
404  // Floating point numbers
405  CHARPTR_DOUBLE_PAIR_COMBOS (0.),
406  CHARPTR_DOUBLE_PAIR_COMBOS (9.),
407  CHARPTR_DOUBLE_PAIR_COMBOS (7.000),
408  CHARPTR_DOUBLE_PAIR_COMBOS (0.2),
409  CHARPTR_DOUBLE_PAIR_COMBOS (.298630),
410  CHARPTR_DOUBLE_PAIR_COMBOS (1.118),
411  CHARPTR_DOUBLE_PAIR_COMBOS (0.9000),
412  CHARPTR_DOUBLE_PAIR_COMBOS (0.0000001),
413  CHARPTR_DOUBLE_PAIR_COMBOS (500.0000001),
414  CHARPTR_DOUBLE_PAIR_COMBOS (9862098.2398604),
415 
416  // Exponents
417  CHARPTR_DOUBLE_PAIR_COMBOS (0e0),
418  CHARPTR_DOUBLE_PAIR_COMBOS (0.e0),
419  CHARPTR_DOUBLE_PAIR_COMBOS (0.00000e0),
420  CHARPTR_DOUBLE_PAIR_COMBOS (.0e7),
421  CHARPTR_DOUBLE_PAIR_COMBOS (0e-5),
422  CHARPTR_DOUBLE_PAIR_COMBOS (2E0),
423  CHARPTR_DOUBLE_PAIR_COMBOS (4.E0),
424  CHARPTR_DOUBLE_PAIR_COMBOS (1.2000000E0),
425  CHARPTR_DOUBLE_PAIR_COMBOS (1.2000000E6),
426  CHARPTR_DOUBLE_PAIR_COMBOS (.398e3),
427  CHARPTR_DOUBLE_PAIR_COMBOS (10e10),
428  CHARPTR_DOUBLE_PAIR_COMBOS (1.4962e+2),
429  CHARPTR_DOUBLE_PAIR_COMBOS (3198693.0973e4),
430  CHARPTR_DOUBLE_PAIR_COMBOS (10973097.2087E-4),
431  CHARPTR_DOUBLE_PAIR_COMBOS (1.3986e00006),
432  CHARPTR_DOUBLE_PAIR_COMBOS (2087.3087e+00006),
433  CHARPTR_DOUBLE_PAIR_COMBOS (6.0872e-00006),
434 
435  CHARPTR_DOUBLE_PAIR_COMBOS (1.7976931348623157e+308),
436  CHARPTR_DOUBLE_PAIR_COMBOS (2.2250738585072014e-308),
437 
438  // Too many sig figs. The parsing routine on MinGW gets the last
439  // significant figure wrong.
440  CHARPTR_DOUBLE_PAIR_COMBOS (17654321098765432.9),
441  CHARPTR_DOUBLE_PAIR_COMBOS (183456789012345678.9),
442  CHARPTR_DOUBLE_PAIR_COMBOS (1934567890123456789.9),
443  CHARPTR_DOUBLE_PAIR_COMBOS (20345678901234567891.9),
444  CHARPTR_DOUBLE_PAIR_COMBOS (10000000000000000303786028427003666890752.000000),
445  CHARPTR_DOUBLE_PAIR_COMBOS (10000000000000000303786028427003666890752e3),
446  CHARPTR_DOUBLE_PAIR_COMBOS (10000000000000000303786028427003666890752e100),
447  CHARPTR_DOUBLE_PAIR_COMBOS (10000000000000000303786028427003666890752.000000e-5),
448  CHARPTR_DOUBLE_PAIR_COMBOS (10000000000000000303786028427003666890752.000005e-40),
449 
450  CHARPTR_DOUBLE_PAIR_COMBOS (1.23456789012345678901234567890),
451  CHARPTR_DOUBLE_PAIR_COMBOS (1.23456789012345678901234567890e-111),
452  };
453 
454  auto asciiToMemoryBlock = [] (const char* asciiPtr, bool removeNullTerminator)
455  {
456  auto block = memoryBlockFromCharPtr<CharPointer_ASCII> (asciiPtr);
457  return convert<CharPointer_ASCII, CharPointerType> (block, removeNullTerminator);
458  };
459 
460  const auto separators = getSeparators<CharPointerType>();
461 
462  for (const auto& trial : trials)
463  {
464  for (const auto& terminal : separators.terminals)
465  {
466  MemoryBlock data { asciiToMemoryBlock (trial.first, true) };
467  data.append (terminal.getData(), terminal.getSize());
468 
469  CharPointerType charPtr { (CharType*) data.getData() };
470  expectEquals (CharacterFunctions::readDoubleValue (charPtr), trial.second);
471  expect (*charPtr == *(CharPointerType ((CharType*) terminal.getData())));
472  }
473  }
474 
475  auto asciiToMemoryBlocks = [&] (const std::vector<const char*>& asciiPtrs, bool removeNullTerminator)
476  {
477  std::vector<MemoryBlock> result;
478 
479  for (auto* ptr : asciiPtrs)
480  result.push_back (asciiToMemoryBlock (ptr, removeNullTerminator));
481 
482  return result;
483  };
484 
485  std::vector<const char*> prefixCharPtrs = { "" , "+", "-" };
486  const auto prefixes = asciiToMemoryBlocks (prefixCharPtrs, true);
487 
488  {
489  std::vector<const char*> nanCharPtrs = { "NaN", "nan", "NAN", "naN" };
490  auto nans = asciiToMemoryBlocks (nanCharPtrs, true);
491 
492  withAllPrefixesAndSuffixes (prefixes, separators.terminals, nans, [this] (const MemoryBlock& data,
493  const MemoryBlock& suffix)
494  {
495  CharPointerType charPtr { (CharType*) data.getData() };
496  expect (std::isnan (CharacterFunctions::readDoubleValue (charPtr)));
497  expect (*charPtr == *(CharPointerType ((CharType*) suffix.getData())));
498  });
499  }
500 
501  {
502  std::vector<const char*> infCharPtrs = { "Inf", "inf", "INF", "InF", "1.0E1024", "1.23456789012345678901234567890e123456789" };
503  auto infs = asciiToMemoryBlocks (infCharPtrs, true);
504 
505  withAllPrefixesAndSuffixes (prefixes, separators.terminals, infs, [this] (const MemoryBlock& data,
506  const MemoryBlock& suffix)
507  {
508  CharPointerType charPtr { (CharType*) data.getData() };
509  auto expected = charPtr[0] == '-' ? -std::numeric_limits<double>::infinity()
510  : std::numeric_limits<double>::infinity();
511  expectEquals (CharacterFunctions::readDoubleValue (charPtr), expected);
512  expect (*charPtr == *(CharPointerType ((CharType*) suffix.getData())));
513  });
514  }
515 
516  {
517  std::vector<const char*> zeroCharPtrs = { "1.0E-400", "1.23456789012345678901234567890e-123456789" };
518  auto zeros = asciiToMemoryBlocks (zeroCharPtrs, true);
519 
520  withAllPrefixesAndSuffixes (prefixes, separators.terminals, zeros, [this] (const MemoryBlock& data,
521  const MemoryBlock& suffix)
522  {
523  CharPointerType charPtr { (CharType*) data.getData() };
524  auto expected = charPtr[0] == '-' ? -0.0 : 0.0;
525  expectEquals (CharacterFunctions::readDoubleValue (charPtr), expected);
526  expect (*charPtr == *(CharPointerType ((CharType*) suffix.getData())));
527  });
528  }
529 
530  {
531  for (const auto& n : separators.nulls)
532  {
533  MemoryBlock data { n.getData(), n.getSize() };
534  CharPointerType charPtr { (CharType*) data.getData() };
535  expectEquals (CharacterFunctions::readDoubleValue (charPtr), 0.0);
536  expect (charPtr == CharPointerType { (CharType*) data.getData() }.findEndOfWhitespace());
537  }
538  }
539  }
540 };
541 
542 static CharacterFunctionsTests<CharPointer_ASCII> characterFunctionsTestsAscii;
543 static CharacterFunctionsTests<CharPointer_UTF8> characterFunctionsTestsUtf8;
544 static CharacterFunctionsTests<CharPointer_UTF16> characterFunctionsTestsUtf16;
545 static CharacterFunctionsTests<CharPointer_UTF32> characterFunctionsTestsUtf32;
546 
547 }
548 
549 #endif
550 
551 } // namespace juce
static juce_wchar toLowerCase(juce_wchar character) noexcept
static bool isDigit(char character) noexcept
static bool isLowerCase(juce_wchar character) noexcept
static bool isLetter(char character) noexcept
static bool isWhitespace(char character) noexcept
static double readDoubleValue(CharPointerType &text) noexcept
static int getHexDigitValue(juce_wchar digit) noexcept
static bool isLetterOrDigit(char character) noexcept
static bool isUpperCase(juce_wchar character) noexcept
static bool isPrintable(char character) noexcept
static juce_wchar getUnicodeCharFromWindows1252Codepage(uint8 windows1252Char) noexcept
void * getData() noexcept
void setSize(size_t newSize, bool initialiseNewSpaceToZero=false)