ICU 76.1  76.1
uset.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2002-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: uset.h
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2002mar07
16 * created by: Markus W. Scherer
17 *
18 * C version of UnicodeSet.
19 */
20 
21 
29 #ifndef __USET_H__
30 #define __USET_H__
31 
32 #include "unicode/utypes.h"
33 #include "unicode/uchar.h"
34 
35 #if U_SHOW_CPLUSPLUS_API
36 #include <string_view>
37 #include "unicode/char16ptr.h"
38 #include "unicode/localpointer.h"
39 #include "unicode/unistr.h"
40 #endif // U_SHOW_CPLUSPLUS_API
41 
42 #ifndef USET_DEFINED
43 
44 #ifndef U_IN_DOXYGEN
45 #define USET_DEFINED
46 #endif
53 typedef struct USet USet;
54 #endif
55 
67 enum {
73 
101 
114 
128 };
129 
185 typedef enum USetSpanCondition {
234 #ifndef U_HIDE_DEPRECATED_API
240 #endif // U_HIDE_DEPRECATED_API
242 
243 enum {
251 };
252 
258 typedef struct USerializedSet {
263  const uint16_t *array;
268  int32_t bmpLength;
273  int32_t length;
280 
281 /*********************************************************************
282  * USet API
283  *********************************************************************/
284 
292 U_CAPI USet* U_EXPORT2
294 
305 U_CAPI USet* U_EXPORT2
307 
317 U_CAPI USet* U_EXPORT2
318 uset_openPattern(const UChar* pattern, int32_t patternLength,
319  UErrorCode* ec);
320 
334 U_CAPI USet* U_EXPORT2
335 uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
336  uint32_t options,
337  UErrorCode* ec);
338 
345 U_CAPI void U_EXPORT2
347 
348 #if U_SHOW_CPLUSPLUS_API
349 
350 U_NAMESPACE_BEGIN
351 
362 
363 U_NAMESPACE_END
364 
365 #endif
366 
376 U_CAPI USet * U_EXPORT2
377 uset_clone(const USet *set);
378 
388 U_CAPI UBool U_EXPORT2
389 uset_isFrozen(const USet *set);
390 
405 U_CAPI void U_EXPORT2
407 
418 U_CAPI USet * U_EXPORT2
420 
430 U_CAPI void U_EXPORT2
432  UChar32 start, UChar32 end);
433 
458 U_CAPI int32_t U_EXPORT2
460  const UChar *pattern, int32_t patternLength,
461  uint32_t options,
462  UErrorCode *status);
463 
486 U_CAPI void U_EXPORT2
488  UProperty prop, int32_t value, UErrorCode* ec);
489 
525 U_CAPI void U_EXPORT2
527  const UChar *prop, int32_t propLength,
528  const UChar *value, int32_t valueLength,
529  UErrorCode* ec);
530 
540 U_CAPI UBool U_EXPORT2
541 uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
542  int32_t pos);
543 
559 U_CAPI int32_t U_EXPORT2
560 uset_toPattern(const USet* set,
561  UChar* result, int32_t resultCapacity,
562  UBool escapeUnprintable,
563  UErrorCode* ec);
564 
573 U_CAPI void U_EXPORT2
575 
588 U_CAPI void U_EXPORT2
589 uset_addAll(USet* set, const USet *additionalSet);
590 
600 U_CAPI void U_EXPORT2
601 uset_addRange(USet* set, UChar32 start, UChar32 end);
602 
612 U_CAPI void U_EXPORT2
613 uset_addString(USet* set, const UChar* str, int32_t strLen);
614 
624 U_CAPI void U_EXPORT2
625 uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
626 
635 U_CAPI void U_EXPORT2
637 
647 U_CAPI void U_EXPORT2
649 
659 U_CAPI void U_EXPORT2
660 uset_removeString(USet* set, const UChar* str, int32_t strLen);
661 
671 U_CAPI void U_EXPORT2
672 uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length);
673 
685 U_CAPI void U_EXPORT2
686 uset_removeAll(USet* set, const USet* removeSet);
687 
700 U_CAPI void U_EXPORT2
701 uset_retain(USet* set, UChar32 start, UChar32 end);
702 
714 U_CAPI void U_EXPORT2
715 uset_retainString(USet *set, const UChar *str, int32_t length);
716 
726 U_CAPI void U_EXPORT2
727 uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length);
728 
741 U_CAPI void U_EXPORT2
742 uset_retainAll(USet* set, const USet* retain);
743 
752 U_CAPI void U_EXPORT2
754 
768 U_CAPI void U_EXPORT2
770 
784 U_CAPI void U_EXPORT2
786 
797 U_CAPI void U_EXPORT2
798 uset_complementString(USet *set, const UChar *str, int32_t length);
799 
809 U_CAPI void U_EXPORT2
810 uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length);
811 
823 U_CAPI void U_EXPORT2
824 uset_complementAll(USet* set, const USet* complement);
825 
833 U_CAPI void U_EXPORT2
835 
864 U_CAPI void U_EXPORT2
865 uset_closeOver(USet* set, int32_t attributes);
866 
873 U_CAPI void U_EXPORT2
875 
883 U_CAPI UBool U_EXPORT2
884 uset_isEmpty(const USet* set);
885 
891 U_CAPI UBool U_EXPORT2
892 uset_hasStrings(const USet *set);
893 
902 U_CAPI UBool U_EXPORT2
903 uset_contains(const USet* set, UChar32 c);
904 
914 U_CAPI UBool U_EXPORT2
915 uset_containsRange(const USet* set, UChar32 start, UChar32 end);
916 
925 U_CAPI UBool U_EXPORT2
926 uset_containsString(const USet* set, const UChar* str, int32_t strLen);
927 
938 U_CAPI int32_t U_EXPORT2
939 uset_indexOf(const USet* set, UChar32 c);
940 
956 U_CAPI UChar32 U_EXPORT2
957 uset_charAt(const USet* set, int32_t charIndex);
958 
974 U_CAPI int32_t U_EXPORT2
975 uset_size(const USet* set);
976 
986 U_CAPI int32_t U_EXPORT2
988 
989 #ifndef U_HIDE_DRAFT_API
990 
999 U_CAPI int32_t U_EXPORT2
1001 
1014 U_CAPI const UChar* U_EXPORT2
1015 uset_getString(const USet *set, int32_t index, int32_t *pLength);
1016 
1017 #endif // U_HIDE_DRAFT_API
1018 
1029 U_CAPI int32_t U_EXPORT2
1031 
1062 U_CAPI int32_t U_EXPORT2
1063 uset_getItem(const USet* set, int32_t itemIndex,
1064  UChar32* start, UChar32* end,
1065  UChar* str, int32_t strCapacity,
1066  UErrorCode* ec);
1067 
1076 U_CAPI UBool U_EXPORT2
1077 uset_containsAll(const USet* set1, const USet* set2);
1078 
1089 U_CAPI UBool U_EXPORT2
1090 uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
1091 
1100 U_CAPI UBool U_EXPORT2
1101 uset_containsNone(const USet* set1, const USet* set2);
1102 
1111 U_CAPI UBool U_EXPORT2
1112 uset_containsSome(const USet* set1, const USet* set2);
1113 
1133 U_CAPI int32_t U_EXPORT2
1134 uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1135 
1154 U_CAPI int32_t U_EXPORT2
1155 uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1156 
1176 U_CAPI int32_t U_EXPORT2
1177 uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1178 
1197 U_CAPI int32_t U_EXPORT2
1198 uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1199 
1208 U_CAPI UBool U_EXPORT2
1209 uset_equals(const USet* set1, const USet* set2);
1210 
1211 /*********************************************************************
1212  * Serialized set API
1213  *********************************************************************/
1214 
1264 U_CAPI int32_t U_EXPORT2
1265 uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
1266 
1275 U_CAPI UBool U_EXPORT2
1276 uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
1277 
1285 U_CAPI void U_EXPORT2
1287 
1296 U_CAPI UBool U_EXPORT2
1298 
1308 U_CAPI int32_t U_EXPORT2
1310 
1324 U_CAPI UBool U_EXPORT2
1325 uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
1326  UChar32* pStart, UChar32* pEnd);
1327 
1328 #if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
1329 #ifndef U_HIDE_DRAFT_API
1330 
1331 namespace U_HEADER_ONLY_NAMESPACE {
1332 
1333 // Note: Not U_COMMON_API, and not a subclass of UMemory, because this is a header-only class,
1334 // not intended to be used via export from the ICU DLL.
1335 
1341 public:
1344 
1346  bool operator==(const USetCodePointIterator &other) const {
1347  // No need to compare rangeCount & end given private constructor
1348  // and assuming we don't compare iterators across the set being modified.
1349  // And comparing rangeIndex is redundant with comparing c.
1350  // We might even skip comparing uset.
1351  // Unless we want operator==() to be "correct" for more than iteration.
1352  return uset == other.uset && c == other.c;
1353  }
1354 
1356  bool operator!=(const USetCodePointIterator &other) const { return !operator==(other); }
1357 
1359  UChar32 operator*() const { return c; }
1360 
1366  if (c < end) {
1367  ++c;
1368  } else if (rangeIndex < rangeCount) {
1369  UErrorCode errorCode = U_ZERO_ERROR;
1370  int32_t result = uset_getItem(uset, rangeIndex, &c, &end, nullptr, 0, &errorCode);
1371  if (U_SUCCESS(errorCode) && result == 0) {
1372  ++rangeIndex;
1373  } else {
1374  c = end = U_SENTINEL;
1375  }
1376  } else {
1377  c = end = U_SENTINEL;
1378  }
1379  return *this;
1380  }
1381 
1387  USetCodePointIterator result(*this);
1388  operator++();
1389  return result;
1390  }
1391 
1392 private:
1393  friend class USetCodePoints;
1394 
1395  USetCodePointIterator(const USet *uset, int32_t rangeIndex, int32_t rangeCount)
1396  : uset(uset), rangeIndex(rangeIndex), rangeCount(rangeCount),
1397  c(U_SENTINEL), end(U_SENTINEL) {
1398  // Fetch the first range.
1399  operator++();
1400  }
1401 
1402  const USet *uset;
1403  int32_t rangeIndex;
1404  int32_t rangeCount;
1405  UChar32 c, end;
1406 };
1407 
1427 public:
1432  USetCodePoints(const USet *uset) : uset(uset), rangeCount(uset_getRangeCount(uset)) {}
1433 
1435  USetCodePoints(const USetCodePoints &other) = default;
1436 
1439  return USetCodePointIterator(uset, 0, rangeCount);
1440  }
1441 
1444  return USetCodePointIterator(uset, rangeCount, rangeCount);
1445  }
1446 
1447 private:
1448  const USet *uset;
1449  int32_t rangeCount;
1450 };
1451 
1461  struct iterator {
1464 
1466  bool operator==(const iterator &other) const { return c == other.c; }
1468  bool operator!=(const iterator &other) const { return !operator==(other); }
1469 
1471  UChar32 operator*() const { return c; }
1472 
1478  ++c;
1479  return *this;
1480  }
1481 
1487  return c++;
1488  }
1489 
1495  };
1496 
1500  CodePointRange(const CodePointRange &other) = default;
1502  size_t size() const { return (rangeEnd + 1) - rangeStart; }
1504  iterator begin() const { return rangeStart; }
1506  iterator end() const { return rangeEnd + 1; }
1507 
1518 };
1519 
1525 public:
1527  USetRangeIterator(const USetRangeIterator &other) = default;
1528 
1530  bool operator==(const USetRangeIterator &other) const {
1531  // No need to compare rangeCount given private constructor
1532  // and assuming we don't compare iterators across the set being modified.
1533  // We might even skip comparing uset.
1534  // Unless we want operator==() to be "correct" for more than iteration.
1535  return uset == other.uset && rangeIndex == other.rangeIndex;
1536  }
1537 
1539  bool operator!=(const USetRangeIterator &other) const { return !operator==(other); }
1540 
1543  if (rangeIndex < rangeCount) {
1544  UChar32 start, end;
1545  UErrorCode errorCode = U_ZERO_ERROR;
1546  int32_t result = uset_getItem(uset, rangeIndex, &start, &end, nullptr, 0, &errorCode);
1547  if (U_SUCCESS(errorCode) && result == 0) {
1548  return CodePointRange(start, end);
1549  }
1550  }
1552  }
1553 
1559  ++rangeIndex;
1560  return *this;
1561  }
1562 
1568  USetRangeIterator result(*this);
1569  ++rangeIndex;
1570  return result;
1571  }
1572 
1573 private:
1574  friend class USetRanges;
1575 
1576  USetRangeIterator(const USet *uset, int32_t rangeIndex, int32_t rangeCount)
1577  : uset(uset), rangeIndex(rangeIndex), rangeCount(rangeCount) {}
1578 
1579  const USet *uset;
1580  int32_t rangeIndex;
1581  int32_t rangeCount;
1582 };
1583 
1607 class USetRanges {
1608 public:
1613  USetRanges(const USet *uset) : uset(uset), rangeCount(uset_getRangeCount(uset)) {}
1614 
1616  USetRanges(const USetRanges &other) = default;
1617 
1620  return USetRangeIterator(uset, 0, rangeCount);
1621  }
1622 
1625  return USetRangeIterator(uset, rangeCount, rangeCount);
1626  }
1627 
1628 private:
1629  const USet *uset;
1630  int32_t rangeCount;
1631 };
1632 
1638 public:
1640  USetStringIterator(const USetStringIterator &other) = default;
1641 
1643  bool operator==(const USetStringIterator &other) const {
1644  // No need to compare count given private constructor
1645  // and assuming we don't compare iterators across the set being modified.
1646  // We might even skip comparing uset.
1647  // Unless we want operator==() to be "correct" for more than iteration.
1648  return uset == other.uset && index == other.index;
1649  }
1650 
1652  bool operator!=(const USetStringIterator &other) const { return !operator==(other); }
1653 
1655  std::u16string_view operator*() const {
1656  if (index < count) {
1657  int32_t length;
1658  const UChar *uchars = uset_getString(uset, index, &length);
1659  // assert uchars != nullptr;
1660  return {ConstChar16Ptr(uchars), static_cast<uint32_t>(length)};
1661  }
1662  return {};
1663  }
1664 
1670  ++index;
1671  return *this;
1672  }
1673 
1679  USetStringIterator result(*this);
1680  ++index;
1681  return result;
1682  }
1683 
1684 private:
1685  friend class USetStrings;
1686 
1687  USetStringIterator(const USet *uset, int32_t index, int32_t count)
1688  : uset(uset), index(index), count(count) {}
1689 
1690  const USet *uset;
1691  int32_t index;
1692  int32_t count;
1693 };
1694 
1716 public:
1721  USetStrings(const USet *uset) : uset(uset), count(uset_getStringCount(uset)) {}
1722 
1724  USetStrings(const USetStrings &other) = default;
1725 
1728  return USetStringIterator(uset, 0, count);
1729  }
1730 
1733  return USetStringIterator(uset, count, count);
1734  }
1735 
1736 private:
1737  const USet *uset;
1738  int32_t count;
1739 };
1740 
1746 public:
1748  USetElementIterator(const USetElementIterator &other) = default;
1749 
1751  bool operator==(const USetElementIterator &other) const {
1752  // No need to compare rangeCount & end given private constructor
1753  // and assuming we don't compare iterators across the set being modified.
1754  // We might even skip comparing uset.
1755  // Unless we want operator==() to be "correct" for more than iteration.
1756  return uset == other.uset && c == other.c && index == other.index;
1757  }
1758 
1760  bool operator!=(const USetElementIterator &other) const { return !operator==(other); }
1761 
1763  UnicodeString operator*() const {
1764  if (c >= 0) {
1765  return UnicodeString(c);
1766  } else if (index < totalCount) {
1767  int32_t length;
1768  const UChar *uchars = uset_getString(uset, index - rangeCount, &length);
1769  // assert uchars != nullptr;
1770  return UnicodeString(uchars, length);
1771  } else {
1772  return UnicodeString();
1773  }
1774  }
1775 
1781  if (c < end) {
1782  ++c;
1783  } else if (index < rangeCount) {
1784  UErrorCode errorCode = U_ZERO_ERROR;
1785  int32_t result = uset_getItem(uset, index, &c, &end, nullptr, 0, &errorCode);
1786  if (U_SUCCESS(errorCode) && result == 0) {
1787  ++index;
1788  } else {
1789  c = end = U_SENTINEL;
1790  }
1791  } else if (c >= 0) {
1792  // assert index == rangeCount;
1793  // Switch from the last range to the first string.
1794  c = end = U_SENTINEL;
1795  } else {
1796  ++index;
1797  }
1798  return *this;
1799  }
1800 
1806  USetElementIterator result(*this);
1807  operator++();
1808  return result;
1809  }
1810 
1811 private:
1812  friend class USetElements;
1813 
1814  USetElementIterator(const USet *uset, int32_t index, int32_t rangeCount, int32_t totalCount)
1815  : uset(uset), index(index), rangeCount(rangeCount), totalCount(totalCount),
1816  c(U_SENTINEL), end(U_SENTINEL) {
1817  if (index < rangeCount) {
1818  // Fetch the first range.
1819  operator++();
1820  }
1821  // Otherwise don't move beyond the (index - rangeCount)-th string.
1822  }
1823 
1824  const USet *uset;
1825  int32_t index;
1827  int32_t rangeCount;
1837  int32_t totalCount;
1838  UChar32 c, end;
1839 };
1840 
1866 public:
1871  USetElements(const USet *uset)
1872  : uset(uset), rangeCount(uset_getRangeCount(uset)),
1873  stringCount(uset_getStringCount(uset)) {}
1874 
1876  USetElements(const USetElements &other) = default;
1877 
1880  return USetElementIterator(uset, 0, rangeCount, rangeCount + stringCount);
1881  }
1882 
1885  return USetElementIterator(uset, rangeCount + stringCount, rangeCount, rangeCount + stringCount);
1886  }
1887 
1888 private:
1889  const USet *uset;
1890  int32_t rangeCount, stringCount;
1891 };
1892 
1893 } // namespace U_HEADER_ONLY_NAMESPACE
1894 
1895 #endif // U_HIDE_DRAFT_API
1896 #endif // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
1897 
1898 #endif // __USET_H__
C++ API: char16_t pointer wrappers with implicit conversion from bit-compatible raw pointer types.
"Smart pointer" class, closes a USet via uset_close().
Iterator returned by USetCodePoints.
Definition: uset.h:1340
bool operator==(const USetCodePointIterator &other) const
Definition: uset.h:1346
bool operator!=(const USetCodePointIterator &other) const
Definition: uset.h:1356
USetCodePointIterator operator++(int)
Post-increment.
Definition: uset.h:1386
USetCodePointIterator(const USetCodePointIterator &other)=default
USetCodePointIterator & operator++()
Pre-increment.
Definition: uset.h:1365
C++ "range" for iterating over the code points of a USet.
Definition: uset.h:1426
USetCodePoints(const USetCodePoints &other)=default
USetCodePointIterator end() const
Definition: uset.h:1443
USetCodePoints(const USet *uset)
Constructs a C++ "range" object over the code points of the USet.
Definition: uset.h:1432
USetCodePointIterator begin() const
Definition: uset.h:1438
Iterator returned by USetElements.
Definition: uset.h:1745
USetElementIterator & operator++()
Pre-increment.
Definition: uset.h:1780
USetElementIterator operator++(int)
Post-increment.
Definition: uset.h:1805
bool operator==(const USetElementIterator &other) const
Definition: uset.h:1751
bool operator!=(const USetElementIterator &other) const
Definition: uset.h:1760
USetElementIterator(const USetElementIterator &other)=default
A C++ "range" for iterating over all of the elements of a USet.
Definition: uset.h:1865
USetElements(const USetElements &other)=default
USetElementIterator end() const
Definition: uset.h:1884
USetElementIterator begin() const
Definition: uset.h:1879
USetElements(const USet *uset)
Constructs a C++ "range" object over all of the elements of the USet.
Definition: uset.h:1871
Iterator returned by USetRanges.
Definition: uset.h:1524
USetRangeIterator & operator++()
Pre-increment.
Definition: uset.h:1558
CodePointRange operator*() const
Definition: uset.h:1542
bool operator==(const USetRangeIterator &other) const
Definition: uset.h:1530
USetRangeIterator operator++(int)
Post-increment.
Definition: uset.h:1567
bool operator!=(const USetRangeIterator &other) const
Definition: uset.h:1539
USetRangeIterator(const USetRangeIterator &other)=default
C++ "range" for iterating over the code point ranges of a USet.
Definition: uset.h:1607
USetRangeIterator end() const
Definition: uset.h:1624
USetRangeIterator begin() const
Definition: uset.h:1619
USetRanges(const USet *uset)
Constructs a C++ "range" object over the code point ranges of the USet.
Definition: uset.h:1613
USetRanges(const USetRanges &other)=default
Iterator returned by USetStrings.
Definition: uset.h:1637
USetStringIterator(const USetStringIterator &other)=default
bool operator!=(const USetStringIterator &other) const
Definition: uset.h:1652
std::u16string_view operator*() const
Definition: uset.h:1655
USetStringIterator operator++(int)
Post-increment.
Definition: uset.h:1678
USetStringIterator & operator++()
Pre-increment.
Definition: uset.h:1669
bool operator==(const USetStringIterator &other) const
Definition: uset.h:1643
C++ "range" for iterating over the empty and multi-character strings of a USet.
Definition: uset.h:1715
USetStrings(const USetStrings &other)=default
USetStringIterator begin() const
Definition: uset.h:1727
USetStringIterator end() const
Definition: uset.h:1732
USetStrings(const USet *uset)
Constructs a C++ "range" object over the strings of the USet.
Definition: uset.h:1721
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:550
A serialized form of a Unicode set.
Definition: uset.h:258
uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]
A small buffer for the array to reduce memory allocations.
Definition: uset.h:278
int32_t bmpLength
The length of the array that contains BMP characters.
Definition: uset.h:268
const uint16_t * array
The serialized Unicode Set.
Definition: uset.h:263
int32_t length
The total length of the array.
Definition: uset.h:273
bool operator!=(const iterator &other) const
Definition: uset.h:1468
UChar32 c
The current code point in the range.
Definition: uset.h:1494
iterator operator++(int)
Post-increment.
Definition: uset.h:1486
iterator & operator++()
Pre-increment.
Definition: uset.h:1477
bool operator==(const iterator &other) const
Definition: uset.h:1466
A contiguous range of code points in a USet/UnicodeSet.
Definition: uset.h:1459
CodePointRange(UChar32 start, UChar32 end)
Definition: uset.h:1498
UChar32 rangeEnd
Inclusive end of a USet/UnicodeSet range of code points.
Definition: uset.h:1517
CodePointRange(const CodePointRange &other)=default
UChar32 rangeStart
Start of a USet/UnicodeSet range of code points.
Definition: uset.h:1512
C API: Unicode Properties.
UProperty
Selection constants for Unicode properties.
Definition: uchar.h:196
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:427
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:247
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition: umachine.h:110
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:378
#define U_SENTINEL
This value is intended for sentinel values for APIs that (take or) return single code points (UChar32...
Definition: umachine.h:447
C++ API: Unicode String.
U_CAPI UBool uset_containsAll(const USet *set1, const USet *set2)
Returns true if set1 contains all the characters and strings of set2.
U_CAPI UBool uset_hasStrings(const USet *set)
U_CAPI int32_t uset_getStringCount(const USet *set)
U_CAPI UBool uset_serializedContains(const USerializedSet *set, UChar32 c)
Returns true if the given USerializedSet contains the given character.
U_CAPI UBool uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcLength)
Given a serialized array, fill in the given serialized set object.
U_CAPI void uset_set(USet *set, UChar32 start, UChar32 end)
Causes the USet object to represent the range start - end.
U_CAPI void uset_addString(USet *set, const UChar *str, int32_t strLen)
Adds the given string to the given USet.
U_CAPI UBool uset_containsString(const USet *set, const UChar *str, int32_t strLen)
Returns true if the given USet contains the given string.
@ USET_IGNORE_SPACE
Ignore white space within patterns unless quoted or escaped.
Definition: uset.h:72
@ USET_ADD_CASE_MAPPINGS
Adds all case mappings for each element in the set.
Definition: uset.h:113
@ USET_CASE_INSENSITIVE
Enable case insensitive matching.
Definition: uset.h:100
@ USET_SIMPLE_CASE_INSENSITIVE
Enable case insensitive matching, like USET_CASE_INSENSITIVE but using only simple case folding (single code point mappings).
Definition: uset.h:127
U_CAPI UBool uset_isEmpty(const USet *set)
Returns true if the given USet contains no characters and no strings.
U_CAPI int32_t uset_getItemCount(const USet *set)
Returns the number of items in this set.
U_CAPI int32_t uset_getSerializedRangeCount(const USerializedSet *set)
Returns the number of disjoint ranges of characters contained in the given serialized set.
U_CAPI UChar32 uset_charAt(const USet *set, int32_t charIndex)
Returns the character at the given index within this set, where the set is ordered by ascending code ...
U_CAPI USet * uset_open(UChar32 start, UChar32 end)
Creates a USet object that contains the range of characters start..end, inclusive.
U_CAPI void uset_retainAll(USet *set, const USet *retain)
Retains only the elements in this set that are contained in the specified set.
U_CAPI void uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c)
Set the USerializedSet to contain the given character (and nothing else).
U_CAPI void uset_complementString(USet *set, const UChar *str, int32_t length)
Complements the specified string in this set.
U_CAPI void uset_applyIntPropertyValue(USet *set, UProperty prop, int32_t value, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given binary or enum...
U_CAPI void uset_complementRange(USet *set, UChar32 start, UChar32 end)
Complements the specified range in this set.
U_CAPI UBool uset_resemblesPattern(const UChar *pattern, int32_t patternLength, int32_t pos)
Return true if the given position, in the given pattern, appears to be the start of a UnicodeSet patt...
U_CAPI void uset_addRange(USet *set, UChar32 start, UChar32 end)
Adds the given range of characters to the given USet.
U_CAPI void uset_add(USet *set, UChar32 c)
Adds the given character to the given USet.
U_CAPI void uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length)
Complements EACH of the characters in this string.
U_CAPI int32_t uset_size(const USet *set)
Returns the number of characters and strings contained in this set.
U_CAPI int32_t uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
U_CAPI void uset_removeAllStrings(USet *set)
Remove all strings from this set.
U_CAPI void uset_clear(USet *set)
Removes all of the elements from this set.
U_CAPI void uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length)
Removes EACH of the characters in this string.
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is cont...
Definition: uset.h:185
@ USET_SPAN_NOT_CONTAINED
Continues a span() while there is no set element at the current position.
Definition: uset.h:198
@ USET_SPAN_CONTAINED
Spans the longest substring that is a concatenation of set elements (characters or strings).
Definition: uset.h:213
@ USET_SPAN_CONDITION_COUNT
One more than the last span condition.
Definition: uset.h:239
@ USET_SPAN_SIMPLE
Continues a span() while there is a set element at the current position.
Definition: uset.h:233
U_CAPI void uset_remove(USet *set, UChar32 c)
Removes the given character from the given USet.
U_CAPI void uset_removeRange(USet *set, UChar32 start, UChar32 end)
Removes the given range of characters from the given USet.
U_CAPI UBool uset_equals(const USet *set1, const USet *set2)
Returns true if set1 contains all of the characters and strings of set2, and vice versa.
U_CAPI UBool uset_isFrozen(const USet *set)
Determines whether the set has been frozen (made immutable) or not.
U_CAPI int32_t uset_getRangeCount(const USet *set)
U_CAPI USet * uset_clone(const USet *set)
Returns a copy of this object.
U_CAPI UBool uset_containsAllCodePoints(const USet *set, const UChar *str, int32_t strLen)
Returns true if this set contains all the characters of the given string.
U_CAPI USet * uset_openPattern(const UChar *pattern, int32_t patternLength, UErrorCode *ec)
Creates a set from the given pattern.
U_CAPI int32_t uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Serializes this set into an array of 16-bit integers.
U_CAPI int32_t uset_indexOf(const USet *set, UChar32 c)
Returns the index of the given character within this set, where the set is ordered by ascending code ...
U_CAPI void uset_freeze(USet *set)
Freeze the set (make it immutable).
U_CAPI int32_t uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
U_CAPI const UChar * uset_getString(const USet *set, int32_t index, int32_t *pLength)
Returns the index-th string (empty or multi-character) in the set.
struct USerializedSet USerializedSet
A serialized form of a Unicode set.
U_CAPI void uset_retain(USet *set, UChar32 start, UChar32 end)
Retain only the elements in this set that are contained in the specified range.
U_CAPI int32_t uset_toPattern(const USet *set, UChar *result, int32_t resultCapacity, UBool escapeUnprintable, UErrorCode *ec)
Returns a string representation of this set.
U_CAPI int32_t uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
U_CAPI void uset_complement(USet *set)
This is equivalent to uset_complementRange(set, 0, 0x10FFFF).
@ USET_SERIALIZED_STATIC_ARRAY_CAPACITY
Capacity of USerializedSet::staticArray.
Definition: uset.h:250
U_CAPI UBool uset_containsSome(const USet *set1, const USet *set2)
Returns true if set1 contains some of the characters and strings of set2.
U_CAPI int32_t uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
U_CAPI UBool uset_containsNone(const USet *set1, const USet *set2)
Returns true if set1 contains none of the characters and strings of set2.
U_CAPI void uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length)
Retains EACH of the characters in this string.
U_CAPI UBool uset_contains(const USet *set, UChar32 c)
Returns true if the given USet contains the given character.
U_CAPI int32_t uset_applyPattern(USet *set, const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *status)
Modifies the set to represent the set specified by the given pattern.
U_CAPI void uset_complementAll(USet *set, const USet *complement)
Complements in this set all elements contained in the specified set.
U_CAPI void uset_applyPropertyAlias(USet *set, const UChar *prop, int32_t propLength, const UChar *value, int32_t valueLength, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given property.
U_CAPI void uset_close(USet *set)
Disposes of the storage used by a USet object.
struct USet USet
USet is the C API type corresponding to C++ class UnicodeSet.
Definition: uset.h:53
U_CAPI USet * uset_openPatternOptions(const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *ec)
Creates a set from the given pattern.
U_CAPI USet * uset_openEmpty(void)
Create an empty USet object.
U_CAPI void uset_addAll(USet *set, const USet *additionalSet)
Adds all of the elements in the specified set to this set if they're not already present.
U_CAPI void uset_removeString(USet *set, const UChar *str, int32_t strLen)
Removes the given string from the given USet.
U_CAPI void uset_compact(USet *set)
Reallocate this object's internal structures to take up the least possible space, without changing thi...
U_CAPI int32_t uset_getItem(const USet *set, int32_t itemIndex, UChar32 *start, UChar32 *end, UChar *str, int32_t strCapacity, UErrorCode *ec)
Returns an item of this set.
U_CAPI void uset_closeOver(USet *set, int32_t attributes)
Close this set over the given attribute.
U_CAPI void uset_addAllCodePoints(USet *set, const UChar *str, int32_t strLen)
Adds each of the characters in this string to the set.
U_CAPI USet * uset_cloneAsThawed(const USet *set)
Clone the set and make the clone mutable.
U_CAPI void uset_retainString(USet *set, const UChar *str, int32_t length)
Retains only the specified string from this set if it is present.
U_CAPI void uset_removeAll(USet *set, const USet *removeSet)
Removes from this set all of its elements that are contained in the specified set.
U_CAPI UBool uset_containsRange(const USet *set, UChar32 start, UChar32 end)
Returns true if the given USet contains all characters c where start <= c && c <= end.
U_CAPI UBool uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex, UChar32 *pStart, UChar32 *pEnd)
Returns a range of characters contained in the given serialized set.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:430
@ U_ZERO_ERROR
No error, no warning.
Definition: utypes.h:465
#define U_SUCCESS(x)
Does the error code indicate success?
Definition: utypes.h:742