utf8.h 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. /*
  2. *
  3. * Embedded Linux library
  4. *
  5. * Copyright (C) 2011-2014 Intel Corporation. All rights reserved.
  6. *
  7. * This library is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * This library is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with this library; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  20. *
  21. */
  22. #ifndef __ELL_UTF8_H
  23. #define __ELL_UTF8_H
  24. #include <stdbool.h>
  25. #include <wchar.h>
  26. #include <sys/types.h>
  27. #ifdef __cplusplus
  28. extern "C" {
  29. #endif
/*
 * Classification table consulted by the l_ascii_is*() macros in this header.
 * Those macros index it with a character cast to unsigned char and test the
 * entry against the L_ASCII_* bit flags of enum l_ascii.
 *
 * Only the declaration lives here; the definition (and thus the array size)
 * is not visible from this header -- presumably one entry per unsigned char
 * value; confirm against the defining .c file.
 */
extern unsigned char l_ascii_table[];
  31. enum l_ascii {
  32. L_ASCII_CNTRL = 0x80,
  33. L_ASCII_PRINT = 0x40,
  34. L_ASCII_PUNCT = 0x20,
  35. L_ASCII_SPACE = 0x10,
  36. L_ASCII_XDIGIT = 0x08,
  37. L_ASCII_UPPER = 0x04,
  38. L_ASCII_LOWER = 0x02,
  39. L_ASCII_DIGIT = 0x01,
  40. L_ASCII_ALPHA = L_ASCII_LOWER | L_ASCII_UPPER,
  41. L_ASCII_ALNUM = L_ASCII_ALPHA | L_ASCII_DIGIT,
  42. L_ASCII_GRAPH = L_ASCII_ALNUM | L_ASCII_PUNCT,
  43. };
  44. #define l_ascii_isalnum(c) \
  45. ((l_ascii_table[(unsigned char) (c)] & L_ASCII_ALNUM) != 0)
  46. #define l_ascii_isalpha(c) \
  47. ((l_ascii_table[(unsigned char) (c)] & L_ASCII_ALPHA) != 0)
  48. #define l_ascii_iscntrl(c) \
  49. ((l_ascii_table[(unsigned char) (c)] & L_ASCII_CNTRL) != 0)
  50. #define l_ascii_isdigit(c) \
  51. ((l_ascii_table[(unsigned char) (c)] & L_ASCII_DIGIT) != 0)
  52. #define l_ascii_isgraph(c) \
  53. ((l_ascii_table[(unsigned char) (c)] & L_ASCII_GRAPH) != 0)
  54. #define l_ascii_islower(c) \
  55. ((l_ascii_table[(unsigned char) (c)] & L_ASCII_LOWER) != 0)
  56. #define l_ascii_isprint(c) \
  57. ((l_ascii_table[(unsigned char) (c)] & L_ASCII_PRINT) != 0)
  58. #define l_ascii_ispunct(c) \
  59. ((l_ascii_table[(unsigned char) (c)] & L_ASCII_PUNCT) != 0)
  60. #define l_ascii_isspace(c) \
  61. ((l_ascii_table[(unsigned char) (c)] & L_ASCII_SPACE) != 0)
  62. #define l_ascii_isupper(c) \
  63. ((l_ascii_table[(unsigned char) (c)] & L_ASCII_UPPER) != 0)
  64. #define l_ascii_isxdigit(c) \
  65. ((l_ascii_table[(unsigned char) (c)] & L_ASCII_XDIGIT) != 0)
  66. #if __STDC_VERSION__ <= 199409L
  67. #define inline __inline__
  68. #endif
  69. static inline __attribute__ ((always_inline))
  70. bool l_ascii_isblank(unsigned char c)
  71. {
  72. if (c == ' ' || c == '\t')
  73. return true;
  74. return false;
  75. }
  76. static inline __attribute__ ((always_inline)) bool l_ascii_isascii(int c)
  77. {
  78. if (c <= 127)
  79. return true;
  80. return false;
  81. }
/*
 * UTF-8 helpers. Only the prototypes are visible in this header; the notes
 * below are inferred from the names and signatures and should be confirmed
 * against the implementation.
 */

/*
 * Presumably checks whether the @len bytes at @src form valid UTF-8.
 * @end is an output pointer -- likely set to where validation stopped;
 * TODO confirm, including whether NULL is accepted.
 */
bool l_utf8_validate(const char *src, size_t len, const char **end);

/*
 * Takes a string and returns a size_t length. NOTE(review): whether this
 * counts bytes or code points is not visible here -- confirm.
 */
size_t l_utf8_strlen(const char *str);

/*
 * Presumably decodes one code point from at most @len bytes of @str into
 * *@cp. The meaning of the int result (bytes consumed? negative on error?)
 * is not visible here -- confirm.
 */
int l_utf8_get_codepoint(const char *str, size_t len, wchar_t *cp);

/*
 * Presumably encodes @c as UTF-8 into @out_buf and returns the number of
 * bytes written. The required capacity of @out_buf and whether the output
 * is NUL-terminated are not visible here -- confirm.
 */
size_t l_utf8_from_wchar(wchar_t c, char *out_buf);

/*
 * Presumably converts UTF-16 input to a UTF-8 string. @utf16_size is signed
 * (ssize_t), which suggests a negative value has a special meaning -- TODO
 * confirm. Ownership of the returned pointer is not visible here.
 */
char *l_utf8_from_utf16(const void *utf16, ssize_t utf16_size);

/*
 * Presumably converts a UTF-8 string to UTF-16, reporting the result size
 * through *@out_size. Units of the size and ownership of the returned
 * buffer are not visible here -- confirm.
 */
void *l_utf8_to_utf16(const char *utf8, size_t *out_size);

/*
 * Presumably converts big-endian UCS-2 input to a UTF-8 string; the signed
 * @ucs2be_size parameter mirrors l_utf8_from_utf16() -- confirm semantics.
 */
char *l_utf8_from_ucs2be(const void *ucs2be, ssize_t ucs2be_size);

/*
 * Presumably converts a UTF-8 string to big-endian UCS-2, reporting the
 * result size through *@out_size -- confirm units and ownership.
 */
void *l_utf8_to_ucs2be(const char *utf8, size_t *out_size);
  90. #ifdef __cplusplus
  91. }
  92. #endif
  93. #endif /* __ELL_UTF8_H */