up_alphabetic {rebus.unicode} | R Documentation |
Unicode Properties
Description
Match a Unicode Property.
Usage
up_alphabetic(lo, hi, char_class = TRUE)
up_ascii_hex_digit(lo, hi, char_class = TRUE)
up_bidi_control(lo, hi, char_class = TRUE)
up_bidi_mirrored(lo, hi, char_class = TRUE)
up_case_ignorable(lo, hi, char_class = TRUE)
up_case_sensitive(lo, hi, char_class = TRUE)
up_cased(lo, hi, char_class = TRUE)
up_changes_when_casefolded(lo, hi, char_class = TRUE)
up_changes_when_casemapped(lo, hi, char_class = TRUE)
up_changes_when_lowercased(lo, hi, char_class = TRUE)
up_changes_when_nfkc_casefolded(lo, hi, char_class = TRUE)
up_changes_when_titlecased(lo, hi, char_class = TRUE)
up_changes_when_uppercased(lo, hi, char_class = TRUE)
up_dash(lo, hi, char_class = TRUE)
up_default_ignorable_code_point(lo, hi, char_class = TRUE)
up_deprecated(lo, hi, char_class = TRUE)
up_diacritic(lo, hi, char_class = TRUE)
up_extender(lo, hi, char_class = TRUE)
up_hex_digit(lo, hi, char_class = TRUE)
up_hyphen(lo, hi, char_class = TRUE)
up_id_continue(lo, hi, char_class = TRUE)
up_id_start(lo, hi, char_class = TRUE)
up_ideographic(lo, hi, char_class = TRUE)
up_lowercase(lo, hi, char_class = TRUE)
up_math(lo, hi, char_class = TRUE)
up_noncharacter_code_point(lo, hi, char_class = TRUE)
up_posix_alnum(lo, hi, char_class = TRUE)
up_posix_blank(lo, hi, char_class = TRUE)
up_posix_graph(lo, hi, char_class = TRUE)
up_posix_print(lo, hi, char_class = TRUE)
up_posix_xdigit(lo, hi, char_class = TRUE)
up_quotation_mark(lo, hi, char_class = TRUE)
up_soft_dotted(lo, hi, char_class = TRUE)
up_terminal_punctuation(lo, hi, char_class = TRUE)
up_uppercase(lo, hi, char_class = TRUE)
up_white_space(lo, hi, char_class = TRUE)
UP_ALPHABETIC
UP_ASCII_HEX_DIGIT
UP_BIDI_CONTROL
UP_BIDI_MIRRORED
UP_DASH
UP_DEFAULT_IGNORABLE_CODE_POINT
UP_DEPRECATED
UP_DIACRITIC
UP_EXTENDER
UP_HEX_DIGIT
UP_HYPHEN
UP_ID_CONTINUE
UP_ID_START
UP_IDEOGRAPHIC
UP_LOWERCASE
UP_MATH
UP_NONCHARACTER_CODE_POINT
UP_QUOTATION_MARK
UP_SOFT_DOTTED
UP_TERMINAL_PUNCTUATION
UP_UPPERCASE
UP_WHITE_SPACE
UP_CASE_SENSITIVE
UP_POSIX_ALNUM
UP_POSIX_BLANK
UP_POSIX_GRAPH
UP_POSIX_PRINT
UP_POSIX_XDIGIT
UP_CASED
UP_CASE_IGNORABLE
UP_CHANGES_WHEN_LOWERCASED
UP_CHANGES_WHEN_UPPERCASED
UP_CHANGES_WHEN_TITLECASED
UP_CHANGES_WHEN_CASEFOLDED
UP_CHANGES_WHEN_CASEMAPPED
UP_CHANGES_WHEN_NFKC_CASEFOLDED
Arguments
lo |
A non-negative integer. Minimum number of repeats, when grouped. |
hi |
A positive integer. Maximum number of repeats, when grouped. |
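char_class |
A logical value. Should the property be wrapped in a character class? Defaults to TRUE; use FALSE to omit the class wrapper. |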
Format
An object of class regex (inherits from character) of length 1.
Value
A character vector representing part or all of a regular expression.
References
Table 12 of the Unicode Standard Annex #44 defines the Unicode General Categories. http://www.unicode.org/reports/tr44/
You can see which characters are contained in a category by visiting, e.g., http://www.fileformat.info/info/unicode/category/Nd/list.htm
See Also
unicode_general_category, Unicode, stringi-search-charclass
Examples
# Classes
up_math()
up_posix_alnum()
up_changes_when_uppercased()
up_diacritic()
# With repetition
up_diacritic(3, 6)
up_quotation_mark(1, Inf)
up_posix_xdigit(0, Inf)
# Without a class wrapper
up_hyphen(char_class = FALSE)
# Constants
UP_ALPHABETIC
UP_DASH
UP_POSIX_ALNUM
UP_CHANGES_WHEN_LOWERCASED
## Not run:
# All the Unicode properties.
# Not run, since it generates lots of output
ls("package:rebus.unicode", pattern = "^up")
## End(Not run)
# Usage
# Hello in Samoan, Serbian, Persian, Simplified Chinese
hello <- "t\u101lofa, \u437\u434\u440\u430\u432\u43e, \u633\u644\u627\u645, \u4f60\u597d"
stringi::stri_extract_all_regex(hello, up_alphabetic(1, Inf))
stringi::stri_extract_all_regex(hello, up_case_sensitive(1, Inf))
[Package rebus.unicode version 0.0-2 Index]