diff --git a/go.mod b/go.mod index b5116180..e4ac9d1e 100644 --- a/go.mod +++ b/go.mod @@ -36,7 +36,7 @@ require ( github.com/spf13/cobra v1.7.0 github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.8.4 - github.com/zclconf/go-cty v1.13.0 + github.com/zclconf/go-cty v1.14.1 go.opentelemetry.io/otel v1.14.0 go.opentelemetry.io/otel/trace v1.14.0 golang.org/x/mod v0.11.0 @@ -57,7 +57,6 @@ require ( github.com/Shopify/logrus-bugsnag v0.0.0-20171204204709-577dee27f20d // indirect github.com/agext/levenshtein v1.2.3 // indirect github.com/apparentlymart/go-cidr v1.0.1 // indirect - github.com/apparentlymart/go-textseg/v13 v13.0.0 // indirect github.com/apparentlymart/go-textseg/v15 v15.0.0 // indirect github.com/aws/aws-sdk-go-v2 v1.17.6 // indirect github.com/aws/aws-sdk-go-v2/credentials v1.13.16 // indirect diff --git a/go.sum b/go.sum index 8fd22743..a8638369 100644 --- a/go.sum +++ b/go.sum @@ -60,8 +60,6 @@ github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kd github.com/apparentlymart/go-cidr v1.0.1 h1:NmIwLZ/KdsjIUlhf+/Np40atNXm/+lZ5txfTJ/SpF+U= github.com/apparentlymart/go-cidr v1.0.1/go.mod h1:EBcsNrHc3zQeuaeCeCtQruQm+n9/YjEn/vI25Lg7Gwc= github.com/apparentlymart/go-textseg/v12 v12.0.0/go.mod h1:S/4uRK2UtaQttw1GenVJEynmyUenKwP++x/+DdGV/Ec= -github.com/apparentlymart/go-textseg/v13 v13.0.0 h1:Y+KvPE1NYz0xl601PVImeQfFyEy6iT90AvPUL1NNfNw= -github.com/apparentlymart/go-textseg/v13 v13.0.0/go.mod h1:ZK2fH7c4NqDTLtiYLvIkEghdlcqw7yxLeM89kiTRPUo= github.com/apparentlymart/go-textseg/v15 v15.0.0 h1:uYvfpb3DyLSCGWnctWKGj857c6ew1u1fNQOlOtuGxQY= github.com/apparentlymart/go-textseg/v15 v15.0.0/go.mod h1:K8XmNZdhEBkdlyDdvbmmsvpAG721bKi0joRfFdHIWJ4= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= @@ -531,8 +529,8 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/zclconf/go-cty v1.4.0/go.mod h1:nHzOclRkoj++EU9ZjSrZvRG0BXIWt8c7loYc0qXAFGQ= -github.com/zclconf/go-cty v1.13.0 h1:It5dfKTTZHe9aeppbNOda3mN7Ag7sg6QkBNm6TkyFa0= -github.com/zclconf/go-cty v1.13.0/go.mod h1:YKQzy/7pZ7iq2jNFzy5go57xdxdWoLLpaEp4u238AE0= +github.com/zclconf/go-cty v1.14.1 h1:t9fyA35fwjjUMcmL5hLER+e/rEPqrbCK1/OSE4SI9KA= +github.com/zclconf/go-cty v1.14.1/go.mod h1:VvMs5i0vgZdhYawQNq5kePSpLAoz8u1xvZgrPIxfnZE= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= diff --git a/vendor/github.com/apparentlymart/go-textseg/v13/LICENSE b/vendor/github.com/apparentlymart/go-textseg/v13/LICENSE deleted file mode 100644 index 684b03b4..00000000 --- a/vendor/github.com/apparentlymart/go-textseg/v13/LICENSE +++ /dev/null @@ -1,95 +0,0 @@ -Copyright (c) 2017 Martin Atkins - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - ---------- - -Unicode table generation programs are under a separate copyright and license: - -Copyright (c) 2014 Couchbase, Inc. -Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -except in compliance with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software distributed under the -License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -either express or implied. See the License for the specific language governing permissions -and limitations under the License. - ---------- - -Grapheme break data is provided as part of the Unicode character database, -copright 2016 Unicode, Inc, which is provided with the following license: - -Unicode Data Files include all data files under the directories -http://www.unicode.org/Public/, http://www.unicode.org/reports/, -http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and -http://www.unicode.org/utility/trac/browser/. - -Unicode Data Files do not include PDF online code charts under the -directory http://www.unicode.org/Public/. - -Software includes any source code published in the Unicode Standard -or under the directories -http://www.unicode.org/Public/, http://www.unicode.org/reports/, -http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and -http://www.unicode.org/utility/trac/browser/. - -NOTICE TO USER: Carefully read the following legal agreement. -BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S -DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), -YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE -TERMS AND CONDITIONS OF THIS AGREEMENT. -IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE -THE DATA FILES OR SOFTWARE. - -COPYRIGHT AND PERMISSION NOTICE - -Copyright © 1991-2017 Unicode, Inc. All rights reserved. -Distributed under the Terms of Use in http://www.unicode.org/copyright.html. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of the Unicode data files and any associated documentation -(the "Data Files") or Unicode software and any associated documentation -(the "Software") to deal in the Data Files or Software -without restriction, including without limitation the rights to use, -copy, modify, merge, publish, distribute, and/or sell copies of -the Data Files or Software, and to permit persons to whom the Data Files -or Software are furnished to do so, provided that either -(a) this copyright and permission notice appear with all copies -of the Data Files or Software, or -(b) this copyright and permission notice appear in associated -Documentation. - -THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF -ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE -WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT OF THIRD PARTY RIGHTS. -IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS -NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL -DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, -DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER -TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -PERFORMANCE OF THE DATA FILES OR SOFTWARE. - -Except as contained in this notice, the name of a copyright holder -shall not be used in advertising or otherwise to promote the sale, -use or other dealings in these Data Files or Software without prior -written authorization of the copyright holder. diff --git a/vendor/github.com/apparentlymart/go-textseg/v13/textseg/all_tokens.go b/vendor/github.com/apparentlymart/go-textseg/v13/textseg/all_tokens.go deleted file mode 100644 index 5752e9ef..00000000 --- a/vendor/github.com/apparentlymart/go-textseg/v13/textseg/all_tokens.go +++ /dev/null @@ -1,30 +0,0 @@ -package textseg - -import ( - "bufio" - "bytes" -) - -// AllTokens is a utility that uses a bufio.SplitFunc to produce a slice of -// all of the recognized tokens in the given buffer. -func AllTokens(buf []byte, splitFunc bufio.SplitFunc) ([][]byte, error) { - scanner := bufio.NewScanner(bytes.NewReader(buf)) - scanner.Split(splitFunc) - var ret [][]byte - for scanner.Scan() { - ret = append(ret, scanner.Bytes()) - } - return ret, scanner.Err() -} - -// TokenCount is a utility that uses a bufio.SplitFunc to count the number of -// recognized tokens in the given buffer. -func TokenCount(buf []byte, splitFunc bufio.SplitFunc) (int, error) { - scanner := bufio.NewScanner(bytes.NewReader(buf)) - scanner.Split(splitFunc) - var ret int - for scanner.Scan() { - ret++ - } - return ret, scanner.Err() -} diff --git a/vendor/github.com/apparentlymart/go-textseg/v13/textseg/emoji_table.rl b/vendor/github.com/apparentlymart/go-textseg/v13/textseg/emoji_table.rl deleted file mode 100644 index f2cb484a..00000000 --- a/vendor/github.com/apparentlymart/go-textseg/v13/textseg/emoji_table.rl +++ /dev/null @@ -1,525 +0,0 @@ -# The following Ragel file was autogenerated with unicode2ragel.rb -# from: https://www.unicode.org/Public/13.0.0/ucd/emoji/emoji-data.txt -# -# It defines ["Extended_Pictographic"]. -# -# To use this, make sure that your alphtype is set to byte, -# and that your input is in utf8. - -%%{ - machine Emoji; - - Extended_Pictographic = - 0xC2 0xA9 #E0.6 [1] (©️) copyright - | 0xC2 0xAE #E0.6 [1] (®️) registered - | 0xE2 0x80 0xBC #E0.6 [1] (‼️) double exclamation mark - | 0xE2 0x81 0x89 #E0.6 [1] (⁉️) exclamation question ... - | 0xE2 0x84 0xA2 #E0.6 [1] (™️) trade mark - | 0xE2 0x84 0xB9 #E0.6 [1] (ℹ️) information - | 0xE2 0x86 0x94..0x99 #E0.6 [6] (↔️..↙️) left-right arrow..do... - | 0xE2 0x86 0xA9..0xAA #E0.6 [2] (↩️..↪️) right arrow curving ... - | 0xE2 0x8C 0x9A..0x9B #E0.6 [2] (⌚..⌛) watch..hourglass done - | 0xE2 0x8C 0xA8 #E1.0 [1] (⌨️) keyboard - | 0xE2 0x8E 0x88 #E0.0 [1] (⎈) HELM SYMBOL - | 0xE2 0x8F 0x8F #E1.0 [1] (⏏️) eject button - | 0xE2 0x8F 0xA9..0xAC #E0.6 [4] (⏩..⏬) fast-forward button..f... - | 0xE2 0x8F 0xAD..0xAE #E0.7 [2] (⏭️..⏮️) next track button..l... - | 0xE2 0x8F 0xAF #E1.0 [1] (⏯️) play or pause button - | 0xE2 0x8F 0xB0 #E0.6 [1] (⏰) alarm clock - | 0xE2 0x8F 0xB1..0xB2 #E1.0 [2] (⏱️..⏲️) stopwatch..timer clock - | 0xE2 0x8F 0xB3 #E0.6 [1] (⏳) hourglass not done - | 0xE2 0x8F 0xB8..0xBA #E0.7 [3] (⏸️..⏺️) pause button..record... - | 0xE2 0x93 0x82 #E0.6 [1] (Ⓜ️) circled M - | 0xE2 0x96 0xAA..0xAB #E0.6 [2] (▪️..▫️) black small square..... - | 0xE2 0x96 0xB6 #E0.6 [1] (▶️) play button - | 0xE2 0x97 0x80 #E0.6 [1] (◀️) reverse button - | 0xE2 0x97 0xBB..0xBE #E0.6 [4] (◻️..◾) white medium square..... - | 0xE2 0x98 0x80..0x81 #E0.6 [2] (☀️..☁️) sun..cloud - | 0xE2 0x98 0x82..0x83 #E0.7 [2] (☂️..☃️) umbrella..snowman - | 0xE2 0x98 0x84 #E1.0 [1] (☄️) comet - | 0xE2 0x98 0x85 #E0.0 [1] (★) BLACK STAR - | 0xE2 0x98 0x87..0x8D #E0.0 [7] (☇..☍) LIGHTNING..OPPOSITION - | 0xE2 0x98 0x8E #E0.6 [1] (☎️) telephone - | 0xE2 0x98 0x8F..0x90 #E0.0 [2] (☏..☐) WHITE TELEPHONE..BALLO... - | 0xE2 0x98 0x91 #E0.6 [1] (☑️) check box with check - | 0xE2 0x98 0x92 #E0.0 [1] (☒) BALLOT BOX WITH X - | 0xE2 0x98 0x94..0x95 #E0.6 [2] (☔..☕) umbrella with rain dro... - | 0xE2 0x98 0x96..0x97 #E0.0 [2] (☖..☗) WHITE SHOGI PIECE..BLA... - | 0xE2 0x98 0x98 #E1.0 [1] (☘️) shamrock - | 0xE2 0x98 0x99..0x9C #E0.0 [4] (☙..☜) REVERSED ROTATED FLORA... - | 0xE2 0x98 0x9D #E0.6 [1] (☝️) index pointing up - | 0xE2 0x98 0x9E..0x9F #E0.0 [2] (☞..☟) WHITE RIGHT POINTING I... - | 0xE2 0x98 0xA0 #E1.0 [1] (☠️) skull and crossbones - | 0xE2 0x98 0xA1 #E0.0 [1] (☡) CAUTION SIGN - | 0xE2 0x98 0xA2..0xA3 #E1.0 [2] (☢️..☣️) radioactive..biohazard - | 0xE2 0x98 0xA4..0xA5 #E0.0 [2] (☤..☥) CADUCEUS..ANKH - | 0xE2 0x98 0xA6 #E1.0 [1] (☦️) orthodox cross - | 0xE2 0x98 0xA7..0xA9 #E0.0 [3] (☧..☩) CHI RHO..CROSS OF JERU... - | 0xE2 0x98 0xAA #E0.7 [1] (☪️) star and crescent - | 0xE2 0x98 0xAB..0xAD #E0.0 [3] (☫..☭) FARSI SYMBOL..HAMMER A... - | 0xE2 0x98 0xAE #E1.0 [1] (☮️) peace symbol - | 0xE2 0x98 0xAF #E0.7 [1] (☯️) yin yang - | 0xE2 0x98 0xB0..0xB7 #E0.0 [8] (☰..☷) TRIGRAM FOR HEAVEN..TR... - | 0xE2 0x98 0xB8..0xB9 #E0.7 [2] (☸️..☹️) wheel of dharma..fro... - | 0xE2 0x98 0xBA #E0.6 [1] (☺️) smiling face - | 0xE2 0x98 0xBB..0xBF #E0.0 [5] (☻..☿) BLACK SMILING FACE..ME... - | 0xE2 0x99 0x80 #E4.0 [1] (♀️) female sign - | 0xE2 0x99 0x81 #E0.0 [1] (♁) EARTH - | 0xE2 0x99 0x82 #E4.0 [1] (♂️) male sign - | 0xE2 0x99 0x83..0x87 #E0.0 [5] (♃..♇) JUPITER..PLUTO - | 0xE2 0x99 0x88..0x93 #E0.6 [12] (♈..♓) Aries..Pisces - | 0xE2 0x99 0x94..0x9E #E0.0 [11] (♔..♞) WHITE CHESS KING..BLAC... - | 0xE2 0x99 0x9F #E11.0 [1] (♟️) chess pawn - | 0xE2 0x99 0xA0 #E0.6 [1] (♠️) spade suit - | 0xE2 0x99 0xA1..0xA2 #E0.0 [2] (♡..♢) WHITE HEART SUIT..WHIT... - | 0xE2 0x99 0xA3 #E0.6 [1] (♣️) club suit - | 0xE2 0x99 0xA4 #E0.0 [1] (♤) WHITE SPADE SUIT - | 0xE2 0x99 0xA5..0xA6 #E0.6 [2] (♥️..♦️) heart suit..diamond ... - | 0xE2 0x99 0xA7 #E0.0 [1] (♧) WHITE CLUB SUIT - | 0xE2 0x99 0xA8 #E0.6 [1] (♨️) hot springs - | 0xE2 0x99 0xA9..0xBA #E0.0 [18] (♩..♺) QUARTER NOTE..RECYCLIN... - | 0xE2 0x99 0xBB #E0.6 [1] (♻️) recycling symbol - | 0xE2 0x99 0xBC..0xBD #E0.0 [2] (♼..♽) RECYCLED PAPER SYMBOL.... - | 0xE2 0x99 0xBE #E11.0 [1] (♾️) infinity - | 0xE2 0x99 0xBF #E0.6 [1] (♿) wheelchair symbol - | 0xE2 0x9A 0x80..0x85 #E0.0 [6] (⚀..⚅) DIE FACE-1..DIE FACE-6 - | 0xE2 0x9A 0x90..0x91 #E0.0 [2] (⚐..⚑) WHITE FLAG..BLACK FLAG - | 0xE2 0x9A 0x92 #E1.0 [1] (⚒️) hammer and pick - | 0xE2 0x9A 0x93 #E0.6 [1] (⚓) anchor - | 0xE2 0x9A 0x94 #E1.0 [1] (⚔️) crossed swords - | 0xE2 0x9A 0x95 #E4.0 [1] (⚕️) medical symbol - | 0xE2 0x9A 0x96..0x97 #E1.0 [2] (⚖️..⚗️) balance scale..alembic - | 0xE2 0x9A 0x98 #E0.0 [1] (⚘) FLOWER - | 0xE2 0x9A 0x99 #E1.0 [1] (⚙️) gear - | 0xE2 0x9A 0x9A #E0.0 [1] (⚚) STAFF OF HERMES - | 0xE2 0x9A 0x9B..0x9C #E1.0 [2] (⚛️..⚜️) atom symbol..fleur-d... - | 0xE2 0x9A 0x9D..0x9F #E0.0 [3] (⚝..⚟) OUTLINED WHITE STAR..T... - | 0xE2 0x9A 0xA0..0xA1 #E0.6 [2] (⚠️..⚡) warning..high voltage - | 0xE2 0x9A 0xA2..0xA6 #E0.0 [5] (⚢..⚦) DOUBLED FEMALE SIGN..M... - | 0xE2 0x9A 0xA7 #E13.0 [1] (⚧️) transgender symbol - | 0xE2 0x9A 0xA8..0xA9 #E0.0 [2] (⚨..⚩) VERTICAL MALE WITH STR... - | 0xE2 0x9A 0xAA..0xAB #E0.6 [2] (⚪..⚫) white circle..black ci... - | 0xE2 0x9A 0xAC..0xAF #E0.0 [4] (⚬..⚯) MEDIUM SMALL WHITE CIR... - | 0xE2 0x9A 0xB0..0xB1 #E1.0 [2] (⚰️..⚱️) coffin..funeral urn - | 0xE2 0x9A 0xB2..0xBC #E0.0 [11] (⚲..⚼) NEUTER..SESQUIQUADRATE - | 0xE2 0x9A 0xBD..0xBE #E0.6 [2] (⚽..⚾) soccer ball..baseball - | 0xE2 0x9A 0xBF..0xFF #E0.0 [5] (⚿..⛃) SQUARED KEY..BLACK DRA... - | 0xE2 0x9B 0x00..0x83 # - | 0xE2 0x9B 0x84..0x85 #E0.6 [2] (⛄..⛅) snowman without snow..... - | 0xE2 0x9B 0x86..0x87 #E0.0 [2] (⛆..⛇) RAIN..BLACK SNOWMAN - | 0xE2 0x9B 0x88 #E0.7 [1] (⛈️) cloud with lightning ... - | 0xE2 0x9B 0x89..0x8D #E0.0 [5] (⛉..⛍) TURNED WHITE SHOGI PIE... - | 0xE2 0x9B 0x8E #E0.6 [1] (⛎) Ophiuchus - | 0xE2 0x9B 0x8F #E0.7 [1] (⛏️) pick - | 0xE2 0x9B 0x90 #E0.0 [1] (⛐) CAR SLIDING - | 0xE2 0x9B 0x91 #E0.7 [1] (⛑️) rescue worker’s helmet - | 0xE2 0x9B 0x92 #E0.0 [1] (⛒) CIRCLED CROSSING LANES - | 0xE2 0x9B 0x93 #E0.7 [1] (⛓️) chains - | 0xE2 0x9B 0x94 #E0.6 [1] (⛔) no entry - | 0xE2 0x9B 0x95..0xA8 #E0.0 [20] (⛕..⛨) ALTERNATE ONE-WAY LEFT... - | 0xE2 0x9B 0xA9 #E0.7 [1] (⛩️) shinto shrine - | 0xE2 0x9B 0xAA #E0.6 [1] (⛪) church - | 0xE2 0x9B 0xAB..0xAF #E0.0 [5] (⛫..⛯) CASTLE..MAP SYMBOL FOR... - | 0xE2 0x9B 0xB0..0xB1 #E0.7 [2] (⛰️..⛱️) mountain..umbrella o... - | 0xE2 0x9B 0xB2..0xB3 #E0.6 [2] (⛲..⛳) fountain..flag in hole - | 0xE2 0x9B 0xB4 #E0.7 [1] (⛴️) ferry - | 0xE2 0x9B 0xB5 #E0.6 [1] (⛵) sailboat - | 0xE2 0x9B 0xB6 #E0.0 [1] (⛶) SQUARE FOUR CORNERS - | 0xE2 0x9B 0xB7..0xB9 #E0.7 [3] (⛷️..⛹️) skier..person bounci... - | 0xE2 0x9B 0xBA #E0.6 [1] (⛺) tent - | 0xE2 0x9B 0xBB..0xBC #E0.0 [2] (⛻..⛼) JAPANESE BANK SYMBOL..... - | 0xE2 0x9B 0xBD #E0.6 [1] (⛽) fuel pump - | 0xE2 0x9B 0xBE..0xFF #E0.0 [4] (⛾..✁) CUP ON BLACK SQUARE..U... - | 0xE2 0x9C 0x00..0x81 # - | 0xE2 0x9C 0x82 #E0.6 [1] (✂️) scissors - | 0xE2 0x9C 0x83..0x84 #E0.0 [2] (✃..✄) LOWER BLADE SCISSORS..... - | 0xE2 0x9C 0x85 #E0.6 [1] (✅) check mark button - | 0xE2 0x9C 0x88..0x8C #E0.6 [5] (✈️..✌️) airplane..victory hand - | 0xE2 0x9C 0x8D #E0.7 [1] (✍️) writing hand - | 0xE2 0x9C 0x8E #E0.0 [1] (✎) LOWER RIGHT PENCIL - | 0xE2 0x9C 0x8F #E0.6 [1] (✏️) pencil - | 0xE2 0x9C 0x90..0x91 #E0.0 [2] (✐..✑) UPPER RIGHT PENCIL..WH... - | 0xE2 0x9C 0x92 #E0.6 [1] (✒️) black nib - | 0xE2 0x9C 0x94 #E0.6 [1] (✔️) check mark - | 0xE2 0x9C 0x96 #E0.6 [1] (✖️) multiply - | 0xE2 0x9C 0x9D #E0.7 [1] (✝️) latin cross - | 0xE2 0x9C 0xA1 #E0.7 [1] (✡️) star of David - | 0xE2 0x9C 0xA8 #E0.6 [1] (✨) sparkles - | 0xE2 0x9C 0xB3..0xB4 #E0.6 [2] (✳️..✴️) eight-spoked asteris... - | 0xE2 0x9D 0x84 #E0.6 [1] (❄️) snowflake - | 0xE2 0x9D 0x87 #E0.6 [1] (❇️) sparkle - | 0xE2 0x9D 0x8C #E0.6 [1] (❌) cross mark - | 0xE2 0x9D 0x8E #E0.6 [1] (❎) cross mark button - | 0xE2 0x9D 0x93..0x95 #E0.6 [3] (❓..❕) question mark..white e... - | 0xE2 0x9D 0x97 #E0.6 [1] (❗) exclamation mark - | 0xE2 0x9D 0xA3 #E1.0 [1] (❣️) heart exclamation - | 0xE2 0x9D 0xA4 #E0.6 [1] (❤️) red heart - | 0xE2 0x9D 0xA5..0xA7 #E0.0 [3] (❥..❧) ROTATED HEAVY BLACK HE... - | 0xE2 0x9E 0x95..0x97 #E0.6 [3] (➕..➗) plus..divide - | 0xE2 0x9E 0xA1 #E0.6 [1] (➡️) right arrow - | 0xE2 0x9E 0xB0 #E0.6 [1] (➰) curly loop - | 0xE2 0x9E 0xBF #E1.0 [1] (➿) double curly loop - | 0xE2 0xA4 0xB4..0xB5 #E0.6 [2] (⤴️..⤵️) right arrow curving ... - | 0xE2 0xAC 0x85..0x87 #E0.6 [3] (⬅️..⬇️) left arrow..down arrow - | 0xE2 0xAC 0x9B..0x9C #E0.6 [2] (⬛..⬜) black large square..wh... - | 0xE2 0xAD 0x90 #E0.6 [1] (⭐) star - | 0xE2 0xAD 0x95 #E0.6 [1] (⭕) hollow red circle - | 0xE3 0x80 0xB0 #E0.6 [1] (〰️) wavy dash - | 0xE3 0x80 0xBD #E0.6 [1] (〽️) part alternation mark - | 0xE3 0x8A 0x97 #E0.6 [1] (㊗️) Japanese “congratulat... - | 0xE3 0x8A 0x99 #E0.6 [1] (㊙️) Japanese “secret” button - | 0xF0 0x9F 0x80 0x80..0x83 #E0.0 [4] (🀀..🀃) MAHJONG TILE EAST W... - | 0xF0 0x9F 0x80 0x84 #E0.6 [1] (🀄) mahjong red dragon - | 0xF0 0x9F 0x80 0x85..0xFF #E0.0 [202] (🀅..🃎) MAHJONG TILE ... - | 0xF0 0x9F 0x81..0x82 0x00..0xFF # - | 0xF0 0x9F 0x83 0x00..0x8E # - | 0xF0 0x9F 0x83 0x8F #E0.6 [1] (🃏) joker - | 0xF0 0x9F 0x83 0x90..0xBF #E0.0 [48] (🃐..🃿) ..<... - | 0xF0 0x9F 0x84 0x8D..0x8F #E0.0 [3] (🄍..🄏) CIRCLED ZERO WITH S... - | 0xF0 0x9F 0x84 0xAF #E0.0 [1] (🄯) COPYLEFT SYMBOL - | 0xF0 0x9F 0x85 0xAC..0xAF #E0.0 [4] (🅬..🅯) RAISED MR SIGN..CIR... - | 0xF0 0x9F 0x85 0xB0..0xB1 #E0.6 [2] (🅰️..🅱️) A button (blood t... - | 0xF0 0x9F 0x85 0xBE..0xBF #E0.6 [2] (🅾️..🅿️) O button (blood t... - | 0xF0 0x9F 0x86 0x8E #E0.6 [1] (🆎) AB button (blood type) - | 0xF0 0x9F 0x86 0x91..0x9A #E0.6 [10] (🆑..🆚) CL button..VS button - | 0xF0 0x9F 0x86 0xAD..0xFF #E0.0 [57] (🆭..🇥) MASK WORK SYMBOL..<... - | 0xF0 0x9F 0x87 0x00..0xA5 # - | 0xF0 0x9F 0x88 0x81..0x82 #E0.6 [2] (🈁..🈂️) Japanese “here” bu... - | 0xF0 0x9F 0x88 0x83..0x8F #E0.0 [13] (🈃..🈏) ..<... - | 0xF0 0x9F 0x88 0x9A #E0.6 [1] (🈚) Japanese “free of char... - | 0xF0 0x9F 0x88 0xAF #E0.6 [1] (🈯) Japanese “reserved” bu... - | 0xF0 0x9F 0x88 0xB2..0xBA #E0.6 [9] (🈲..🈺) Japanese “prohibite... - | 0xF0 0x9F 0x88 0xBC..0xBF #E0.0 [4] (🈼..🈿) ..<... - | 0xF0 0x9F 0x89 0x89..0x8F #E0.0 [7] (🉉..🉏) ..<... - | 0xF0 0x9F 0x89 0x90..0x91 #E0.6 [2] (🉐..🉑) Japanese “bargain” ... - | 0xF0 0x9F 0x89 0x92..0xFF #E0.0 [174] (🉒..🋿) ..<... - | 0xF0 0x9F 0x9B 0xA0..0xA5 #E0.7 [6] (🛠️..🛥️) hammer and wrench... - | 0xF0 0x9F 0x9B 0xA6..0xA8 #E0.0 [3] (🛦..🛨) UP-POINTING MILITAR... - | 0xF0 0x9F 0x9B 0xA9 #E0.7 [1] (🛩️) small airplane - | 0xF0 0x9F 0x9B 0xAA #E0.0 [1] (🛪) NORTHEAST-POINTING AIR... - | 0xF0 0x9F 0x9B 0xAB..0xAC #E1.0 [2] (🛫..🛬) airplane departure.... - | 0xF0 0x9F 0x9B 0xAD..0xAF #E0.0 [3] (🛭..🛯) ..<... - | 0xF0 0x9F 0x9B 0xB0 #E0.7 [1] (🛰️) satellite - | 0xF0 0x9F 0x9B 0xB1..0xB2 #E0.0 [2] (🛱..🛲) ONCOMING FIRE ENGIN... - | 0xF0 0x9F 0x9B 0xB3 #E0.7 [1] (🛳️) passenger ship - | 0xF0 0x9F 0x9B 0xB4..0xB6 #E3.0 [3] (🛴..🛶) kick scooter..canoe - | 0xF0 0x9F 0x9B 0xB7..0xB8 #E5.0 [2] (🛷..🛸) sled..flying saucer - | 0xF0 0x9F 0x9B 0xB9 #E11.0 [1] (🛹) skateboard - | 0xF0 0x9F 0x9B 0xBA #E12.0 [1] (🛺) auto rickshaw - | 0xF0 0x9F 0x9B 0xBB..0xBC #E13.0 [2] (🛻..🛼) pickup truck..rolle... - | 0xF0 0x9F 0x9B 0xBD..0xBF #E0.0 [3] (🛽..🛿) ..<... - | 0xF0 0x9F 0x9D 0xB4..0xBF #E0.0 [12] (🝴..🝿) ..<... - | 0xF0 0x9F 0x9F 0x95..0x9F #E0.0 [11] (🟕..🟟) CIRCLED TRIANGLE..<... - | 0xF0 0x9F 0x9F 0xA0..0xAB #E12.0 [12] (🟠..🟫) orange circle..brow... - | 0xF0 0x9F 0x9F 0xAC..0xBF #E0.0 [20] (🟬..🟿) ..<... - | 0xF0 0x9F 0xA0 0x8C..0x8F #E0.0 [4] (🠌..🠏) ..<... - | 0xF0 0x9F 0xA1 0x88..0x8F #E0.0 [8] (🡈..🡏) ..<... - | 0xF0 0x9F 0xA1 0x9A..0x9F #E0.0 [6] (🡚..🡟) ..<... - | 0xF0 0x9F 0xA2 0x88..0x8F #E0.0 [8] (🢈..🢏) ..<... - | 0xF0 0x9F 0xA2 0xAE..0xFF #E0.0 [82] (🢮..🣿) ..<... - | 0xF0 0x9F 0xA3 0x00..0xBF # - | 0xF0 0x9F 0xA4 0x8C #E13.0 [1] (🤌) pinched fingers - | 0xF0 0x9F 0xA4 0x8D..0x8F #E12.0 [3] (🤍..🤏) white heart..pinchi... - | 0xF0 0x9F 0xA4 0x90..0x98 #E1.0 [9] (🤐..🤘) zipper-mouth face..... - | 0xF0 0x9F 0xA4 0x99..0x9E #E3.0 [6] (🤙..🤞) call me hand..cross... - | 0xF0 0x9F 0xA4 0x9F #E5.0 [1] (🤟) love-you gesture - | 0xF0 0x9F 0xA4 0xA0..0xA7 #E3.0 [8] (🤠..🤧) cowboy hat face..sn... - | 0xF0 0x9F 0xA4 0xA8..0xAF #E5.0 [8] (🤨..🤯) face with raised ey... - | 0xF0 0x9F 0xA4 0xB0 #E3.0 [1] (🤰) pregnant woman - | 0xF0 0x9F 0xA4 0xB1..0xB2 #E5.0 [2] (🤱..🤲) breast-feeding..pal... - | 0xF0 0x9F 0xA4 0xB3..0xBA #E3.0 [8] (🤳..🤺) selfie..person fencing - | 0xF0 0x9F 0xA4 0xBC..0xBE #E3.0 [3] (🤼..🤾) people wrestling..p... - | 0xF0 0x9F 0xA4 0xBF #E12.0 [1] (🤿) diving mask - | 0xF0 0x9F 0xA5 0x80..0x85 #E3.0 [6] (🥀..🥅) wilted flower..goal... - | 0xF0 0x9F 0xA5 0x87..0x8B #E3.0 [5] (🥇..🥋) 1st place medal..ma... - | 0xF0 0x9F 0xA5 0x8C #E5.0 [1] (🥌) curling stone - | 0xF0 0x9F 0xA5 0x8D..0x8F #E11.0 [3] (🥍..🥏) lacrosse..flying disc - | 0xF0 0x9F 0xA5 0x90..0x9E #E3.0 [15] (🥐..🥞) croissant..pancakes - | 0xF0 0x9F 0xA5 0x9F..0xAB #E5.0 [13] (🥟..🥫) dumpling..canned food - | 0xF0 0x9F 0xA5 0xAC..0xB0 #E11.0 [5] (🥬..🥰) leafy green..smilin... - | 0xF0 0x9F 0xA5 0xB1 #E12.0 [1] (🥱) yawning face - | 0xF0 0x9F 0xA5 0xB2 #E13.0 [1] (🥲) smiling face with tear - | 0xF0 0x9F 0xA5 0xB3..0xB6 #E11.0 [4] (🥳..🥶) partying face..cold... - | 0xF0 0x9F 0xA5 0xB7..0xB8 #E13.0 [2] (🥷..🥸) ninja..disguised face - | 0xF0 0x9F 0xA5 0xB9 #E0.0 [1] (🥹) - | 0xF0 0x9F 0xA5 0xBA #E11.0 [1] (🥺) pleading face - | 0xF0 0x9F 0xA5 0xBB #E12.0 [1] (🥻) sari - | 0xF0 0x9F 0xA5 0xBC..0xBF #E11.0 [4] (🥼..🥿) lab coat..flat shoe - | 0xF0 0x9F 0xA6 0x80..0x84 #E1.0 [5] (🦀..🦄) crab..unicorn - | 0xF0 0x9F 0xA6 0x85..0x91 #E3.0 [13] (🦅..🦑) eagle..squid - | 0xF0 0x9F 0xA6 0x92..0x97 #E5.0 [6] (🦒..🦗) giraffe..cricket - | 0xF0 0x9F 0xA6 0x98..0xA2 #E11.0 [11] (🦘..🦢) kangaroo..swan - | 0xF0 0x9F 0xA6 0xA3..0xA4 #E13.0 [2] (🦣..🦤) mammoth..dodo - | 0xF0 0x9F 0xA6 0xA5..0xAA #E12.0 [6] (🦥..🦪) sloth..oyster - | 0xF0 0x9F 0xA6 0xAB..0xAD #E13.0 [3] (🦫..🦭) beaver..seal - | 0xF0 0x9F 0xA6 0xAE..0xAF #E12.0 [2] (🦮..🦯) guide dog..white cane - | 0xF0 0x9F 0xA6 0xB0..0xB9 #E11.0 [10] (🦰..🦹) red hair..supervillain - | 0xF0 0x9F 0xA6 0xBA..0xBF #E12.0 [6] (🦺..🦿) safety vest..mechan... - | 0xF0 0x9F 0xA7 0x80 #E1.0 [1] (🧀) cheese wedge - | 0xF0 0x9F 0xA7 0x81..0x82 #E11.0 [2] (🧁..🧂) cupcake..salt - | 0xF0 0x9F 0xA7 0x83..0x8A #E12.0 [8] (🧃..🧊) beverage box..ice - | 0xF0 0x9F 0xA7 0x8B #E13.0 [1] (🧋) bubble tea - | 0xF0 0x9F 0xA7 0x8C #E0.0 [1] (🧌) - | 0xF0 0x9F 0xA7 0x8D..0x8F #E12.0 [3] (🧍..🧏) person standing..de... - | 0xF0 0x9F 0xA7 0x90..0xA6 #E5.0 [23] (🧐..🧦) face with monocle..... - | 0xF0 0x9F 0xA7 0xA7..0xBF #E11.0 [25] (🧧..🧿) red envelope..nazar... - | 0xF0 0x9F 0xA8 0x80..0xFF #E0.0 [112] (🨀..🩯) NEUTRAL CHESS KING.... - | 0xF0 0x9F 0xA9 0x00..0xAF # - | 0xF0 0x9F 0xA9 0xB0..0xB3 #E12.0 [4] (🩰..🩳) ballet shoes..shorts - | 0xF0 0x9F 0xA9 0xB4 #E13.0 [1] (🩴) thong sandal - | 0xF0 0x9F 0xA9 0xB5..0xB7 #E0.0 [3] (🩵..🩷) ..<... - | 0xF0 0x9F 0xA9 0xB8..0xBA #E12.0 [3] (🩸..🩺) drop of blood..stet... - | 0xF0 0x9F 0xA9 0xBB..0xBF #E0.0 [5] (🩻..🩿) ..<... - | 0xF0 0x9F 0xAA 0x80..0x82 #E12.0 [3] (🪀..🪂) yo-yo..parachute - | 0xF0 0x9F 0xAA 0x83..0x86 #E13.0 [4] (🪃..🪆) boomerang..nesting ... - | 0xF0 0x9F 0xAA 0x87..0x8F #E0.0 [9] (🪇..🪏) ..<... - | 0xF0 0x9F 0xAA 0x90..0x95 #E12.0 [6] (🪐..🪕) ringed planet..banjo - | 0xF0 0x9F 0xAA 0x96..0xA8 #E13.0 [19] (🪖..🪨) military helmet..rock - | 0xF0 0x9F 0xAA 0xA9..0xAF #E0.0 [7] (🪩..🪯) ..<... - | 0xF0 0x9F 0xAA 0xB0..0xB6 #E13.0 [7] (🪰..🪶) fly..feather - | 0xF0 0x9F 0xAA 0xB7..0xBF #E0.0 [9] (🪷..🪿) ..<... - | 0xF0 0x9F 0xAB 0x80..0x82 #E13.0 [3] (🫀..🫂) anatomical heart..p... - | 0xF0 0x9F 0xAB 0x83..0x8F #E0.0 [13] (🫃..🫏) ..<... - | 0xF0 0x9F 0xAB 0x90..0x96 #E13.0 [7] (🫐..🫖) blueberries..teapot - | 0xF0 0x9F 0xAB 0x97..0xBF #E0.0 [41] (🫗..🫿) ..<... - | 0xF0 0x9F 0xB0 0x80..0xFF #E0.0[1022] (🰀..🿽) 0; _nacts-- { - _acts++ - switch _graphclust_actions[_acts-1] { - case 4: -//line NONE:1 - ts = p - -//line grapheme_clusters.go:3878 - } - } - - _keys = int(_graphclust_key_offsets[cs]) - _trans = int(_graphclust_index_offsets[cs]) - - _klen = int(_graphclust_single_lengths[cs]) - if _klen > 0 { - _lower := int(_keys) - var _mid int - _upper := int(_keys + _klen - 1) - for { - if _upper < _lower { - break - } - - _mid = _lower + ((_upper - _lower) >> 1) - switch { - case data[p] < _graphclust_trans_keys[_mid]: - _upper = _mid - 1 - case data[p] > _graphclust_trans_keys[_mid]: - _lower = _mid + 1 - default: - _trans += int(_mid - int(_keys)) - goto _match - } - } - _keys += _klen - _trans += _klen - } - - _klen = int(_graphclust_range_lengths[cs]) - if _klen > 0 { - _lower := int(_keys) - var _mid int - _upper := int(_keys + (_klen << 1) - 2) - for { - if _upper < _lower { - break - } - - _mid = _lower + (((_upper - _lower) >> 1) & ^1) - switch { - case data[p] < _graphclust_trans_keys[_mid]: - _upper = _mid - 2 - case data[p] > _graphclust_trans_keys[_mid+1]: - _lower = _mid + 2 - default: - _trans += int((_mid - int(_keys)) >> 1) - goto _match - } - } - _trans += _klen - } - - _match: - _trans = int(_graphclust_indicies[_trans]) - _eof_trans: - cs = int(_graphclust_trans_targs[_trans]) - - if _graphclust_trans_actions[_trans] == 0 { - goto _again - } - - _acts = int(_graphclust_trans_actions[_trans]) - _nacts = uint(_graphclust_actions[_acts]) - _acts++ - for ; _nacts > 0; _nacts-- { - _acts++ - switch _graphclust_actions[_acts-1] { - case 0: -//line grapheme_clusters.rl:47 - - startPos = p - - case 1: -//line grapheme_clusters.rl:51 - - endPos = p - - case 5: -//line NONE:1 - te = p + 1 - - case 6: -//line grapheme_clusters.rl:55 - act = 3 - case 7: -//line grapheme_clusters.rl:55 - act = 4 - case 8: -//line grapheme_clusters.rl:55 - te = p + 1 - { - return endPos + 1, data[startPos : endPos+1], nil - } - case 9: -//line grapheme_clusters.rl:55 - te = p + 1 - { - return endPos + 1, data[startPos : endPos+1], nil - } - case 10: -//line grapheme_clusters.rl:55 - te = p - p-- - { - return endPos + 1, data[startPos : endPos+1], nil - } - case 11: -//line grapheme_clusters.rl:55 - te = p - p-- - { - return endPos + 1, data[startPos : endPos+1], nil - } - case 12: -//line grapheme_clusters.rl:55 - te = p - p-- - { - return endPos + 1, data[startPos : endPos+1], nil - } - case 13: -//line grapheme_clusters.rl:55 - te = p - p-- - { - return endPos + 1, data[startPos : endPos+1], nil - } - case 14: -//line grapheme_clusters.rl:55 - te = p - p-- - { - return endPos + 1, data[startPos : endPos+1], nil - } - case 15: -//line grapheme_clusters.rl:55 - te = p - p-- - { - return endPos + 1, data[startPos : endPos+1], nil - } - case 16: -//line grapheme_clusters.rl:55 - p = (te) - 1 - { - return endPos + 1, data[startPos : endPos+1], nil - } - case 17: -//line grapheme_clusters.rl:55 - p = (te) - 1 - { - return endPos + 1, data[startPos : endPos+1], nil - } - case 18: -//line grapheme_clusters.rl:55 - p = (te) - 1 - { - return endPos + 1, data[startPos : endPos+1], nil - } - case 19: -//line grapheme_clusters.rl:55 - p = (te) - 1 - { - return endPos + 1, data[startPos : endPos+1], nil - } - case 20: -//line grapheme_clusters.rl:55 - p = (te) - 1 - { - return endPos + 1, data[startPos : endPos+1], nil - } - case 21: -//line grapheme_clusters.rl:55 - p = (te) - 1 - { - return endPos + 1, data[startPos : endPos+1], nil - } - case 22: -//line NONE:1 - switch act { - case 0: - { - cs = 0 - goto _again - } - case 3: - { - p = (te) - 1 - - return endPos + 1, data[startPos : endPos+1], nil - } - case 4: - { - p = (te) - 1 - - return endPos + 1, data[startPos : endPos+1], nil - } - } - -//line grapheme_clusters.go:4077 - } - } - - _again: - _acts = int(_graphclust_to_state_actions[cs]) - _nacts = uint(_graphclust_actions[_acts]) - _acts++ - for ; _nacts > 0; _nacts-- { - _acts++ - switch _graphclust_actions[_acts-1] { - case 2: -//line NONE:1 - ts = 0 - - case 3: -//line NONE:1 - act = 0 - -//line grapheme_clusters.go:4095 - } - } - - if cs == 0 { - goto _out - } - p++ - if p != pe { - goto _resume - } - _test_eof: - { - } - if p == eof { - if _graphclust_eof_trans[cs] > 0 { - _trans = int(_graphclust_eof_trans[cs] - 1) - goto _eof_trans - } - } - - _out: - { - } - } - -//line grapheme_clusters.rl:117 - - // If we fall out here then we were unable to complete a sequence. - // If we weren't able to complete a sequence then either we've - // reached the end of a partial buffer (so there's more data to come) - // or we have an isolated symbol that would normally be part of a - // grapheme cluster but has appeared in isolation here. - - if !atEOF { - // Request more - return 0, nil, nil - } - - // Just take the first UTF-8 sequence and return that. - _, seqLen := utf8.DecodeRune(data) - return seqLen, data[:seqLen], nil -} diff --git a/vendor/github.com/apparentlymart/go-textseg/v13/textseg/grapheme_clusters.rl b/vendor/github.com/apparentlymart/go-textseg/v13/textseg/grapheme_clusters.rl deleted file mode 100644 index 737db18b..00000000 --- a/vendor/github.com/apparentlymart/go-textseg/v13/textseg/grapheme_clusters.rl +++ /dev/null @@ -1,133 +0,0 @@ -package textseg - -import ( - "errors" - "unicode/utf8" -) - -// Generated from grapheme_clusters.rl. DO NOT EDIT -%%{ - # (except you are actually in grapheme_clusters.rl here, so edit away!) - - machine graphclust; - write data; -}%% - -var Error = errors.New("invalid UTF8 text") - -// ScanGraphemeClusters is a split function for bufio.Scanner that splits -// on grapheme cluster boundaries. -func ScanGraphemeClusters(data []byte, atEOF bool) (int, []byte, error) { - if len(data) == 0 { - return 0, nil, nil - } - - // Ragel state - cs := 0 // Current State - p := 0 // "Pointer" into data - pe := len(data) // End-of-data "pointer" - ts := 0 - te := 0 - act := 0 - eof := pe - - // Make Go compiler happy - _ = ts - _ = te - _ = act - _ = eof - - startPos := 0 - endPos := 0 - - %%{ - include GraphemeCluster "grapheme_clusters_table.rl"; - include Emoji "emoji_table.rl"; - - action start { - startPos = p - } - - action end { - endPos = p - } - - action emit { - return endPos+1, data[startPos:endPos+1], nil - } - - ZWJGlue = ZWJ (Extended_Pictographic Extend*)?; - AnyExtender = Extend | ZWJGlue | SpacingMark; - Extension = AnyExtender*; - ReplacementChar = (0xEF 0xBF 0xBD); - - CRLFSeq = CR LF; - ControlSeq = Control | ReplacementChar; - HangulSeq = ( - L+ (((LV? V+ | LVT) T*)?|LV?) | - LV V* T* | - V+ T* | - LVT T* | - T+ - ) Extension; - EmojiSeq = Extended_Pictographic Extend* Extension; - ZWJSeq = ZWJ (ZWJ | Extend | SpacingMark)*; - EmojiFlagSeq = Regional_Indicator Regional_Indicator? Extension; - - UTF8Cont = 0x80 .. 0xBF; - AnyUTF8 = ( - 0x00..0x7F | - 0xC0..0xDF . UTF8Cont | - 0xE0..0xEF . UTF8Cont . UTF8Cont | - 0xF0..0xF7 . UTF8Cont . UTF8Cont . UTF8Cont - ); - - # OtherSeq is any character that isn't at the start of one of the extended sequences above, followed by extension - OtherSeq = (AnyUTF8 - (CR|LF|Control|ReplacementChar|L|LV|V|LVT|T|Extended_Pictographic|ZWJ|Regional_Indicator|Prepend)) (Extend | ZWJ | SpacingMark)*; - - # PrependSeq is prepend followed by any of the other patterns above, except control characters which explicitly break - PrependSeq = Prepend+ (HangulSeq|EmojiSeq|ZWJSeq|EmojiFlagSeq|OtherSeq)?; - - CRLFTok = CRLFSeq >start @end; - ControlTok = ControlSeq >start @end; - HangulTok = HangulSeq >start @end; - EmojiTok = EmojiSeq >start @end; - ZWJTok = ZWJSeq >start @end; - EmojiFlagTok = EmojiFlagSeq >start @end; - OtherTok = OtherSeq >start @end; - PrependTok = PrependSeq >start @end; - - main := |* - CRLFTok => emit; - ControlTok => emit; - HangulTok => emit; - EmojiTok => emit; - ZWJTok => emit; - EmojiFlagTok => emit; - PrependTok => emit; - OtherTok => emit; - - # any single valid UTF-8 character would also be valid per spec, - # but we'll handle that separately after the loop so we can deal - # with requesting more bytes if we're not at EOF. - *|; - - write init; - write exec; - }%% - - // If we fall out here then we were unable to complete a sequence. - // If we weren't able to complete a sequence then either we've - // reached the end of a partial buffer (so there's more data to come) - // or we have an isolated symbol that would normally be part of a - // grapheme cluster but has appeared in isolation here. - - if !atEOF { - // Request more - return 0, nil, nil - } - - // Just take the first UTF-8 sequence and return that. - _, seqLen := utf8.DecodeRune(data) - return seqLen, data[:seqLen], nil -} \ No newline at end of file diff --git a/vendor/github.com/apparentlymart/go-textseg/v13/textseg/grapheme_clusters_table.rl b/vendor/github.com/apparentlymart/go-textseg/v13/textseg/grapheme_clusters_table.rl deleted file mode 100644 index 803dca19..00000000 --- a/vendor/github.com/apparentlymart/go-textseg/v13/textseg/grapheme_clusters_table.rl +++ /dev/null @@ -1,1609 +0,0 @@ -# The following Ragel file was autogenerated with unicode2ragel.rb -# from: https://www.unicode.org/Public/13.0.0/ucd/auxiliary/GraphemeBreakProperty.txt -# -# It defines ["Prepend", "CR", "LF", "Control", "Extend", "Regional_Indicator", "SpacingMark", "L", "V", "T", "LV", "LVT", "ZWJ"]. -# -# To use this, make sure that your alphtype is set to byte, -# and that your input is in utf8. - -%%{ - machine GraphemeCluster; - - Prepend = - 0xD8 0x80..0x85 #Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER ... - | 0xDB 0x9D #Cf ARABIC END OF AYAH - | 0xDC 0x8F #Cf SYRIAC ABBREVIATION MARK - | 0xE0 0xA3 0xA2 #Cf ARABIC DISPUTED END OF AYAH - | 0xE0 0xB5 0x8E #Lo MALAYALAM LETTER DOT REPH - | 0xF0 0x91 0x82 0xBD #Cf KAITHI NUMBER SIGN - | 0xF0 0x91 0x83 0x8D #Cf KAITHI NUMBER SIGN ABOVE - | 0xF0 0x91 0x87 0x82..0x83 #Lo [2] SHARADA SIGN JIHVAMULIYA..SHARA... - | 0xF0 0x91 0xA4 0xBF #Lo DIVES AKURU PREFIXED NASAL SIGN - | 0xF0 0x91 0xA5 0x81 #Lo DIVES AKURU INITIAL RA - | 0xF0 0x91 0xA8 0xBA #Lo ZANABAZAR SQUARE CLUSTER-INITIAL L... - | 0xF0 0x91 0xAA 0x84..0x89 #Lo [6] SOYOMBO SIGN JIHVAMULIYA..SOYOM... - | 0xF0 0x91 0xB5 0x86 #Lo MASARAM GONDI REPHA - ; - - CR = - 0x0D #Cc - ; - - LF = - 0x0A #Cc - ; - - Control = - 0x00..0x09 #Cc [10] .. - | 0x0B..0x0C #Cc [2] .. - | 0x0E..0x1F #Cc [18] .. - | 0x7F #Cc [33] .. - | 0xC2 0x80..0x9F # - | 0xC2 0xAD #Cf SOFT HYPHEN - | 0xD8 0x9C #Cf ARABIC LETTER MARK - | 0xE1 0xA0 0x8E #Cf MONGOLIAN VOWEL SEPARATOR - | 0xE2 0x80 0x8B #Cf ZERO WIDTH SPACE - | 0xE2 0x80 0x8E..0x8F #Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT ... - | 0xE2 0x80 0xA8 #Zl LINE SEPARATOR - | 0xE2 0x80 0xA9 #Zp PARAGRAPH SEPARATOR - | 0xE2 0x80 0xAA..0xAE #Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-... - | 0xE2 0x81 0xA0..0xA4 #Cf [5] WORD JOINER..INVISIBLE PLUS - | 0xE2 0x81 0xA5 #Cn - | 0xE2 0x81 0xA6..0xAF #Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIG... - | 0xEF 0xBB 0xBF #Cf ZERO WIDTH NO-BREAK SPACE - | 0xEF 0xBF 0xB0..0xB8 #Cn [9] .. - | 0xEF 0xBF 0xB9..0xBB #Cf [3] INTERLINEAR ANNOTATION ANCHOR..INT... - | 0xF0 0x93 0x90 0xB0..0xB8 #Cf [9] EGYPTIAN HIEROGLYPH VERTICAL JO... - | 0xF0 0x9B 0xB2 0xA0..0xA3 #Cf [4] SHORTHAND FORMAT LETTER OVERLAP... - | 0xF0 0x9D 0x85 0xB3..0xBA #Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSI... - | 0xF3 0xA0 0x80 0x80 #Cn - | 0xF3 0xA0 0x80 0x81 #Cf LANGUAGE TAG - | 0xF3 0xA0 0x80 0x82..0x9F #Cn [30] .. - | 0xF3 0xA0 0x82 0x80..0xFF #Cn [128] .. - | 0xF3 0xA0 0x83 0x00..0xBF # - | 0xF3 0xA0 0x87 0xB0..0xFF #Cn [3600] .. -# -# This script uses the unicode spec to generate a Ragel state machine -# that recognizes unicode alphanumeric characters. It generates 5 -# character classes: uupper, ulower, ualpha, udigit, and ualnum. -# Currently supported encodings are UTF-8 [default] and UCS-4. -# -# Usage: unicode2ragel.rb [options] -# -e, --encoding [ucs4 | utf8] Data encoding -# -h, --help Show this message -# -# This script was originally written as part of the Ferret search -# engine library. -# -# Author: Rakan El-Khalil - -require 'optparse' -require 'open-uri' - -ENCODINGS = [ :utf8, :ucs4 ] -ALPHTYPES = { :utf8 => "byte", :ucs4 => "rune" } -DEFAULT_CHART_URL = "http://www.unicode.org/Public/5.1.0/ucd/DerivedCoreProperties.txt" -DEFAULT_MACHINE_NAME= "WChar" - -### -# Display vars & default option - -TOTAL_WIDTH = 80 -RANGE_WIDTH = 23 -@encoding = :utf8 -@chart_url = DEFAULT_CHART_URL -machine_name = DEFAULT_MACHINE_NAME -properties = [] -@output = $stdout - -### -# Option parsing - -cli_opts = OptionParser.new do |opts| - opts.on("-e", "--encoding [ucs4 | utf8]", "Data encoding") do |o| - @encoding = o.downcase.to_sym - end - opts.on("-h", "--help", "Show this message") do - puts opts - exit - end - opts.on("-u", "--url URL", "URL to process") do |o| - @chart_url = o - end - opts.on("-m", "--machine MACHINE_NAME", "Machine name") do |o| - machine_name = o - end - opts.on("-p", "--properties x,y,z", Array, "Properties to add to machine") do |o| - properties = o - end - opts.on("-o", "--output FILE", "output file") do |o| - @output = File.new(o, "w+") - end -end - -cli_opts.parse(ARGV) -unless ENCODINGS.member? @encoding - puts "Invalid encoding: #{@encoding}" - puts cli_opts - exit -end - -## -# Downloads the document at url and yields every alpha line's hex -# range and description. - -def each_alpha( url, property ) - URI.open( url ) do |file| - file.each_line do |line| - next if line =~ /^#/; - next if line !~ /; #{property} *#/; - - range, description = line.split(/;/) - range.strip! - description.gsub!(/.*#/, '').strip! - - if range =~ /\.\./ - start, stop = range.split '..' - else start = stop = range - end - - yield start.hex .. stop.hex, description - end - end -end - -### -# Formats to hex at minimum width - -def to_hex( n ) - r = "%0X" % n - r = "0#{r}" unless (r.length % 2).zero? - r -end - -### -# UCS4 is just a straight hex conversion of the unicode codepoint. - -def to_ucs4( range ) - rangestr = "0x" + to_hex(range.begin) - rangestr << "..0x" + to_hex(range.end) if range.begin != range.end - [ rangestr ] -end - -## -# 0x00 - 0x7f -> 0zzzzzzz[7] -# 0x80 - 0x7ff -> 110yyyyy[5] 10zzzzzz[6] -# 0x800 - 0xffff -> 1110xxxx[4] 10yyyyyy[6] 10zzzzzz[6] -# 0x010000 - 0x10ffff -> 11110www[3] 10xxxxxx[6] 10yyyyyy[6] 10zzzzzz[6] - -UTF8_BOUNDARIES = [0x7f, 0x7ff, 0xffff, 0x10ffff] - -def to_utf8_enc( n ) - r = 0 - if n <= 0x7f - r = n - elsif n <= 0x7ff - y = 0xc0 | (n >> 6) - z = 0x80 | (n & 0x3f) - r = y << 8 | z - elsif n <= 0xffff - x = 0xe0 | (n >> 12) - y = 0x80 | (n >> 6) & 0x3f - z = 0x80 | n & 0x3f - r = x << 16 | y << 8 | z - elsif n <= 0x10ffff - w = 0xf0 | (n >> 18) - x = 0x80 | (n >> 12) & 0x3f - y = 0x80 | (n >> 6) & 0x3f - z = 0x80 | n & 0x3f - r = w << 24 | x << 16 | y << 8 | z - end - - to_hex(r) -end - -def from_utf8_enc( n ) - n = n.hex - r = 0 - if n <= 0x7f - r = n - elsif n <= 0xdfff - y = (n >> 8) & 0x1f - z = n & 0x3f - r = y << 6 | z - elsif n <= 0xefffff - x = (n >> 16) & 0x0f - y = (n >> 8) & 0x3f - z = n & 0x3f - r = x << 10 | y << 6 | z - elsif n <= 0xf7ffffff - w = (n >> 24) & 0x07 - x = (n >> 16) & 0x3f - y = (n >> 8) & 0x3f - z = n & 0x3f - r = w << 18 | x << 12 | y << 6 | z - end - r -end - -### -# Given a range, splits it up into ranges that can be continuously -# encoded into utf8. Eg: 0x00 .. 0xff => [0x00..0x7f, 0x80..0xff] -# This is not strictly needed since the current [5.1] unicode standard -# doesn't have ranges that straddle utf8 boundaries. This is included -# for completeness as there is no telling if that will ever change. - -def utf8_ranges( range ) - ranges = [] - UTF8_BOUNDARIES.each do |max| - if range.begin <= max - if range.end <= max - ranges << range - return ranges - end - - ranges << (range.begin .. max) - range = (max + 1) .. range.end - end - end - ranges -end - -def build_range( start, stop ) - size = start.size/2 - left = size - 1 - return [""] if size < 1 - - a = start[0..1] - b = stop[0..1] - - ### - # Shared prefix - - if a == b - return build_range(start[2..-1], stop[2..-1]).map do |elt| - "0x#{a} " + elt - end - end - - ### - # Unshared prefix, end of run - - return ["0x#{a}..0x#{b} "] if left.zero? - - ### - # Unshared prefix, not end of run - # Range can be 0x123456..0x56789A - # Which is equivalent to: - # 0x123456 .. 0x12FFFF - # 0x130000 .. 0x55FFFF - # 0x560000 .. 0x56789A - - ret = [] - ret << build_range(start, a + "FF" * left) - - ### - # Only generate middle range if need be. - - if a.hex+1 != b.hex - max = to_hex(b.hex - 1) - max = "FF" if b == "FF" - ret << "0x#{to_hex(a.hex+1)}..0x#{max} " + "0x00..0xFF " * left - end - - ### - # Don't generate last range if it is covered by first range - - ret << build_range(b + "00" * left, stop) unless b == "FF" - ret.flatten! -end - -def to_utf8( range ) - utf8_ranges( range ).map do |r| - begin_enc = to_utf8_enc(r.begin) - end_enc = to_utf8_enc(r.end) - build_range begin_enc, end_enc - end.flatten! -end - -## -# Perform a 3-way comparison of the number of codepoints advertised by -# the unicode spec for the given range, the originally parsed range, -# and the resulting utf8 encoded range. - -def count_codepoints( code ) - code.split(' ').inject(1) do |acc, elt| - if elt =~ /0x(.+)\.\.0x(.+)/ - if @encoding == :utf8 - acc * (from_utf8_enc($2) - from_utf8_enc($1) + 1) - else - acc * ($2.hex - $1.hex + 1) - end - else - acc - end - end -end - -def is_valid?( range, desc, codes ) - spec_count = 1 - spec_count = $1.to_i if desc =~ /\[(\d+)\]/ - range_count = range.end - range.begin + 1 - - sum = codes.inject(0) { |acc, elt| acc + count_codepoints(elt) } - sum == spec_count and sum == range_count -end - -## -# Generate the state maching to stdout - -def generate_machine( name, property ) - pipe = " " - @output.puts " #{name} = " - each_alpha( @chart_url, property ) do |range, desc| - - codes = (@encoding == :ucs4) ? to_ucs4(range) : to_utf8(range) - - #raise "Invalid encoding of range #{range}: #{codes.inspect}" unless - # is_valid? range, desc, codes - - range_width = codes.map { |a| a.size }.max - range_width = RANGE_WIDTH if range_width < RANGE_WIDTH - - desc_width = TOTAL_WIDTH - RANGE_WIDTH - 11 - desc_width -= (range_width - RANGE_WIDTH) if range_width > RANGE_WIDTH - - if desc.size > desc_width - desc = desc[0..desc_width - 4] + "..." - end - - codes.each_with_index do |r, idx| - desc = "" unless idx.zero? - code = "%-#{range_width}s" % r - @output.puts " #{pipe} #{code} ##{desc}" - pipe = "|" - end - end - @output.puts " ;" - @output.puts "" -end - -@output.puts <