diff options
author | mitchell <70453897+667e-11@users.noreply.github.com> | 2016-03-26 13:57:02 -0400 |
---|---|---|
committer | mitchell <70453897+667e-11@users.noreply.github.com> | 2016-03-26 13:57:02 -0400 |
commit | 6be644ed9fe4fa893c0d561c6a25118aca548ae3 (patch) | |
tree | ddb6a91c8683c04c08e6dfe58ba355b142f206a4 /modules | |
parent | 274e7c46f33dcbc4d8af8264e24776a5c68c565c (diff) |
Support UTF-8 Lua pattern matching.
Makes use of only a subset of the external luautf8 library.
Diffstat (limited to 'modules')
-rw-r--r-- | modules/lua/lua.luadoc | 36 | ||||
-rw-r--r-- | modules/textadept/find.lua | 13 |
2 files changed, 46 insertions, 3 deletions
diff --git a/modules/lua/lua.luadoc b/modules/lua/lua.luadoc index f92adc48..8902482d 100644 --- a/modules/lua/lua.luadoc +++ b/modules/lua/lua.luadoc @@ -2046,3 +2046,39 @@ function lfs.touch(filepath [, atime [, mtime]]) end -- Returns true if the operation was successful; in case of error, it returns -- nil plus an error string. function lfs.unlock(filehandle[, start[, length]]) end + +--- +-- UTF-8 version of `string.byte`. +function utf8.byte(s [, i [, j]]) end + +--- +-- UTF-8 version of `string.find`. +function utf8.find(s, pattern [, init [, plain]]) end + +--- +-- UTF-8 version of `string.gmatch`. +function utf8.gmatch(s, pattern) end + +--- +-- UTF-8 version of `string.gsub`. +function utf8.gsub(s, pattern, repl [, n]) end + +--- +-- UTF-8 version of `string.lower`. +function utf8.lower(s) end + +--- +-- UTF-8 version of `string.match`. +function utf8.match(s, pattern [, init]) end + +--- +-- UTF-8 version of `string.reverse`. +function utf8.reverse(s) end + +--- +-- UTF-8 version of `string.sub`. +function utf8.sub(s, i [, j]) end + +--- +-- UTF-8 version of `string.upper`. +function utf8.upper(s) end diff --git a/modules/textadept/find.lua b/modules/textadept/find.lua index ea6a287e..a9769cb8 100644 --- a/modules/textadept/find.lua +++ b/modules/textadept/find.lua @@ -154,14 +154,21 @@ local function find_(text, next, flags, no_wrap, wrapped) M.captures = nil -- clear captures from any previous Lua pattern searches elseif flags < 16 then -- Lua pattern search. + -- Note: I do not trust utf8.find completely, so only use it if there are + -- UTF-8 characters in patt. Otherwise default to string.find. 
local patt = text:gsub('\\[abfnrtv\\]', escapes) local s = next and buffer.current_pos or 0 local e = next and buffer.length or buffer.current_pos - local caps = {buffer:text_range(s, e):find(next and patt or '^.*()'..patt)} + local find = not patt:find('[\xC2-\xF4]') and string.find or utf8.find + local caps = {find(buffer:text_range(s, e), next and patt or '^.*()'..patt)} M.captures = {table.unpack(caps, next and 3 or 4)} if #caps > 0 and caps[2] >= caps[1] then - pos = buffer:position_relative(s, caps[next and 1 or 3] - 1) - e = buffer:position_relative(s, caps[2]) + if find == string.find then + pos, e = s + caps[next and 1 or 3] - 1, s + caps[2] + else + pos = buffer:position_relative(s, caps[next and 1 or 3] - 1) + e = buffer:position_relative(s, caps[2]) + end M.captures[0] = buffer:text_range(pos, e) buffer:set_sel(e, pos) end |