about | summary | refs | log | tree | commit | diff | homepage
path: root/modules
diff options
context:
space:
mode:
author: mitchell <70453897+667e-11@users.noreply.github.com> 2016-03-26 13:57:02 -0400
committer: mitchell <70453897+667e-11@users.noreply.github.com> 2016-03-26 13:57:02 -0400
commit: 6be644ed9fe4fa893c0d561c6a25118aca548ae3 (patch)
tree: ddb6a91c8683c04c08e6dfe58ba355b142f206a4 /modules
parent: 274e7c46f33dcbc4d8af8264e24776a5c68c565c (diff)
Support UTF-8 Lua pattern matching.
Makes use of an external luautf8 library, but only a subset of it.
Diffstat (limited to 'modules')
-rw-r--r-- modules/lua/lua.luadoc | 36
-rw-r--r-- modules/textadept/find.lua | 13
2 files changed, 46 insertions, 3 deletions
diff --git a/modules/lua/lua.luadoc b/modules/lua/lua.luadoc
index f92adc48..8902482d 100644
--- a/modules/lua/lua.luadoc
+++ b/modules/lua/lua.luadoc
@@ -2046,3 +2046,39 @@ function lfs.touch(filepath [, atime [, mtime]]) end
-- Returns true if the operation was successful; in case of error, it returns
-- nil plus an error string.
function lfs.unlock(filehandle[, start[, length]]) end
+
+---
+-- UTF-8 version of `string.byte`.
+function utf8.byte(s [, i [, j]]) end
+
+---
+-- UTF-8 version of `string.find`.
+function utf8.find(s, pattern [, init [, plain]]) end
+
+---
+-- UTF-8 version of `string.gmatch`.
+function utf8.gmatch(s, pattern) end
+
+---
+-- UTF-8 version of `string.gsub`.
+function utf8.gsub(s, pattern, repl [, n]) end
+
+---
+-- UTF-8 version of `string.lower`.
+function utf8.lower(s) end
+
+---
+-- UTF-8 version of `string.match`.
+function utf8.match(s, pattern [, init]) end
+
+---
+-- UTF-8 version of `string.reverse`.
+function utf8.reverse(s) end
+
+---
+-- UTF-8 version of `string.sub`.
+function utf8.sub(s, i [, j]) end
+
+---
+-- UTF-8 version of `string.upper`.
+function utf8.upper(s) end
diff --git a/modules/textadept/find.lua b/modules/textadept/find.lua
index ea6a287e..a9769cb8 100644
--- a/modules/textadept/find.lua
+++ b/modules/textadept/find.lua
@@ -154,14 +154,21 @@ local function find_(text, next, flags, no_wrap, wrapped)
M.captures = nil -- clear captures from any previous Lua pattern searches
elseif flags < 16 then
-- Lua pattern search.
+ -- Note: I do not trust utf8.find completely, so only use it if there are
+ -- UTF-8 characters in patt. Otherwise default to string.find.
local patt = text:gsub('\\[abfnrtv\\]', escapes)
local s = next and buffer.current_pos or 0
local e = next and buffer.length or buffer.current_pos
- local caps = {buffer:text_range(s, e):find(next and patt or '^.*()'..patt)}
+ local find = not patt:find('[\xC2-\xF4]') and string.find or utf8.find
+ local caps = {find(buffer:text_range(s, e), next and patt or '^.*()'..patt)}
M.captures = {table.unpack(caps, next and 3 or 4)}
if #caps > 0 and caps[2] >= caps[1] then
- pos = buffer:position_relative(s, caps[next and 1 or 3] - 1)
- e = buffer:position_relative(s, caps[2])
+ if find == string.find then
+ pos, e = s + caps[next and 1 or 3] - 1, s + caps[2]
+ else
+ pos = buffer:position_relative(s, caps[next and 1 or 3] - 1)
+ e = buffer:position_relative(s, caps[2])
+ end
M.captures[0] = buffer:text_range(pos, e)
buffer:set_sel(e, pos)
end