aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar mitchell <70453897+667e-11@users.noreply.github.com>2009-03-01 16:32:59 -0500
committerGravatar mitchell <70453897+667e-11@users.noreply.github.com>2009-03-01 16:32:59 -0500
commit5919fc6b3a124f215c50bf1d187b37dad5a37858 (patch)
treee71e49a5b34f83945de48b0d255ea6c133ed3e63
parentc2c910627f6b8dc4bd638d2d83b3dcf83d92b857 (diff)
Added support for multiple character encodings through g_convert().
Can open, save, and convert between different character encodings now.
-rw-r--r--core/.buffer.lua13
-rw-r--r--core/events.lua6
-rw-r--r--core/ext/menu.lua31
-rw-r--r--core/file_io.lua97
-rw-r--r--core/locale.conf44
5 files changed, 151 insertions, 40 deletions
diff --git a/core/.buffer.lua b/core/.buffer.lua
index 62672fa5..af1221b9 100644
--- a/core/.buffer.lua
+++ b/core/.buffer.lua
@@ -19,6 +19,9 @@ module('buffer')
-- since it was last saved.
-- @field filename The absolute path to the file associated with this buffer.
-- It is encoded in UTF-8. Use 'textadept.iconv()' for charset conversions.
+-- @field encoding The encoding of the file on the hard disk. It will be nil if
+-- the file is a binary file.
+-- @field encoding_bom The byte-order mark of the file encoding (if any).
-- @field anchor The position of the opposite end of the selection to the
-- caret.
-- @field auto_c_auto_hide Flag indicating whether or not autocompletion is
@@ -229,7 +232,8 @@ module('buffer')
-- @field x_offset The horizontal scroll position.
-- @field zoom The zoom level added to all font sizes. +: magnify, -: reduce.
buffer = {
- doc_pointer = nil, dirty = nil, filename = nil
+ doc_pointer = nil, dirty = nil, filename = nil,
+ encoding = nil, encoding_bom = nil,
anchor = nil,
auto_c_auto_hide = nil,
auto_c_cancel_at_start = nil,
@@ -374,10 +378,9 @@ function buffer:text_range(start_pos, end_pos)
---
-- Deletes the current buffer.
--- The indexed buffer must be the currently focused one.
--- WARNING: this function buffer:should NOT be called via scripts.
--- textadept.io provides a close() function buffer:for buffers to prompt for
--- confirmation if necessary while this function buffer:does not.
+-- The indexed buffer must be the currently focused one. WARNING: this function
+-- should NOT be called via scripts. textadept.io provides a close() function
+-- for buffers to prompt for confirmation if necessary; this function does not.
-- Activates the 'buffer_deleted' signal.
function buffer:delete()
diff --git a/core/events.lua b/core/events.lua
index 6767ebac..67b4f813 100644
--- a/core/events.lua
+++ b/core/events.lua
@@ -399,12 +399,12 @@ add_handler('update_ui',
local line, max = buffer:line_from_position(pos) + 1, buffer.line_count
local col = buffer.column[pos] + 1
local lexer = buffer:get_lexer_language()
- local mode = buffer.overtype and locale.STATUS_OVR or locale.STATUS_INS
local eol = EOLs[buffer.eol_mode + 1]
local tabs = (buffer.use_tabs and locale.STATUS_TABS or
locale.STATUS_SPACES)..buffer.indent
+ local enc = buffer.encoding or ''
textadept.docstatusbar_text =
- locale.DOCSTATUSBAR_TEXT:format(line, max, col, lexer, mode, eol, tabs)
+ locale.DOCSTATUSBAR_TEXT:format(line, max, col, lexer, eol, tabs, enc)
end)
add_handler('margin_click',
@@ -418,9 +418,11 @@ add_handler('buffer_new',
function() -- set additional buffer functions
local buffer = buffer
buffer.reload = textadept.io.reload
+ buffer.set_encoding = textadept.io.set_encoding
buffer.save = textadept.io.save
buffer.save_as = textadept.io.save_as
buffer.close = textadept.io.close
+ buffer.encoding = 'UTF-8'
set_title(buffer)
end)
diff --git a/core/ext/menu.lua b/core/ext/menu.lua
index eafcefbe..963967ab 100644
--- a/core/ext/menu.lua
+++ b/core/ext/menu.lua
@@ -100,7 +100,7 @@ local ID = {
START_RECORDING_MACRO = 413,
STOP_RECORDING_MACRO = 414,
PLAY_MACRO = 415,
- -- Buffers
+ -- Buffer
NEXT_BUFFER = 501,
PREV_BUFFER = 502,
TOGGLE_VIEW_EOL = 503,
@@ -111,8 +111,13 @@ local ID = {
EOL_MODE_CRLF = 509,
EOL_MODE_CR = 510,
EOL_MODE_LF = 511,
+ ENCODING_UTF8 = 512,
+ ENCODING_ASCII = 513,
+ ENCODING_ISO88591 = 514,
+ ENCODING_MACROMAN = 515,
+ ENCODING_UTF16 = 516,
REFRESH_SYNTAX_HIGHLIGHTING = 508,
- -- Views
+ -- View
NEXT_VIEW = 601,
PREV_VIEW = 602,
SPLIT_VIEW_VERTICAL = 603,
@@ -261,6 +266,13 @@ local menubar = {
{ l.MENU_BUF_EOL_MODE_CR, ID.EOL_MODE_CR },
{ l.MENU_BUF_EOL_MODE_LF, ID.EOL_MODE_LF },
},
+ { title = l.MENU_BUF_ENCODING_TITLE,
+ { l.MENU_BUF_ENCODING_UTF8, ID.ENCODING_UTF8 },
+ { l.MENU_BUF_ENCODING_ASCII, ID.ENCODING_ASCII },
+ { l.MENU_BUF_ENCODING_ISO88591, ID.ENCODING_ISO88591 },
+ { l.MENU_BUF_ENCODING_MACROMAN, ID.ENCODING_MACROMAN },
+ { l.MENU_BUF_ENCODING_UTF16, ID.ENCODING_UTF16 },
+ },
{ SEPARATOR, ID.SEPARATOR },
{ l.MENU_BUF_REFRESH, ID.REFRESH_SYNTAX_HIGHLIGHTING },
},
@@ -297,6 +309,10 @@ local m_bookmarks = _m.textadept.bookmarks
local m_macros = _m.textadept.macros
local m_run = _m.textadept.run
+local function set_encoding(encoding) -- menu action for the Encoding submenu entries
+ buffer:set_encoding(encoding) -- buffer.set_encoding is textadept.io.set_encoding (assigned on 'buffer_new')
+ t.events.update_ui() -- for updating statusbar (it displays buffer.encoding); `t` presumably aliases textadept -- confirm
+end
local function toggle_setting(setting)
local state = buffer[setting]
if type(state) == 'boolean' then
@@ -314,7 +330,7 @@ end
local function set_lexer_language(lexer)
buffer:set_lexer_language(lexer)
buffer:colourise(0, -1)
- textadept.events.update_ui() -- for updating statusbar
+ t.events.update_ui() -- for updating statusbar
end
local actions = {
@@ -416,7 +432,7 @@ local actions = {
[ID.START_RECORDING_MACRO] = { m_macros.start_recording },
[ID.STOP_RECORDING_MACRO] = { m_macros.stop_recording },
[ID.PLAY_MACRO] = { m_macros.play },
- -- Buffers
+ -- Buffer
[ID.NEXT_BUFFER] = { 'goto_buffer', v, 1, false },
[ID.PREV_BUFFER] = { 'goto_buffer', v, -1, false },
[ID.TOGGLE_VIEW_EOL] = { toggle_setting, 'view_eol' },
@@ -427,8 +443,13 @@ local actions = {
[ID.EOL_MODE_CRLF] = { set_eol_mode, 0 },
[ID.EOL_MODE_CR] = { set_eol_mode, 1 },
[ID.EOL_MODE_LF] = { set_eol_mode, 2 },
+ [ID.ENCODING_UTF8] = { set_encoding, 'UTF-8' },
+ [ID.ENCODING_ASCII] = { set_encoding, 'ASCII' },
+ [ID.ENCODING_ISO88591] = { set_encoding, 'ISO-8859-1' },
+ [ID.ENCODING_MACROMAN] = { set_encoding, 'MacRoman' },
+ [ID.ENCODING_UTF16] = { set_encoding, 'UTF-16LE' },
[ID.REFRESH_SYNTAX_HIGHLIGHTING] = { 'colourise', b, 0, -1 },
- -- Views
+ -- View
[ID.NEXT_VIEW] = { t.goto_view, 1, false },
[ID.PREV_VIEW] = { t.goto_view, -1, false },
[ID.SPLIT_VIEW_VERTICAL] = { 'split', v },
diff --git a/core/file_io.lua b/core/file_io.lua
index 58494db0..e6415542 100644
--- a/core/file_io.lua
+++ b/core/file_io.lua
@@ -21,6 +21,41 @@ local lfs = require 'lfs'
recent_files = {}
---
+-- List of byte-order marks (BOMs).
+-- @class table
+-- @name boms
+boms = { -- keyed by encoding name; values are the raw BOM byte strings
+ ['UTF-16BE'] = string.char(254, 255), -- FE FF
+ ['UTF-16LE'] = string.char(255, 254), -- FF FE
+ ['UTF-32BE'] = string.char(0, 0, 254, 255), -- 00 00 FE FF
+ ['UTF-32LE'] = string.char(255, 254, 0, 0) -- FF FE 00 00
+} -- UTF-8 has no entry here, so boms['UTF-8'] is nil
+
+---
+-- [Local function] Attempts to detect the encoding of the given text.
+-- A leading byte-order mark (BOM) decides the encoding when present. Without
+-- one, the text is treated as binary if its first 64 KiB contain a NUL byte,
+-- and as UTF-8 otherwise.
+-- @param text Text to determine encoding from.
+-- @return encoding string for textadept.iconv(), byte-order mark (BOM) string
+--   or nil. If encoding string is nil, the text belongs to a binary file.
+local function detect_encoding(text)
+  -- For text shorter than 4 bytes the missing values are nil and simply fail
+  -- every comparison below.
+  local b1, b2, b3, b4 = string.byte(text, 1, 4)
+  if b1 == 239 and b2 == 187 and b3 == 191 then
+    return 'UTF-8', string.char(239, 187, 191)
+  -- UTF-32 must be tested before UTF-16: the UTF-32LE BOM (FF FE 00 00) starts
+  -- with the UTF-16LE BOM (FF FE) and would otherwise never be reached.
+  elseif b1 == 0 and b2 == 0 and b3 == 254 and b4 == 255 then
+    return 'UTF-32BE', boms['UTF-32BE']
+  elseif b1 == 255 and b2 == 254 and b3 == 0 and b4 == 0 then
+    return 'UTF-32LE', boms['UTF-32LE']
+  elseif b1 == 254 and b2 == 255 then
+    return 'UTF-16BE', boms['UTF-16BE']
+  elseif b1 == 255 and b2 == 254 then
+    return 'UTF-16LE', boms['UTF-16LE']
+  end
+  -- No BOM: only the first 64 KiB are scanned to keep large files cheap.
+  local chunk = #text > 65536 and text:sub(1, 65536) or text
+  if chunk:find('\0') then return nil end -- binary file
+  return 'UTF-8'
+end
+
+---
-- [Local function] Opens a file or goes to its already open buffer.
-- @param utf8_filename The absolute path to the file to open. Must be UTF-8
-- encoded.
@@ -43,11 +78,14 @@ local function open_helper(utf8_filename)
end
local buffer = textadept.new_buffer()
if text then
- -- Check for binary file. If it is one, it's not UTF-8
- local chunk = #text > 65536 and text:sub(1, 65536) or text
- if chunk:find('\0') then buffer.code_page = 0 end
- -- Tries to set the buffer's EOL mode appropriately based on the file.
local c = textadept.constants
+ -- Tries to detect character encoding and convert text from it to UTF-8.
+ local encoding, encoding_bom = detect_encoding(text)
+ if encoding_bom then text = text:sub(#encoding_bom + 1, -1) end
+ if encoding then text = textadept.iconv(text, 'UTF-8', encoding) end
+ buffer.encoding, buffer.encoding_bom = encoding, encoding_bom
+ buffer.code_page = encoding and c.SC_CP_UTF8 or 0
+ -- Tries to set the buffer's EOL mode appropriately based on the file.
local s, e = text:find('\r\n?')
if s and e then
buffer.eol_mode = (s == e and c.SC_EOL_CR or c.SC_EOL_CRLF)
@@ -99,18 +137,46 @@ end
function reload(buffer)
textadept.check_focused_buffer(buffer)
if not buffer.filename then return end
- local utf8_filename = buffer.filename
- local filename = textadept.iconv(utf8_filename, _CHARSET, 'UTF-8')
- local f = io.open(filename, 'rb')
- if not f then return end
local pos = buffer.current_pos
local first_visible_line = buffer.first_visible_line
- buffer:set_text(f:read('*all'))
+ local filename = textadept.iconv(buffer.filename, _CHARSET, 'UTF-8')
+ local f, err = io.open(filename, 'rb')
+ if not f then return end
+ local text = f:read('*all')
+ f:close()
+ local encoding, encoding_bom = buffer.encoding, buffer.encoding_bom
+ if encoding_bom then text = text:sub(#encoding_bom + 1, -1) end
+ if encoding then text = textadept.iconv(text, 'UTF-8', encoding) end
+ buffer:clear_all()
+ buffer:add_text(text, #text)
buffer:line_scroll(0, first_visible_line)
buffer:goto_pos(pos)
buffer:set_save_point()
buffer.modification_time = lfs.attributes(filename).modification
- f:close()
+end
+
+---
+-- Sets the encoding for the buffer, converting its contents in the process.
+-- @param buffer The buffer to set the encoding for. It must be the currently
+-- focused buffer.
+-- @param encoding The encoding to set. Valid encodings are ones that GTK's
+-- g_convert() function accepts (typically GNU iconv's encodings).
+-- @usage buffer:set_encoding('ASCII')
+function set_encoding(buffer, encoding)
+ textadept.check_focused_buffer(buffer)
+ if not buffer.encoding then error('Cannot change binary file encoding') end -- nil encoding marks a binary file
+ local iconv = textadept.iconv -- used below as iconv(text, to, from)
+ local pos = buffer.current_pos -- saved so the caret can be restored below
+ local first_visible_line = buffer.first_visible_line -- saved so the scroll position can be restored below
+ local text = buffer:text_range(0, buffer.length)
+ text = iconv(text, buffer.encoding, 'UTF-8') -- UTF-8 buffer text -> current file encoding
+ text = iconv(text, encoding, buffer.encoding) -- current file encoding -> requested encoding
+ text = iconv(text, 'UTF-8', encoding) -- requested encoding -> UTF-8 for the buffer
+ buffer:clear_all()
+ buffer:add_text(text, #text)
+ buffer:line_scroll(0, first_visible_line)
+ buffer:goto_pos(pos)
+ buffer.encoding, buffer.encoding_bom = encoding, boms[encoding] -- BOM is nil for encodings without a boms entry
+end
---
@@ -123,12 +189,15 @@ function save(buffer)
if not buffer.filename then return save_as(buffer) end
prepare = _m.textadept.editing.prepare_for_save
if prepare then prepare() end
- local utf8_filename = buffer.filename
- local filename = textadept.iconv(utf8_filename, _CHARSET, 'UTF-8')
+ local text = buffer:text_range(0, buffer.length)
+ if buffer.encoding then
+ local bom = buffer.encoding_bom or ''
+ text = bom..textadept.iconv(text, buffer.encoding, 'UTF-8')
+ end
+ local filename = textadept.iconv(buffer.filename, _CHARSET, 'UTF-8')
local f, err = io.open(filename, 'wb')
if f then
- local txt, _ = buffer:get_text(buffer.length)
- f:write(txt)
+ f:write(text)
f:close()
buffer:set_save_point()
buffer.modification_time = lfs.attributes(filename).modification
diff --git a/core/locale.conf b/core/locale.conf
index 576aaf9f..4004e4ff 100644
--- a/core/locale.conf
+++ b/core/locale.conf
@@ -28,14 +28,6 @@ UNTITLED "Untitled"
MESSAGE_BUFFER "[Message Buffer]"
% core/events.lua
-% "OVR"
-STATUS_OVR "OVR"
-
-% core/events.lua
-% "INS"
-STATUS_INS "INS"
-
-% core/events.lua
% "CRLF"
STATUS_CRLF "CRLF"
@@ -56,8 +48,8 @@ STATUS_TABS "Tabs: "
STATUS_SPACES "Spaces: "
% core/events.lua
-% "Line: %d/%d Col: %d Lexer: %s %s %s %s"
-DOCSTATUSBAR_TEXT "Line: %d/%d Col: %d Lexer: %s %s %s %s"
+% "Line: %d/%d Col: %d %s %s %s %s"
+DOCSTATUSBAR_TEXT "Line: %d/%d Col: %d %s %s %s %s"
% core/events.lua
% "Save?"
@@ -536,8 +528,8 @@ MENU_TOOLS_MACROS_STOP "S_top Recording"
MENU_TOOLS_MACROS_PLAY "_Play Macro"
% core/ext/menu.lua
-% "_Buffers"
-MENU_BUF_TITLE "_Buffers"
+% "_Buffer"
+MENU_BUF_TITLE "_Buffer"
% core/ext/menu.lua
% "_Next Buffer"
@@ -584,12 +576,36 @@ MENU_BUF_EOL_MODE_CR "CR"
MENU_BUF_EOL_MODE_LF "LF"
% core/ext/menu.lua
+% "Encoding"
+MENU_BUF_ENCODING_TITLE "Encoding"
+
+% core/ext/menu.lua
+% "UTF-8"
+MENU_BUF_ENCODING_UTF8 "UTF-8"
+
+% core/ext/menu.lua
+% "ASCII"
+MENU_BUF_ENCODING_ASCII "ASCII"
+
+% core/ext/menu.lua
+% "ISO-8859-1"
+MENU_BUF_ENCODING_ISO88591 "ISO-8859-1"
+
+% core/ext/menu.lua
+% "MacRoman"
+MENU_BUF_ENCODING_MACROMAN "MacRoman"
+
+% core/ext/menu.lua
+% "UTF-16"
+MENU_BUF_ENCODING_UTF16 "UTF-16"
+
+% core/ext/menu.lua
% "_Refresh Syntax Highlighting"
MENU_BUF_REFRESH "_Refresh Syntax Highlighting"
% core/ext/menu.lua
-% "_Views"
-MENU_VIEW_TITLE "_Views"
+% "_View"
+MENU_VIEW_TITLE "_View"
% core/ext/menu.lua
% "_Next View"