diff options
-rw-r--r-- | core/.buffer.lua | 13 | ||||
-rw-r--r-- | core/events.lua | 6 | ||||
-rw-r--r-- | core/ext/menu.lua | 31 | ||||
-rw-r--r-- | core/file_io.lua | 97 | ||||
-rw-r--r-- | core/locale.conf | 44 |
5 files changed, 151 insertions, 40 deletions
diff --git a/core/.buffer.lua b/core/.buffer.lua index 62672fa5..af1221b9 100644 --- a/core/.buffer.lua +++ b/core/.buffer.lua @@ -19,6 +19,9 @@ module('buffer') -- since it was last saved. -- @field filename The absolute path to the file associated with this buffer. -- It is encoded in UTF-8. Use 'textadept.iconv()' for charset conversions. +-- @field encoding The encoding of the file on the hard disk. It will be nil if +-- the file is a binary file. +-- @field encoding_bom The byte-order mark of the file encoding (if any). -- @field anchor The position of the opposite end of the selection to the -- caret. -- @field auto_c_auto_hide Flag indicating whether or not autocompletion is @@ -229,7 +232,8 @@ module('buffer') -- @field x_offset The horizontal scroll position. -- @field zoom The zoom level added to all font sizes. +: magnify, -: reduce. buffer = { - doc_pointer = nil, dirty = nil, filename = nil + doc_pointer = nil, dirty = nil, filename = nil, + encoding = nil, encoding_bom = nil, anchor = nil, auto_c_auto_hide = nil, auto_c_cancel_at_start = nil, @@ -374,10 +378,9 @@ function buffer:text_range(start_pos, end_pos) --- -- Deletes the current buffer. --- The indexed buffer must be the currently focused one. --- WARNING: this function buffer:should NOT be called via scripts. --- textadept.io provides a close() function buffer:for buffers to prompt for --- confirmation if necessary while this function buffer:does not. +-- The indexed buffer must be the currently focused one. WARNING: this function +-- should NOT be called via scripts. textadept.io provides a close() function +-- for buffers to prompt for confirmation if necessary; this function does not. -- Activates the 'buffer_deleted' signal. function buffer:delete() diff --git a/core/events.lua b/core/events.lua index 6767ebac..67b4f813 100644 --- a/core/events.lua +++ b/core/events.lua @@ -399,12 +399,12 @@ add_handler('update_ui', local line, max = buffer:line_from_position(pos) + 1, buffer.line_count local col = buffer.column[pos] + 1 local lexer = buffer:get_lexer_language() - local mode = buffer.overtype and locale.STATUS_OVR or locale.STATUS_INS local eol = EOLs[buffer.eol_mode + 1] local tabs = (buffer.use_tabs and locale.STATUS_TABS or locale.STATUS_SPACES)..buffer.indent + local enc = buffer.encoding or '' textadept.docstatusbar_text = - locale.DOCSTATUSBAR_TEXT:format(line, max, col, lexer, mode, eol, tabs) + locale.DOCSTATUSBAR_TEXT:format(line, max, col, lexer, eol, tabs, enc) end) add_handler('margin_click', @@ -418,9 +418,11 @@ add_handler('buffer_new', function() -- set additional buffer functions local buffer = buffer buffer.reload = textadept.io.reload + buffer.set_encoding = textadept.io.set_encoding buffer.save = textadept.io.save buffer.save_as = textadept.io.save_as buffer.close = textadept.io.close + buffer.encoding = 'UTF-8' set_title(buffer) end) diff --git a/core/ext/menu.lua b/core/ext/menu.lua index eafcefbe..963967ab 100644 --- a/core/ext/menu.lua +++ b/core/ext/menu.lua @@ -100,7 +100,7 @@ local ID = { START_RECORDING_MACRO = 413, STOP_RECORDING_MACRO = 414, PLAY_MACRO = 415, - -- Buffers + -- Buffer NEXT_BUFFER = 501, PREV_BUFFER = 502, TOGGLE_VIEW_EOL = 503, @@ -111,8 +111,13 @@ local ID = { EOL_MODE_CRLF = 509, EOL_MODE_CR = 510, EOL_MODE_LF = 511, + ENCODING_UTF8 = 512, + ENCODING_ASCII = 513, + ENCODING_ISO88591 = 514, + ENCODING_MACROMAN = 515, + ENCODING_UTF16 = 516, REFRESH_SYNTAX_HIGHLIGHTING = 508, - -- Views + -- View NEXT_VIEW = 601, PREV_VIEW = 602, SPLIT_VIEW_VERTICAL = 603, @@ -261,6 +266,13 @@ local menubar = { { l.MENU_BUF_EOL_MODE_CR, ID.EOL_MODE_CR }, { l.MENU_BUF_EOL_MODE_LF, ID.EOL_MODE_LF }, }, + { title = l.MENU_BUF_ENCODING_TITLE, + { l.MENU_BUF_ENCODING_UTF8, ID.ENCODING_UTF8 }, + { l.MENU_BUF_ENCODING_ASCII, ID.ENCODING_ASCII }, + { l.MENU_BUF_ENCODING_ISO88591, ID.ENCODING_ISO88591 }, + { l.MENU_BUF_ENCODING_MACROMAN, ID.ENCODING_MACROMAN }, + { l.MENU_BUF_ENCODING_UTF16, ID.ENCODING_UTF16 }, + }, { SEPARATOR, ID.SEPARATOR }, { l.MENU_BUF_REFRESH, ID.REFRESH_SYNTAX_HIGHLIGHTING }, }, @@ -297,6 +309,10 @@ local m_bookmarks = _m.textadept.bookmarks local m_macros = _m.textadept.macros local m_run = _m.textadept.run +local function set_encoding(encoding) + buffer:set_encoding(encoding) + t.events.update_ui() -- for updating statusbar +end local function toggle_setting(setting) local state = buffer[setting] if type(state) == 'boolean' then @@ -314,7 +330,7 @@ end local function set_lexer_language(lexer) buffer:set_lexer_language(lexer) buffer:colourise(0, -1) - textadept.events.update_ui() -- for updating statusbar + t.events.update_ui() -- for updating statusbar end local actions = { @@ -416,7 +432,7 @@ local actions = { [ID.START_RECORDING_MACRO] = { m_macros.start_recording }, [ID.STOP_RECORDING_MACRO] = { m_macros.stop_recording }, [ID.PLAY_MACRO] = { m_macros.play }, - -- Buffers + -- Buffer [ID.NEXT_BUFFER] = { 'goto_buffer', v, 1, false }, [ID.PREV_BUFFER] = { 'goto_buffer', v, -1, false }, [ID.TOGGLE_VIEW_EOL] = { toggle_setting, 'view_eol' }, @@ -427,8 +443,13 @@ local actions = { [ID.EOL_MODE_CRLF] = { set_eol_mode, 0 }, [ID.EOL_MODE_CR] = { set_eol_mode, 1 }, [ID.EOL_MODE_LF] = { set_eol_mode, 2 }, + [ID.ENCODING_UTF8] = { set_encoding, 'UTF-8' }, + [ID.ENCODING_ASCII] = { set_encoding, 'ASCII' }, + [ID.ENCODING_ISO88591] = { set_encoding, 'ISO-8859-1' }, + [ID.ENCODING_MACROMAN] = { set_encoding, 'MacRoman' }, + [ID.ENCODING_UTF16] = { set_encoding, 'UTF-16LE' }, [ID.REFRESH_SYNTAX_HIGHLIGHTING] = { 'colourise', b, 0, -1 }, - -- Views + -- View [ID.NEXT_VIEW] = { t.goto_view, 1, false }, [ID.PREV_VIEW] = { t.goto_view, -1, false }, [ID.SPLIT_VIEW_VERTICAL] = { 'split', v }, diff --git a/core/file_io.lua b/core/file_io.lua index 58494db0..e6415542 100644 --- a/core/file_io.lua +++ b/core/file_io.lua @@ -21,6 +21,41 @@ local lfs = require 'lfs' recent_files = {} --- +-- List of byte-order marks (BOMs). +-- @class table +-- @name boms +boms = { + ['UTF-16BE'] = string.char(254, 255), + ['UTF-16LE'] = string.char(255, 254), + ['UTF-32BE'] = string.char(0, 0, 254, 255), + ['UTF-32LE'] = string.char(255, 254, 0, 0) +} + +--- +-- [Local function] Attempt to detect the encoding of the given text. +-- @param text Text to determine encoding from. +-- @return encoding string for textadept.iconv(), byte-order mark (BOM) string +-- or nil. If encoding string is nil, the text belongs to a binary file. +local function detect_encoding(text) + local b1, b2, b3, b4 = string.byte(text, 1, 4) + if b1 == 239 and b2 == 187 and b3 == 191 then + return 'UTF-8', string.char(239, 187, 191) + elseif b1 == 254 and b2 == 255 then + return 'UTF-16BE', boms[encoding] + elseif b1 == 255 and b2 == 254 then + return 'UTF-16LE', boms[encoding] + elseif b1 == 0 and b2 == 0 and b3 == 254 and b4 == 255 then + return 'UTF-32BE', boms[encoding] + elseif b1 == 255 and b2 == 254 and b3 == 0 and b4 == 0 then + return 'UTF-32LE', boms[encoding] + else + local chunk = #text > 65536 and text:sub(1, 65536) or text + if chunk:find('\0') then return nil end -- binary file + end + return 'UTF-8' +end + +--- -- [Local function] Opens a file or goes to its already open buffer. -- @param utf8_filename The absolute path to the file to open. Must be UTF-8 -- encoded. @@ -43,11 +78,14 @@ local function open_helper(utf8_filename) end local buffer = textadept.new_buffer() if text then - -- Check for binary file. If it is one, it's not UTF-8 - local chunk = #text > 65536 and text:sub(1, 65536) or text - if chunk:find('\0') then buffer.code_page = 0 end - -- Tries to set the buffer's EOL mode appropriately based on the file. local c = textadept.constants + -- Tries to detect character encoding and convert text from it to UTF-8. + local encoding, encoding_bom = detect_encoding(text) + if encoding_bom then text = text:sub(#encoding_bom + 1, -1) end + if encoding then text = textadept.iconv(text, 'UTF-8', encoding) end + buffer.encoding, buffer.encoding_bom = encoding, encoding_bom + buffer.code_page = encoding and c.SC_CP_UTF8 or 0 + -- Tries to set the buffer's EOL mode appropriately based on the file. local s, e = text:find('\r\n?') if s and e then buffer.eol_mode = (s == e and c.SC_EOL_CR or c.SC_EOL_CRLF) @@ -99,18 +137,46 @@ end function reload(buffer) textadept.check_focused_buffer(buffer) if not buffer.filename then return end - local utf8_filename = buffer.filename - local filename = textadept.iconv(utf8_filename, _CHARSET, 'UTF-8') - local f = io.open(filename, 'rb') - if not f then return end local pos = buffer.current_pos local first_visible_line = buffer.first_visible_line - buffer:set_text(f:read('*all')) + local filename = textadept.iconv(buffer.filename, _CHARSET, 'UTF-8') + local f, err = io.open(filename, 'rb') + if not f then return end + local text = f:read('*all') + f:close() + local encoding, encoding_bom = buffer.encoding, buffer.encoding_bom + if encoding_bom then text = text:sub(#encoding_bom + 1, -1) end + if encoding then text = textadept.iconv(text, 'UTF-8', encoding) end + buffer:clear_all() + buffer:add_text(text, #text) buffer:line_scroll(0, first_visible_line) buffer:goto_pos(pos) buffer:set_save_point() buffer.modification_time = lfs.attributes(filename).modification - f:close() +end + +--- +-- Sets the encoding for the buffer, converting its contents in the process. +-- @param buffer The buffer to set the encoding for. It must be the currently +-- focused buffer. +-- @param encoding The encoding to set. Valid encodings are ones that GTK's +-- g_convert() function accepts (typically GNU iconv's encodings). +-- @usage buffer:set_encoding('ASCII') +function set_encoding(buffer, encoding) + textadept.check_focused_buffer(buffer) + if not buffer.encoding then error('Cannot change binary file encoding') end + local iconv = textadept.iconv + local pos = buffer.current_pos + local first_visible_line = buffer.first_visible_line + local text = buffer:text_range(0, buffer.length) + text = iconv(text, buffer.encoding, 'UTF-8') + text = iconv(text, encoding, buffer.encoding) + text = iconv(text, 'UTF-8', encoding) + buffer:clear_all() + buffer:add_text(text, #text) + buffer:line_scroll(0, first_visible_line) + buffer:goto_pos(pos) + buffer.encoding, buffer.encoding_bom = encoding, boms[encoding] end --- @@ -123,12 +189,15 @@ function save(buffer) if not buffer.filename then return save_as(buffer) end prepare = _m.textadept.editing.prepare_for_save if prepare then prepare() end - local utf8_filename = buffer.filename - local filename = textadept.iconv(utf8_filename, _CHARSET, 'UTF-8') + local text = buffer:text_range(0, buffer.length) + if buffer.encoding then + local bom = buffer.encoding_bom or '' + text = bom..textadept.iconv(text, buffer.encoding, 'UTF-8') + end + local filename = textadept.iconv(buffer.filename, _CHARSET, 'UTF-8') local f, err = io.open(filename, 'wb') if f then - local txt, _ = buffer:get_text(buffer.length) - f:write(txt) + f:write(text) f:close() buffer:set_save_point() buffer.modification_time = lfs.attributes(filename).modification diff --git a/core/locale.conf b/core/locale.conf index 576aaf9f..4004e4ff 100644 --- a/core/locale.conf +++ b/core/locale.conf @@ -28,14 +28,6 @@ UNTITLED "Untitled" MESSAGE_BUFFER "[Message Buffer]" % core/events.lua -% "OVR" -STATUS_OVR "OVR" - -% core/events.lua -% "INS" -STATUS_INS "INS" - -% core/events.lua % "CRLF" STATUS_CRLF "CRLF" @@ -56,8 +48,8 @@ STATUS_TABS "Tabs: " STATUS_SPACES "Spaces: " % core/events.lua -% "Line: %d/%d Col: %d Lexer: %s %s %s %s" -DOCSTATUSBAR_TEXT "Line: %d/%d Col: %d Lexer: %s %s %s %s" +% "Line: %d/%d Col: %d %s %s %s %s" +DOCSTATUSBAR_TEXT "Line: %d/%d Col: %d %s %s %s %s" % core/events.lua % "Save?" @@ -536,8 +528,8 @@ MENU_TOOLS_MACROS_STOP "S_top Recording" MENU_TOOLS_MACROS_PLAY "_Play Macro" % core/ext/menu.lua -% "_Buffers" -MENU_BUF_TITLE "_Buffers" +% "_Buffer" +MENU_BUF_TITLE "_Buffer" % core/ext/menu.lua % "_Next Buffer" @@ -584,12 +576,36 @@ MENU_BUF_EOL_MODE_CR "CR" MENU_BUF_EOL_MODE_LF "LF" % core/ext/menu.lua +% "Encoding" +MENU_BUF_ENCODING_TITLE "Encoding" + +% core/ext/menu.lua +% "UTF-8" +MENU_BUF_ENCODING_UTF8 "UTF-8" + +% core/ext/menu.lua +% "ASCII" +MENU_BUF_ENCODING_ASCII "ASCII" + +% core/ext/menu.lua +% "ISO-8859-1" +MENU_BUF_ENCODING_ISO88591 "ISO-8859-1" + +% core/ext/menu.lua +% "MacRoman" +MENU_BUF_ENCODING_MACROMAN "MacRoman" + +% core/ext/menu.lua +% "UTF-16" +MENU_BUF_ENCODING_UTF16 "UTF-16" + +% core/ext/menu.lua % "_Refresh Syntax Highlighting" MENU_BUF_REFRESH "_Refresh Syntax Highlighting" % core/ext/menu.lua -% "_Views" -MENU_VIEW_TITLE "_Views" +% "_View" +MENU_VIEW_TITLE "_View" % core/ext/menu.lua % "_Next View" |