/* Mosh: the mobile shell Copyright 2012 Keith Winstein This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include #include #include #include #include "parser.h" static void append_or_delete( Parser::Action *act, std::list&vec ) { assert( act ); if ( typeid( *act ) != typeid( Parser::Ignore ) ) { vec.push_back( act ); } else { delete act; } } std::list Parser::Parser::input( wchar_t ch ) { std::list ret; Transition tx = state->input( ch ); if ( tx.next_state != NULL ) { append_or_delete( state->exit(), ret ); } append_or_delete( tx.action, ret ); if ( tx.next_state != NULL ) { append_or_delete( tx.next_state->enter(), ret ); state = tx.next_state; } return ret; } Parser::UTF8Parser::UTF8Parser() : parser(), buf_len( 0 ) { assert( BUF_SIZE >= (size_t)MB_CUR_MAX ); } std::list Parser::UTF8Parser::input( char c ) { assert( buf_len < BUF_SIZE ); buf[ buf_len++ ] = c; /* This function will only work in a UTF-8 locale. */ wchar_t pwc; mbstate_t ps; memset( &ps, 0, sizeof( ps ) ); size_t total_bytes_parsed = 0; size_t orig_buf_len = buf_len; std::list ret; /* this routine is somewhat complicated in order to comply with Unicode 6.0, section 3.9, "Best Practices for using U+FFFD" */ while ( total_bytes_parsed != orig_buf_len ) { assert( total_bytes_parsed < orig_buf_len ); assert( buf_len > 0 ); size_t bytes_parsed = mbrtowc( &pwc, buf, buf_len, &ps ); /* this returns 0 when n = 0! */ /* This function annoying returns a size_t so we have to check the negative values first before the "> 0" branch */ if ( bytes_parsed == 0 ) { /* character was NUL, accept and clear buffer */ assert( buf_len == 1 ); buf_len = 0; pwc = L'\0'; bytes_parsed = 1; } else if ( bytes_parsed == (size_t) -1 ) { /* invalid sequence, use replacement character and try again with last char */ assert( errno == EILSEQ ); if ( buf_len > 1 ) { buf[ 0 ] = buf[ buf_len - 1 ]; bytes_parsed = buf_len - 1; buf_len = 1; } else { buf_len = 0; bytes_parsed = 1; } pwc = (wchar_t) 0xFFFD; } else if ( bytes_parsed == (size_t) -2 ) { /* can't parse incomplete multibyte character */ total_bytes_parsed += buf_len; continue; } else if ( bytes_parsed > 0 ) { /* parsed into pwc, accept */ assert( bytes_parsed <= buf_len ); memcpy( buf, buf + bytes_parsed, buf_len - bytes_parsed ); buf_len = buf_len - bytes_parsed; } else { throw std::string( "Unknown return value from mbrtowc" ); } if ( (pwc < 0) || (pwc > 0x10FFFF) ) { /* outside Unicode range */ pwc = (wchar_t) 0xFFFD; } if ( (pwc >= 0xD800) && (pwc <= 0xDFFF) ) { /* surrogate code point */ /* OS X unfortunately allows these sequences without EILSEQ, but they are ill-formed UTF-8 and we shouldn't repeat them to the user's terminal. */ pwc = (wchar_t) 0xFFFD; } std::list vec = parser.input( pwc ); ret.insert( ret.end(), vec.begin(), vec.end() ); total_bytes_parsed += bytes_parsed; } return ret; } Parser::Parser::Parser( const Parser &other ) : state( other.state ) {} Parser::Parser & Parser::Parser::operator=( const Parser &other ) { state = other.state; return *this; }