fix encoding issues on windows console streams
Always use the byte-sized input/output operations ReadConsoleA/WriteConsoleA and do all the conversion to Unicode ourselves. Moreover, expand the set of known encodings for Windows codepages and print a warning if we encounter an unsupported codepage. Fixes #582.
This commit is contained in:
parent
0eedaf533d
commit
892a34e66d
5 changed files with 103 additions and 68 deletions
94
src/c/file.d
94
src/c/file.d
|
|
@ -4042,34 +4042,17 @@ wcon_stream_read_byte8(cl_object strm, unsigned char *c, cl_index n)
|
|||
unlikely_if (strm->stream.byte_stack != ECL_NIL) {
|
||||
return consume_byte_stack(strm, c, n);
|
||||
} else {
|
||||
cl_index len = 0;
|
||||
cl_env_ptr the_env = ecl_process_env();
|
||||
HANDLE h = (HANDLE)IO_FILE_DESCRIPTOR(strm);
|
||||
DWORD nchars;
|
||||
unsigned char aux[4];
|
||||
WCHAR waux[1];
|
||||
for (len = 0; len < n; ) {
|
||||
int i, ok;
|
||||
int ok;
|
||||
ecl_disable_interrupts_env(the_env);
|
||||
ok = ReadConsoleW(h, waux, 1, &nchars, NULL);
|
||||
if (ok) {
|
||||
nchars = WideCharToMultiByte(GetConsoleCP(), 0, waux, 1, aux, 4, NULL, NULL);
|
||||
}
|
||||
ok = ReadConsoleA(h, c, n, &nchars, NULL);
|
||||
ecl_enable_interrupts_env(the_env);
|
||||
unlikely_if (!ok) {
|
||||
FEwin32_error("Cannot read from console", 0);
|
||||
}
|
||||
for (i = 0; i < nchars; i++) {
|
||||
if (len < n) {
|
||||
c[len++] = aux[i];
|
||||
} else {
|
||||
strm->stream.byte_stack =
|
||||
ecl_nconc(strm->stream.byte_stack,
|
||||
ecl_list1(ecl_make_fixnum(aux[i])));
|
||||
}
|
||||
}
|
||||
}
|
||||
return (len > 0) ? len : EOF;
|
||||
return (nchars > 0) ? nchars : EOF;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -4078,7 +4061,7 @@ wcon_stream_write_byte8(cl_object strm, unsigned char *c, cl_index n)
|
|||
{
|
||||
HANDLE h = (HANDLE)IO_FILE_DESCRIPTOR(strm);
|
||||
DWORD nchars;
|
||||
unlikely_if(!WriteConsole(h, c, n, &nchars, NULL)) {
|
||||
unlikely_if(!WriteConsoleA(h, c, n, &nchars, NULL)) {
|
||||
FEwin32_error("Cannot write to console.", 0);
|
||||
}
|
||||
return nchars;
|
||||
|
|
@ -4203,6 +4186,67 @@ maybe_make_windows_console_fd(cl_object fname, int desc, enum ecl_smmode smm,
|
|||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
cl_object
|
||||
si_windows_codepage_encoding()
|
||||
{
|
||||
/* Mapping from windows codepages to encoding names used by ECL */
|
||||
DWORD cp = GetConsoleCP();
|
||||
cl_object encoding;
|
||||
switch (cp) {
|
||||
#ifdef ECL_UNICODE
|
||||
case 437: return ecl_make_keyword("DOS-CP437");
|
||||
case 708: return ecl_make_keyword("ISO-8859-6");
|
||||
case 850: return ecl_make_keyword("DOS-CP850");
|
||||
case 852: return ecl_make_keyword("DOS-CP852");
|
||||
case 855: return ecl_make_keyword("DOS-CP855");
|
||||
case 857: return ecl_make_keyword("DOS-CP857");
|
||||
case 858: return ecl_make_keyword("DOS-CP858");
|
||||
case 860: return ecl_make_keyword("DOS-CP860");
|
||||
case 861: return ecl_make_keyword("DOS-CP861");
|
||||
case 862: return ecl_make_keyword("DOS-CP862");
|
||||
case 863: return ecl_make_keyword("DOS-CP863");
|
||||
case 864: return ecl_make_keyword("DOS-CP864");
|
||||
case 865: return ecl_make_keyword("DOS-CP865");
|
||||
case 866: return ecl_make_keyword("DOS-CP866");
|
||||
case 869: return ecl_make_keyword("DOS-CP869");
|
||||
case 932: return ecl_make_keyword("WINDOWS-CP932");
|
||||
case 936: return ecl_make_keyword("WINDOWS-CP936");
|
||||
case 949: return ecl_make_keyword("WINDOWS-CP949");
|
||||
case 950: return ecl_make_keyword("WINDOWS-CP950");
|
||||
case 1200: return ecl_make_keyword("UCS-2LE");
|
||||
case 1201: return ecl_make_keyword("UCS-2BE");
|
||||
case 1250: return ecl_make_keyword("WINDOWS-CP1250");
|
||||
case 1251: return ecl_make_keyword("WINDOWS-CP1251");
|
||||
case 1252: return ecl_make_keyword("WINDOWS-CP1252");
|
||||
case 1253: return ecl_make_keyword("WINDOWS-CP1253");
|
||||
case 1254: return ecl_make_keyword("WINDOWS-CP1254");
|
||||
case 1255: return ecl_make_keyword("WINDOWS-CP1255");
|
||||
case 1256: return ecl_make_keyword("WINDOWS-CP1256");
|
||||
case 1257: return ecl_make_keyword("WINDOWS-CP1257");
|
||||
case 1258: return ecl_make_keyword("WINDOWS-CP1258");
|
||||
case 12000: return ecl_make_keyword("UCS-4LE");
|
||||
case 12001: return ecl_make_keyword("UCS-4BE");
|
||||
case 20932: return ecl_make_keyword("JISX0212");
|
||||
case 21866: return ecl_make_keyword("KOI8-U");
|
||||
case 28591: return ecl_make_keyword("ISO-8859-1");
|
||||
case 28592: return ecl_make_keyword("ISO-8859-2");
|
||||
case 28593: return ecl_make_keyword("ISO-8859-3");
|
||||
case 28594: return ecl_make_keyword("ISO-8859-4");
|
||||
case 28595: return ecl_make_keyword("ISO-8859-5");
|
||||
case 28596: return ecl_make_keyword("ISO-8859-6");
|
||||
case 28597: return ecl_make_keyword("ISO-8859-7");
|
||||
case 28598: return ecl_make_keyword("ISO-8859-8");
|
||||
case 28599: return ecl_make_keyword("ISO-8859-9");
|
||||
case 28603: return ecl_make_keyword("ISO-8859-13");
|
||||
case 28605: return ecl_make_keyword("ISO-8859-15");
|
||||
case 50220: return ecl_make_keyword("ISO-2022-JP");
|
||||
case 65001: return ecl_make_keyword("UTF-8");
|
||||
#endif
|
||||
/* Nothing we can do here, try our best with :pass-through */
|
||||
default: return @':pass-through';
|
||||
}
|
||||
}
|
||||
#else
|
||||
#define maybe_make_windows_console_FILE ecl_make_stream_from_FILE
|
||||
#define maybe_make_windows_console_fd ecl_make_file_stream_from_fd
|
||||
|
|
@ -5788,11 +5832,15 @@ init_file(void)
|
|||
cl_object null_stream;
|
||||
cl_object external_format = ECL_NIL;
|
||||
#if defined(ECL_MS_WINDOWS_HOST)
|
||||
/* We start out with :pass-through external format for standard
|
||||
* input/output for bootstrap reasons (some of the external format
|
||||
* support is implemented in lisp and not available on start of
|
||||
* ECL). The correct format is later on set using the encoding
|
||||
* specified by the current codepage. */
|
||||
external_format = cl_list(2, @':pass-through', @':crlf');
|
||||
# ifdef ECL_UNICODE
|
||||
external_format = cl_list(2, @':latin-1', @':crlf');
|
||||
flags = 0;
|
||||
# else
|
||||
external_format = cl_list(2, @':crlf', @':pass-through');
|
||||
flags = ECL_STREAM_DEFAULT_FORMAT;
|
||||
# endif
|
||||
#else
|
||||
|
|
|
|||
38
src/c/main.d
38
src/c/main.d
|
|
@ -454,40 +454,16 @@ struct cl_core_struct cl_core = {
|
|||
static void
|
||||
maybe_fix_console_stream(cl_object stream)
|
||||
{
|
||||
DWORD cp = GetConsoleCP();
|
||||
const char *encoding;
|
||||
cl_object external_format;
|
||||
int i;
|
||||
static const struct {
|
||||
int code;
|
||||
const char *name;
|
||||
} known_cp[] = {
|
||||
{874, "WINDOWS-CP874"},
|
||||
{932, "WINDOWS-CP932"},
|
||||
{936, "WINDOWS-CP936"},
|
||||
{949, "WINDOWS-CP949"},
|
||||
{950, "WINDOWS-CP950"},
|
||||
{1200, "WINDOWS-CP1200"},
|
||||
{1201, "WINDOWS-CP1201"},
|
||||
{1250, "WINDOWS-CP1250"},
|
||||
{1251, "WINDOWS-CP1251"},
|
||||
{1252, "WINDOWS-CP1252"},
|
||||
{1253, "WINDOWS-CP1253"},
|
||||
{1254, "WINDOWS-CP1254"},
|
||||
{1255, "WINDOWS-CP1255"},
|
||||
{1256, "WINDOWS-CP1256"},
|
||||
{1257, "WINDOWS-CP1257"},
|
||||
{1258, "WINDOWS-CP1258"},
|
||||
{65001, "UTF8"},
|
||||
{0,"LATIN-1"}
|
||||
};
|
||||
if (stream->stream.mode != ecl_smm_io_wcon)
|
||||
return;
|
||||
for (i = 0; known_cp[i].code && known_cp[i].code != cp; i++)
|
||||
{}
|
||||
external_format = cl_list(2, ecl_make_keyword(known_cp[i].name),
|
||||
@':crlf');
|
||||
si_stream_external_format_set(stream, external_format);
|
||||
external_format = si_windows_codepage_encoding();
|
||||
if (external_format == @':pass-through')
|
||||
fprintf(stderr,
|
||||
"Unsupported codepage %d, input/output encoding may be wrong.\n"
|
||||
"Use the chcp command to change codepages, e.g. 'chcp 65001' to change to utf-8.\n",
|
||||
GetConsoleCP());
|
||||
si_stream_external_format_set(stream, cl_list(2, external_format, @':crlf'));
|
||||
stream->stream.eof_char = 26;
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -79,6 +79,11 @@ typedef struct {
|
|||
#else
|
||||
# define IF_COMPLEX_FLOAT(x) NULL
|
||||
#endif
|
||||
#ifdef ECL_MS_WINDOWS_HOST
|
||||
# define IF_WINDOWS(x) x
|
||||
#else
|
||||
# define IF_WINDOWS(x) NULL
|
||||
#endif
|
||||
|
||||
/* XXX When the symbol has the associated function its name must
|
||||
follow the naming convention, otherwise si:mangle-name will
|
||||
|
|
@ -1811,6 +1816,8 @@ cl_symbols[] = {
|
|||
|
||||
{EXT_ "*ACTION-ON-UNDEFINED-VARIABLE*", EXT_SPECIAL, NULL, -1, ECL_NIL},
|
||||
|
||||
{SYS_ "WINDOWS-CODEPAGE-ENCODING", SI_ORDINARY, IF_WINDOWS(si_windows_codepage_encoding), 0, OBJNULL},
|
||||
|
||||
{EXT_ "SET-BUFFERING-MODE", EXT_ORDINARY, si_set_buffering_mode, 2, OBJNULL},
|
||||
{KEY_ "NONE", KEYWORD, NULL, -1, OBJNULL},
|
||||
{KEY_ "LINE-BUFFERED", KEYWORD, NULL, -1, OBJNULL},
|
||||
|
|
@ -1923,6 +1930,7 @@ cl_symbols[] = {
|
|||
{KEY_ "CR", KEYWORD, NULL, -1, OBJNULL},
|
||||
{KEY_ "LF", KEYWORD, NULL, -1, OBJNULL},
|
||||
{KEY_ "CRLF", KEYWORD, NULL, -1, OBJNULL},
|
||||
|
||||
{KEY_ "UCS-2BE", KEYWORD, NULL, -1, OBJNULL},
|
||||
{KEY_ "UCS-4BE", KEYWORD, NULL, -1, OBJNULL},
|
||||
{KEY_ "UCS-2LE", KEYWORD, NULL, -1, OBJNULL},
|
||||
|
|
@ -2049,11 +2057,7 @@ cl_symbols[] = {
|
|||
#endif
|
||||
{SYS_ "RUN-PROGRAM-INNER", SI_ORDINARY, si_run_program_inner, 4, OBJNULL},
|
||||
{SYS_ "SPAWN-SUBPROCESS", SI_ORDINARY, si_spawn_subprocess, 6, OBJNULL},
|
||||
#if defined(ECL_MS_WINDOWS_HOST)
|
||||
{SYS_ "CLOSE-WINDOWS-HANDLE", SI_ORDINARY, si_close_windows_handle, 1, OBJNULL},
|
||||
#else
|
||||
{SYS_ "CLOSE-WINDOWS-HANDLE", SI_ORDINARY, NULL, -1, OBJNULL},
|
||||
#endif
|
||||
{SYS_ "CLOSE-WINDOWS-HANDLE", SI_ORDINARY, IF_WINDOWS(si_close_windows_handle), 1, OBJNULL},
|
||||
/* ~ */
|
||||
|
||||
{EXT_ "*INVOKE-DEBUGGER-HOOK*", EXT_SPECIAL, NULL, -1, ECL_NIL},
|
||||
|
|
|
|||
|
|
@ -79,6 +79,11 @@ typedef struct {
|
|||
#else
|
||||
# define IF_COMPLEX_FLOAT(x) NULL
|
||||
#endif
|
||||
#ifdef ECL_MS_WINDOWS_HOST
|
||||
# define IF_WINDOWS(x) x
|
||||
#else
|
||||
# define IF_WINDOWS(x) NULL
|
||||
#endif
|
||||
|
||||
/* XXX When the symbol has the associated function its name must
|
||||
follow the naming convention, otherwise si:mangle-name will
|
||||
|
|
@ -1811,6 +1816,8 @@ cl_symbols[] = {
|
|||
|
||||
{EXT_ "*ACTION-ON-UNDEFINED-VARIABLE*",NULL,-1},
|
||||
|
||||
{SYS_ "WINDOWS-CODEPAGE-ENCODING",IF_WINDOWS("si_windows_codepage_encoding"),0},
|
||||
|
||||
{EXT_ "SET-BUFFERING-MODE","si_set_buffering_mode",2},
|
||||
{KEY_ "NONE",NULL,-1},
|
||||
{KEY_ "LINE-BUFFERED",NULL,-1},
|
||||
|
|
@ -1923,6 +1930,7 @@ cl_symbols[] = {
|
|||
{KEY_ "CR",NULL,-1},
|
||||
{KEY_ "LF",NULL,-1},
|
||||
{KEY_ "CRLF",NULL,-1},
|
||||
|
||||
{KEY_ "UCS-2BE",NULL,-1},
|
||||
{KEY_ "UCS-4BE",NULL,-1},
|
||||
{KEY_ "UCS-2LE",NULL,-1},
|
||||
|
|
@ -2049,11 +2057,7 @@ cl_symbols[] = {
|
|||
#endif
|
||||
{SYS_ "RUN-PROGRAM-INNER","si_run_program_inner",4},
|
||||
{SYS_ "SPAWN-SUBPROCESS","si_spawn_subprocess",6},
|
||||
#if defined(ECL_MS_WINDOWS_HOST)
|
||||
{SYS_ "CLOSE-WINDOWS-HANDLE","si_close_windows_handle",1},
|
||||
#else
|
||||
{SYS_ "CLOSE-WINDOWS-HANDLE",NULL,-1},
|
||||
#endif
|
||||
{SYS_ "CLOSE-WINDOWS-HANDLE",IF_WINDOWS("si_close_windows_handle"),1},
|
||||
/* ~ */
|
||||
|
||||
{EXT_ "*INVOKE-DEBUGGER-HOOK*",NULL,-1},
|
||||
|
|
|
|||
|
|
@ -718,6 +718,9 @@ extern ECL_API cl_object si_do_write_sequence(cl_object string, cl_object stream
|
|||
extern ECL_API cl_object si_do_read_sequence(cl_object string, cl_object stream, cl_object start, cl_object end);
|
||||
extern ECL_API cl_object si_file_column(cl_object strm);
|
||||
extern ECL_API cl_object cl_interactive_stream_p(cl_object strm);
|
||||
#if defined(ECL_MS_WINDOWS_HOST)
|
||||
extern ECL_API cl_object si_windows_codepage_encoding();
|
||||
#endif
|
||||
extern ECL_API cl_object si_set_buffering_mode(cl_object strm, cl_object mode);
|
||||
extern ECL_API cl_object si_stream_external_format_set(cl_object strm, cl_object format);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue