diff options
| author | Armin Wolf <W_Armin@gmx.de> | 2025-11-11 14:11:22 +0100 |
|---|---|---|
| committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2025-12-18 14:03:33 +0100 |
| commit | 4836f912acc702d6e70eab3f1d796895906a540d (patch) | |
| tree | d6413db141531c563b6e730065c02c7db0d1b5b3 | |
| parent | f0f456c327f5728ca632746cdfae0bf4f6bc5188 (diff) | |
| download | linux-4836f912acc702d6e70eab3f1d796895906a540d.tar.gz linux-4836f912acc702d6e70eab3f1d796895906a540d.tar.bz2 linux-4836f912acc702d6e70eab3f1d796895906a540d.zip | |
fs/nls: Fix utf16 to utf8 conversion
[ Upstream commit 25524b6190295577e4918c689644451365e6466d ]
Currently the function responsible for converting between utf16 and
utf8 strings will ignore any characters that cannot be converted. This
however also includes multi-byte characters that do not fit into the
provided string buffer.
This can cause problems if such a multi-byte character is followed by
a single-byte character. In such a case the multi-byte character might
be ignored when the provided string buffer is too small, but the
single-byte character might fit and is thus still copied into the
resulting string.
Fix this by stop filling the provided string buffer once a character
does not fit. In order to be able to do this extend utf32_to_utf8()
to return useful errno codes instead of -1.
Fixes: 74675a58507e ("NLS: update handling of Unicode")
Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://patch.msgid.link/20251111131125.3379-2-W_Armin@gmx.de
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
| -rw-r--r-- | fs/nls/nls_base.c | 16 |
1 files changed, 12 insertions, 4 deletions
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 18d597e49a19..d434c4463a8f 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c @@ -94,7 +94,7 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxout) l = u; if (l > UNICODE_MAX || (l & SURROGATE_MASK) == SURROGATE_PAIR) - return -1; + return -EILSEQ; nc = 0; for (t = utf8_table; t->cmask && maxout; t++, maxout--) { @@ -110,7 +110,7 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxout) return nc; } } - return -1; + return -EOVERFLOW; } EXPORT_SYMBOL(utf32_to_utf8); @@ -217,8 +217,16 @@ int utf16s_to_utf8s(const wchar_t *pwcs, int inlen, enum utf16_endian endian, inlen--; } size = utf32_to_utf8(u, op, maxout); - if (size == -1) { - /* Ignore character and move on */ + if (size < 0) { + if (size == -EILSEQ) { + /* Ignore character and move on */ + continue; + } + /* + * Stop filling the buffer with data once a character + * does not fit anymore. + */ + break; } else { op += size; maxout -= size; |
