// SPDX-FileCopyrightText: 2021 Serokell
//
// SPDX-License-Identifier: MPL-2.0
#include
#include
#include
#include
#include
#include
/* Read one line from the file descriptor `fd` into `buf`, respecting the
 * locale encoding.
 *
 * Reading stops at the first '\n' or '\r' (which is consumed but not stored)
 * or at end of input. At most `buf_size` bytes are stored; a non-positive
 * `buf_size` stores nothing.
 *
 * If the text that the user enters before ending the line does not fit
 * into the buffer, the extra characters are silently discarded
 * (similar to what `readpassphrase` does on BSD).
 *
 * Input is read one wide character at a time and re-encoded with `wctomb`,
 * so multi-byte characters are never broken apart: if the encoding of a
 * character does not fit completely into the remaining space, that whole
 * character and everything after it on the line are discarded.
 *
 * The text is represented in the user's locale encoding; in other words,
 * pressing the same sequence of buttons on the keyboard on different
 * systems might result in different byte sequences. Ignoring the locale
 * when reading the user's input would make things even worse. If the
 * resulting bytes must be independent of the system locale, decode them
 * using the system locale encoding and re-encode them as UTF-8 in the
 * caller. (Note that the results can _still_ differ if the input fits into
 * the buffer on one system but not on another, so allocate generously.)
 *
 * The resulting string is NOT null-terminated.
 *
 * Ownership of `fd`: on non-Windows systems the descriptor is wrapped in an
 * unbuffered stdio stream which is closed before returning, and closing the
 * stream closes `fd` as well. The only exception is when the stream cannot
 * be created at all, in which case `fd` is left untouched. On Windows `fd`
 * is ignored and characters are read with `_getwch()`.
 *
 * Returns the size (in bytes) of the string read, or a negative number if
 * an error occurred:
 *
 * -1 = the stream could not be opened (see fdopen) or there was an error
 *      when reading (see fgetwc).
 * -2 = a character could not be encoded (see wctomb); this should not
 *      happen, since the character was decoded using the same locale.
 *
 * In either case `errno` describes the failure that was detected; it is
 * captured before the stream is closed, so closing cannot overwrite it.
 *
 * A noteworthy case is when this function returns -1 and errno == EILSEQ,
 * which means that the user's terminal is sending bytes that are invalid
 * in their configured system locale encoding, so there is no way for us
 * to interpret their input.
 */
int readline_max(int fd, char *buf, int buf_size) {
  // On Windows ignore `fd` and always read from `stdin`.
#if defined(_WIN32) /* windows */
  (void)fd;
  #define READWCHAR() _getwch()
  #define CLOSE() ((void)0)
#else /* not windows => unix */
  FILE *fin = fdopen(fd, "rt");
  if (fin == NULL) {
    // errno was set by fdopen; nothing was opened, so nothing to close.
    return -1;
  }
  setvbuf(fin, NULL, _IONBF, 0); // disable buffering
  #define READWCHAR() fgetwc(fin)
  #define CLOSE() fclose(fin)
#endif

  // TODO: Ok, we also need to handle SIGTSTP (and other signals) to restore
  // terminal echo and, if our process will be resumed, forget all the input
  // and start reading it from scratch (since the user, obviously, does not
  // remember what they entered before pausing the process) – that is what
  // `sudo` does. This means that we need to move the echo control and initial
  // prompt from Haskell to C, at which point we are already looking at an
  // entire C library :(.
  // https://github.com/serokell/haskell-crypto/issues/27

  // Track the fill level as integers rather than pointers, so that no
  // out-of-range pointer is ever formed (not even for a negative `buf_size`).
  const int capacity = buf_size > 0 ? buf_size : 0;
  int used = 0;
  int no_more_space = 0;
  int status = 0;       // 0 = ok, otherwise the negative code to return
  int saved_errno = 0;  // errno belonging to `status`
  char encoded[MB_CUR_MAX];
  wint_t wc;

  errno = 0;
  while ((wc = READWCHAR()) != WEOF) {
    // Read a unicode codepoint and see if it is a line terminator
    // (note: we only accept these two, not what Unicode defines)
    if (wc == L'\n' || wc == L'\r') {
      break;
    }
    if (no_more_space) {
      // we have decided that we are discarding the rest
      continue;
    }

    // Now we encode the codepoint back into bytes.
    // XXX: wchar_t is messed up on Windows, no idea if this
    // actually works there. All I know is POSIX says it has to :/.
    int size = wctomb(encoded, (wchar_t)wc);
    if (size < 0) {
      status = -2;
      saved_errno = errno;
      break;
    }

    if (size <= capacity - used) {
      // it still fits!
      memcpy(buf + used, encoded, (size_t)size);
      used += size;
    } else {
      // discard the remainder (the whole codepoint, never a part of it)
      no_more_space = 1;
    }
  }

  // Only a failed read (WEOF) can signal a read error; plain end of input
  // leaves errno at the 0 we stored before the loop. Decide this *before*
  // closing the stream, because closing may modify errno on its own.
  if (status == 0 && wc == WEOF && errno != 0) {
    status = -1;
    saved_errno = errno;
  }

  CLOSE();

  if (status != 0) {
    errno = saved_errno;
    return status;
  }
  return used;

  // Keep the helper macros local to this function.
#undef READWCHAR
#undef CLOSE
}