mirror of
https://github.com/antirez/linenoise.git
synced 2026-03-27 05:29:55 +00:00
Add UTF-8 and grapheme cluster support
Implement comprehensive UTF-8 handling for linenoise: Core UTF-8 support: - Proper multi-byte character navigation (left/right arrows) - Correct backspace deletion for multi-byte characters - Display width calculation for cursor positioning - Wide character support (CJK, emoji) as 2-column display Grapheme cluster support for complex emoji: - Variation selectors (U+FE0E, U+FE0F) for emoji style - Skin tone modifiers (U+1F3FB-U+1F3FF) - Zero Width Joiner (U+200D) sequences like rainbow flag - Regional indicators for flag emoji - Combining diacritical marks Navigation and deletion now treat entire grapheme clusters as single units. For example, 🏳️🌈 (14 bytes, 4 codepoints) is handled as one character for cursor movement and backspace. Multiline mode fixes: - Fix history navigation regression where going from multi-row to single-row entries left dirty rows on screen - Save actual cursor row position (oldrpos) instead of recalculating Updates to linenoise.c: - Add helper functions for UTF-8 decoding and grapheme detection - Rewrite utf8PrevCharLen/utf8NextCharLen for grapheme clusters - Add utf8CharWidth with proper zero-width character handling - Add utf8StrWidth with ZWJ sequence support - Fix refreshMultiLine cursor row tracking Updates to linenoise.h: - Add oldrpos field to linenoiseState for multiline cursor tracking Updates to README: - Document UTF-8 support for multi-byte characters and emoji - Update line count from ~850 to ~1100 - Add "Running the tests" section 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -8,7 +8,8 @@ MongoDB, Android and many other projects.
|
||||
* Completion.
|
||||
* Hints (suggestions at the right of the prompt as you type).
|
||||
* Multiplexing mode, with prompt hiding/restoring for asynchronous output.
|
||||
* About ~850 lines (comments and spaces excluded) of BSD license source code.
|
||||
* UTF-8 support for multi-byte characters and emoji.
|
||||
* About 1100 lines (comments and spaces excluded) of BSD-licensed source code.
|
||||
* Only uses a subset of VT100 escapes (ANSI.SYS compatible).
|
||||
|
||||
## Can a line editing library be 20k lines of code?
|
||||
@@ -341,7 +342,22 @@ example using select(2) and the asynchronous API:
|
||||
|
||||
You can test the example by running the example program with the `--async` option.
|
||||
|
||||
## Running the tests
|
||||
|
||||
Linenoise has a test suite that uses a VT100 terminal emulator to verify correct behavior. The tests cover basic editing, cursor movement, UTF-8 handling, horizontal scrolling, and multiline mode.
|
||||
|
||||
To run the tests:
|
||||
|
||||
make test
|
||||
|
||||
Or build and run separately:
|
||||
|
||||
make linenoise-test
|
||||
./linenoise-test
|
||||
|
||||
The test harness forks linenoise_example, communicates via pipes, and uses a VT100 emulator to verify screen output and cursor positioning.
|
||||
|
||||
## Related projects
|
||||
|
||||
* [Linenoise NG](https://github.com/arangodb/linenoise-ng) is a fork of Linenoise that aims to add more advanced features like UTF-8 support, Windows support and other features. Uses C++ instead of C as development language.
|
||||
* [Linenoise NG](https://github.com/arangodb/linenoise-ng) is a fork of Linenoise that aims to add more advanced features such as Windows support. It uses C++ instead of C as its development language.
|
||||
* [Linenoise-swift](https://github.com/andybest/linenoise-swift) is a reimplementation of Linenoise written in Swift.
|
||||
|
||||
561
linenoise.c
561
linenoise.c
@@ -115,6 +115,7 @@
|
||||
#include <sys/types.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <unistd.h>
|
||||
#include <stdint.h>
|
||||
#include "linenoise.h"
|
||||
|
||||
#define LINENOISE_DEFAULT_HISTORY_MAX_LEN 100
|
||||
@@ -136,6 +137,315 @@ static int history_max_len = LINENOISE_DEFAULT_HISTORY_MAX_LEN;
|
||||
static int history_len = 0;
|
||||
static char **history = NULL;
|
||||
|
||||
/* =========================== UTF-8 support ================================ */
|
||||
|
||||
/* Tell how many bytes the UTF-8 sequence introduced by lead byte 'c' spans.
 * The decision is taken from the high bits of the lead byte alone:
 *   0xxxxxxx -> 1 (ASCII)
 *   110xxxxx -> 2
 *   1110xxxx -> 3
 *   11110xxx -> 4
 * Any other byte (a continuation byte or an invalid lead) is reported as a
 * one-byte unit so that callers always make progress. */
static int utf8ByteLen(char c) {
    unsigned char lead = (unsigned char)c;

    switch (lead >> 4) {
    case 0xC:
    case 0xD:
        return 2;               /* 110xxxxx */
    case 0xE:
        return 3;               /* 1110xxxx */
    case 0xF:
        /* 11110xxx is a 4-byte lead; 11111xxx is not valid UTF-8. */
        return (lead & 0x08) ? 1 : 4;
    default:
        /* 0x0-0x7 is ASCII, 0x8-0xB is a stray continuation byte. */
        return 1;
    }
}
|
||||
|
||||
/* Decode the UTF-8 sequence starting at 's' into a Unicode codepoint.
 *
 * On return *len holds the number of bytes consumed (1..4) and the decoded
 * codepoint is returned.
 *
 * Every trailing byte is checked to be a continuation byte (10xxxxxx)
 * before it is used. If the sequence is truncated or malformed, or the
 * lead byte is invalid, the lead byte alone is returned with *len == 1.
 * Because decoding stops at the first non-continuation byte, a truncated
 * sequence at the end of a NUL-terminated buffer never reads past the
 * terminator and never reports a length longer than the data present.
 * NOTE(review): this assumes 's' is NUL-terminated, as no buffer length is
 * passed in. */
static uint32_t utf8DecodeChar(const char *s, size_t *len) {
    const unsigned char *p = (const unsigned char *)s;
    uint32_t cp;
    size_t need, i;

    if ((*p & 0x80) == 0) {
        *len = 1;               /* 0xxxxxxx: ASCII */
        return *p;
    } else if ((*p & 0xE0) == 0xC0) {
        need = 2;               /* 110xxxxx */
        cp = *p & 0x1F;
    } else if ((*p & 0xF0) == 0xE0) {
        need = 3;               /* 1110xxxx */
        cp = *p & 0x0F;
    } else if ((*p & 0xF8) == 0xF0) {
        need = 4;               /* 11110xxx */
        cp = *p & 0x07;
    } else {
        *len = 1;               /* Invalid lead or stray continuation. */
        return *p;
    }

    for (i = 1; i < need; i++) {
        if ((p[i] & 0xC0) != 0x80) {
            /* Truncated or malformed: consume only the lead byte. */
            *len = 1;
            return *p;
        }
        cp = (cp << 6) | (uint32_t)(p[i] & 0x3F);
    }
    *len = need;
    return cp;
}
|
||||
|
||||
/* Return 1 when 'cp' is one of the two variation selectors recognized here:
 * U+FE0E (text presentation) or U+FE0F (emoji presentation); 0 otherwise. */
static int isVariationSelector(uint32_t cp) {
    switch (cp) {
    case 0xFE0E:    /* VS15: text style */
    case 0xFE0F:    /* VS16: emoji style */
        return 1;
    default:
        return 0;
    }
}
|
||||
|
||||
/* Return 1 when 'cp' lies in the skin tone modifier range
 * U+1F3FB..U+1F3FF, 0 otherwise. */
static int isSkinToneModifier(uint32_t cp) {
    if (cp < 0x1F3FB) return 0;
    if (cp > 0x1F3FF) return 0;
    return 1;
}
|
||||
|
||||
/* Return 1 when 'cp' is the Zero Width Joiner (U+200D), 0 otherwise. */
static int isZWJ(uint32_t cp) {
    const uint32_t zero_width_joiner = 0x200D;

    return (cp == zero_width_joiner) ? 1 : 0;
}
|
||||
|
||||
/* Return 1 when 'cp' is a Regional Indicator symbol (U+1F1E6..U+1F1FF),
 * the codepoints that are combined in pairs to form flag emoji;
 * 0 otherwise. */
static int isRegionalIndicator(uint32_t cp) {
    const uint32_t first = 0x1F1E6;
    const uint32_t last = 0x1F1FF;

    if (cp < first || cp > last) return 0;
    return 1;
}
|
||||
|
||||
/* Return 1 when 'cp' falls in one of the combining-mark ranges listed in
 * the table below, 0 otherwise. */
static int isCombiningMark(uint32_t cp) {
    static const uint32_t ranges[][2] = {
        {0x0300, 0x036F},   /* Combining Diacriticals */
        {0x1AB0, 0x1AFF},   /* Combining Diacriticals Extended */
        {0x1DC0, 0x1DFF},   /* Combining Diacriticals Supplement */
        {0x20D0, 0x20FF},   /* Combining Diacriticals for Symbols */
        {0xFE20, 0xFE2F},   /* Combining Half Marks */
    };
    size_t idx;

    for (idx = 0; idx < sizeof(ranges) / sizeof(ranges[0]); idx++) {
        if (cp >= ranges[idx][0] && cp <= ranges[idx][1]) return 1;
    }
    return 0;
}
|
||||
|
||||
/* Return 1 when 'cp' attaches to the preceding character instead of
 * starting a new grapheme: a variation selector, a skin tone modifier,
 * the Zero Width Joiner, or a combining mark. Return 0 otherwise. */
static int isGraphemeExtend(uint32_t cp) {
    if (isVariationSelector(cp)) return 1;
    if (isSkinToneModifier(cp)) return 1;
    if (isZWJ(cp)) return 1;
    if (isCombiningMark(cp)) return 1;
    return 0;
}
|
||||
|
||||
/* Decode the codepoint that ends right before byte offset 'pos' in 'buf'.
 *
 * The start of the codepoint is located by walking backwards over
 * continuation bytes (10xxxxxx), looking at most 4 bytes back and never
 * before the start of the buffer. *cplen receives the number of bytes
 * between the located start and 'pos'; the return value is whatever
 * utf8DecodeChar() decodes at that start. With pos == 0 there is nothing
 * to decode: *cplen is set to 0 and 0 is returned. */
static uint32_t utf8DecodePrev(const char *buf, size_t pos, size_t *cplen) {
    size_t start;
    size_t ignored;

    if (pos == 0) {
        *cplen = 0;
        return 0;
    }

    /* Begin at the last byte and keep stepping back while it is a
     * continuation byte and we are still within the 4-byte window. */
    start = pos - 1;
    while (start > 0 &&
           (pos - start) < 4 &&
           ((unsigned char)buf[start] & 0xC0) == 0x80)
    {
        start--;
    }

    *cplen = pos - start;
    return utf8DecodeChar(buf + start, &ignored);
}
|
||||
|
||||
/* Given a buffer and a byte position, return the byte length of the
 * grapheme cluster that ends at that position (0 when pos is 0).
 *
 * A grapheme cluster, as approximated here, includes:
 * - The base character
 * - Any following variation selectors, skin tone modifiers, combining marks
 * - ZWJ sequences (emoji joined by Zero Width Joiner)
 * - Regional indicator pairs (flag emoji)
 *
 * Regional indicators are paired starting from the beginning of the run
 * they belong to, matching what utf8NextCharLen() does when scanning
 * forward. The regional indicators preceding the last one are counted:
 * only an odd count means the previous indicator is the first half of the
 * flag we are standing on. Pairing unconditionally would split an existing
 * flag when it is followed by a lone indicator. */
static size_t utf8PrevCharLen(const char *buf, size_t pos) {
    if (pos == 0) return 0;

    size_t total = 0;
    size_t curpos = pos;

    /* First, get the last codepoint. */
    size_t cplen;
    uint32_t cp = utf8DecodePrev(buf, curpos, &cplen);
    if (cplen == 0) return 0;
    total += cplen;
    curpos -= cplen;

    /* Walk backwards for as long as the codepoints belong to the same
     * cluster. 'cp' is always the earliest codepoint included so far. */
    while (curpos > 0) {
        size_t prevlen;
        uint32_t prevcp = utf8DecodePrev(buf, curpos, &prevlen);
        if (prevlen == 0) break;

        if (isZWJ(prevcp)) {
            /* ZWJ joins two emoji. Include the ZWJ and continue to get
             * the preceding character. */
            total += prevlen;
            curpos -= prevlen;
            /* Now get the character before ZWJ. */
            prevcp = utf8DecodePrev(buf, curpos, &prevlen);
            if (prevlen == 0) break;
            total += prevlen;
            curpos -= prevlen;
            cp = prevcp;
            continue; /* Check if there's more extending before this. */
        } else if (isGraphemeExtend(cp)) {
            /* Current cp is an extending character; include previous. */
            total += prevlen;
            curpos -= prevlen;
            cp = prevcp;
            continue;
        } else if (isRegionalIndicator(cp) && isRegionalIndicator(prevcp)) {
            /* Count the run of regional indicators that precedes 'cp'. */
            size_t run = 0;
            size_t scan = curpos;
            while (scan > 0) {
                size_t rilen;
                uint32_t ricp = utf8DecodePrev(buf, scan, &rilen);
                if (rilen == 0 || !isRegionalIndicator(ricp)) break;
                run++;
                scan -= rilen;
            }
            /* Odd run: prevcp opens the pair that cp closes. Even run:
             * prevcp closes an earlier flag and cp stands alone. */
            if (run % 2 == 1) total += prevlen;
            break;
        } else {
            /* No more extending; we've found the start of the cluster. */
            break;
        }
    }

    return total;
}
|
||||
|
||||
/* Given a buffer, a byte position and the total byte length, return the
 * byte length of the grapheme cluster that starts at 'pos' (0 when pos is
 * at or past 'len').
 *
 * The cluster is the first codepoint plus any following extending
 * characters (variation selectors, skin tone modifiers, combining marks),
 * ZWJ-joined characters, and a second regional indicator when the first
 * codepoint is a regional indicator.
 *
 * The result never exceeds len - pos. The decoder derives a sequence
 * length from the lead byte, so a truncated sequence at the end of the
 * buffer could otherwise report more bytes than are available. Callers
 * subtract the result from the remaining length and add it to the cursor
 * position, so an over-long result would underflow or move past the end. */
static size_t utf8NextCharLen(const char *buf, size_t pos, size_t len) {
    if (pos >= len) return 0;

    size_t avail = len - pos;
    size_t total = 0;
    size_t curpos = pos;

    /* Get the first codepoint. */
    size_t cplen;
    uint32_t cp = utf8DecodeChar(buf + curpos, &cplen);
    if (cplen > avail) return avail; /* Truncated sequence at the end. */
    total += cplen;
    curpos += cplen;

    int isRI = isRegionalIndicator(cp);

    /* Consume any extending characters that follow. */
    while (curpos < len) {
        size_t nextlen;
        uint32_t nextcp = utf8DecodeChar(buf + curpos, &nextlen);

        /* A truncated trailing sequence is left as its own unit. */
        if (nextlen > len - curpos) break;

        if (isZWJ(nextcp) && curpos + nextlen < len) {
            /* ZWJ: include it and the following character. */
            total += nextlen;
            curpos += nextlen;
            /* Get the character after ZWJ. */
            nextcp = utf8DecodeChar(buf + curpos, &nextlen);
            if (nextlen > len - curpos) {
                /* Joined character is truncated: take what is left. */
                total = avail;
                break;
            }
            total += nextlen;
            curpos += nextlen;
            continue; /* Check for more extending after the joined char. */
        } else if (isGraphemeExtend(nextcp)) {
            /* Variation selector, skin tone, combining mark, etc. */
            total += nextlen;
            curpos += nextlen;
            continue;
        } else if (isRI && isRegionalIndicator(nextcp)) {
            /* Second regional indicator for a flag pair. */
            total += nextlen;
            curpos += nextlen;
            isRI = 0; /* Only pair once. */
            continue;
        } else {
            break;
        }
    }

    return total;
}
|
||||
|
||||
/* Estimate how many terminal columns the codepoint 'cp' occupies.
 *
 * The checks are applied in this order:
 * - Control characters (below 32, and 0x7F..0x9F): 0 columns
 * - Combining marks, variation selectors, skin tone modifiers, ZWJ:
 *   0 columns, since they modify the preceding character
 * - Codepoints inside one of the wide ranges in the table below
 *   (CJK, Hangul, fullwidth forms, emoji blocks): 2 columns
 * - Everything else: 1 column
 *
 * This is a minimal heuristic, not a full wcwidth() implementation. */
static int utf8CharWidth(uint32_t cp) {
    static const struct { uint32_t first, last; } wide[] = {
        {0x1100, 0x115F},   /* Hangul Jamo */
        {0x231A, 0x231B},   /* Watch, Hourglass */
        {0x2329, 0x232A},   /* Angle brackets */
        {0x23E9, 0x23F3},   /* Various symbols */
        {0x23F8, 0x23FA},   /* Various symbols */
        {0x25AA, 0x25AB},   /* Small squares */
        {0x25B6, 0x25C0},   /* Play/reverse buttons */
        {0x25FB, 0x25FE},   /* Squares */
        {0x2600, 0x26FF},   /* Misc Symbols (sun, cloud, etc) */
        {0x2700, 0x27BF},   /* Dingbats */
        {0x2934, 0x2935},   /* Arrows */
        {0x2B05, 0x2B07},   /* Arrows */
        {0x2B1B, 0x2B1C},   /* Squares */
        {0x2B50, 0x2B50},   /* Star */
        {0x2B55, 0x2B55},   /* Circle */
        {0x2E80, 0x303E},   /* CJK ... Yi, part before U+303F */
        {0x3040, 0xA4CF},   /* CJK ... Yi, part after U+303F */
        {0xAC00, 0xD7A3},   /* Hangul Syllables */
        {0xF900, 0xFAFF},   /* CJK Compatibility Ideographs */
        {0xFE10, 0xFE1F},   /* Vertical forms */
        {0xFE30, 0xFE6F},   /* CJK Compatibility Forms */
        {0xFF00, 0xFF60},   /* Fullwidth Forms */
        {0xFFE0, 0xFFE6},   /* Fullwidth Signs */
        {0x1F1E6, 0x1F1FF}, /* Regional Indicators (flags) */
        {0x1F300, 0x1F64F}, /* Misc Symbols and Emoticons */
        {0x1F680, 0x1F6FF}, /* Transport and Map Symbols */
        {0x1F900, 0x1F9FF}, /* Supplemental Symbols */
        {0x1FA00, 0x1FAFF}, /* Chess, Extended-A */
        {0x20000, 0x2FFFF}, /* CJK Extension B and beyond */
    };
    size_t idx;

    /* Control characters take no space. */
    if (cp < 32) return 0;
    if (cp >= 0x7F && cp < 0xA0) return 0;

    /* Zero-width modifiers must be ruled out before the wide table is
     * consulted: the skin tone modifiers sit inside an emoji range. */
    if (isCombiningMark(cp) || isVariationSelector(cp) ||
        isSkinToneModifier(cp) || isZWJ(cp))
    {
        return 0;
    }

    for (idx = 0; idx < sizeof(wide) / sizeof(wide[0]); idx++) {
        if (cp >= wide[idx].first && cp <= wide[idx].last) return 2;
    }

    return 1; /* Default: single width */
}
|
||||
|
||||
/* Sum the display width, in terminal columns, of the first 'len' bytes of
 * the UTF-8 string 's'. Used for cursor positioning.
 *
 * Each codepoint contributes utf8CharWidth() columns, except that the
 * codepoint immediately following a Zero Width Joiner contributes nothing:
 * it is rendered as part of the joined sequence. */
static size_t utf8StrWidth(const char *s, size_t len) {
    size_t cols = 0;
    size_t off;
    size_t step;
    int joined = 0; /* Non-zero when the previous codepoint was a ZWJ. */

    for (off = 0; off < len; off += step) {
        uint32_t cp = utf8DecodeChar(s + off, &step);

        if (!joined) cols += utf8CharWidth(cp);

        /* Remember whether the next codepoint is glued to this one. */
        joined = isZWJ(cp);
    }
    return cols;
}
|
||||
|
||||
/* Return the display width of the character starting at 's'.
 * Only the first codepoint is decoded and measured. 'len' is used solely
 * to detect the empty case, for which 0 is returned. */
static int utf8SingleCharWidth(const char *s, size_t len) {
    size_t consumed;

    return len ? utf8CharWidth(utf8DecodeChar(s, &consumed)) : 0;
}
|
||||
|
||||
enum KEY_ACTION{
|
||||
KEY_NULL = 0, /* NULL */
|
||||
CTRL_A = 1, /* Ctrl+a */
|
||||
@@ -220,6 +530,13 @@ static int isUnsupportedTerm(void) {
|
||||
static int enableRawMode(int fd) {
|
||||
struct termios raw;
|
||||
|
||||
/* Test mode: when LINENOISE_ASSUME_TTY is set, skip terminal setup.
|
||||
* This allows testing via pipes without a real terminal. */
|
||||
if (getenv("LINENOISE_ASSUME_TTY")) {
|
||||
rawmode = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!isatty(STDIN_FILENO)) goto fatal;
|
||||
if (!atexit_registered) {
|
||||
atexit(linenoiseAtExit);
|
||||
@@ -253,6 +570,11 @@ fatal:
|
||||
}
|
||||
|
||||
static void disableRawMode(int fd) {
|
||||
/* Test mode: nothing to restore. */
|
||||
if (getenv("LINENOISE_ASSUME_TTY")) {
|
||||
rawmode = 0;
|
||||
return;
|
||||
}
|
||||
/* Don't even check the return value as it's too late. */
|
||||
if (rawmode && tcsetattr(fd,TCSAFLUSH,&orig_termios) != -1)
|
||||
rawmode = 0;
|
||||
@@ -288,6 +610,10 @@ static int getCursorPosition(int ifd, int ofd) {
|
||||
static int getColumns(int ifd, int ofd) {
|
||||
struct winsize ws;
|
||||
|
||||
/* Test mode: use LINENOISE_COLS env var for fixed width. */
|
||||
char *cols_env = getenv("LINENOISE_COLS");
|
||||
if (cols_env) return atoi(cols_env);
|
||||
|
||||
if (ioctl(1, TIOCGWINSZ, &ws) == -1 || ws.ws_col == 0) {
|
||||
/* ioctl() failed. Try to query the terminal itself. */
|
||||
int start, cols;
|
||||
@@ -505,16 +831,29 @@ static void abFree(struct abuf *ab) {
|
||||
}
|
||||
|
||||
/* Helper of refreshSingleLine() and refreshMultiLine() to show hints
|
||||
* to the right of the prompt. */
|
||||
void refreshShowHints(struct abuf *ab, struct linenoiseState *l, int plen) {
|
||||
* to the right of the prompt. Now uses display widths for proper UTF-8. */
|
||||
void refreshShowHints(struct abuf *ab, struct linenoiseState *l, int pwidth) {
|
||||
char seq[64];
|
||||
if (hintsCallback && plen+l->len < l->cols) {
|
||||
size_t bufwidth = utf8StrWidth(l->buf, l->len);
|
||||
if (hintsCallback && pwidth + bufwidth < l->cols) {
|
||||
int color = -1, bold = 0;
|
||||
char *hint = hintsCallback(l->buf,&color,&bold);
|
||||
if (hint) {
|
||||
int hintlen = strlen(hint);
|
||||
int hintmaxlen = l->cols-(plen+l->len);
|
||||
if (hintlen > hintmaxlen) hintlen = hintmaxlen;
|
||||
size_t hintlen = strlen(hint);
|
||||
size_t hintwidth = utf8StrWidth(hint, hintlen);
|
||||
size_t hintmaxwidth = l->cols - (pwidth + bufwidth);
|
||||
/* Truncate hint to fit, respecting UTF-8 boundaries. */
|
||||
if (hintwidth > hintmaxwidth) {
|
||||
size_t i = 0, w = 0;
|
||||
while (i < hintlen) {
|
||||
size_t clen = utf8NextCharLen(hint, i, hintlen);
|
||||
int cwidth = utf8SingleCharWidth(hint + i, clen);
|
||||
if (w + cwidth > hintmaxwidth) break;
|
||||
w += cwidth;
|
||||
i += clen;
|
||||
}
|
||||
hintlen = i;
|
||||
}
|
||||
if (bold == 1 && color == -1) color = 37;
|
||||
if (color != -1 || bold != 0)
|
||||
snprintf(seq,64,"\033[%d;%d;49m",bold,color);
|
||||
@@ -536,23 +875,44 @@ void refreshShowHints(struct abuf *ab, struct linenoiseState *l, int plen) {
|
||||
* cursor position, and number of columns of the terminal.
|
||||
*
|
||||
* Flags is REFRESH_* macros. The function can just remove the old
|
||||
* prompt, just write it, or both. */
|
||||
* prompt, just write it, or both.
|
||||
*
|
||||
* This function is UTF-8 aware and uses display widths (not byte counts)
|
||||
* for cursor positioning and horizontal scrolling. */
|
||||
static void refreshSingleLine(struct linenoiseState *l, int flags) {
|
||||
char seq[64];
|
||||
size_t plen = strlen(l->prompt);
|
||||
size_t pwidth = utf8StrWidth(l->prompt, l->plen); /* Prompt display width */
|
||||
int fd = l->ofd;
|
||||
char *buf = l->buf;
|
||||
size_t len = l->len;
|
||||
size_t pos = l->pos;
|
||||
size_t len = l->len; /* Byte length of buffer to display */
|
||||
size_t pos = l->pos; /* Byte position of cursor */
|
||||
size_t poscol; /* Display column of cursor */
|
||||
size_t lencol; /* Display width of buffer */
|
||||
struct abuf ab;
|
||||
|
||||
while((plen+pos) >= l->cols) {
|
||||
buf++;
|
||||
len--;
|
||||
pos--;
|
||||
/* Calculate the display width up to cursor and total display width. */
|
||||
poscol = utf8StrWidth(buf, pos);
|
||||
lencol = utf8StrWidth(buf, len);
|
||||
|
||||
/* Scroll the buffer horizontally if cursor is past the right edge.
|
||||
* We need to trim full UTF-8 characters from the left until the
|
||||
* cursor position fits within the terminal width. */
|
||||
while (pwidth + poscol >= l->cols) {
|
||||
size_t clen = utf8NextCharLen(buf, 0, len);
|
||||
int cwidth = utf8SingleCharWidth(buf, clen);
|
||||
buf += clen;
|
||||
len -= clen;
|
||||
pos -= clen;
|
||||
poscol -= cwidth;
|
||||
lencol -= cwidth;
|
||||
}
|
||||
while (plen+len > l->cols) {
|
||||
len--;
|
||||
|
||||
/* Trim from the right if the line still doesn't fit. */
|
||||
while (pwidth + lencol > l->cols) {
|
||||
size_t clen = utf8PrevCharLen(buf, len);
|
||||
int cwidth = utf8SingleCharWidth(buf + len - clen, clen);
|
||||
len -= clen;
|
||||
lencol -= cwidth;
|
||||
}
|
||||
|
||||
abInit(&ab);
|
||||
@@ -562,14 +922,19 @@ static void refreshSingleLine(struct linenoiseState *l, int flags) {
|
||||
|
||||
if (flags & REFRESH_WRITE) {
|
||||
/* Write the prompt and the current buffer content */
|
||||
abAppend(&ab,l->prompt,strlen(l->prompt));
|
||||
abAppend(&ab,l->prompt,l->plen);
|
||||
if (maskmode == 1) {
|
||||
while (len--) abAppend(&ab,"*",1);
|
||||
/* In mask mode, we output one '*' per UTF-8 character, not byte */
|
||||
size_t i = 0;
|
||||
while (i < len) {
|
||||
abAppend(&ab,"*",1);
|
||||
i += utf8NextCharLen(buf, i, len);
|
||||
}
|
||||
} else {
|
||||
abAppend(&ab,buf,len);
|
||||
}
|
||||
/* Show hits if any. */
|
||||
refreshShowHints(&ab,l,plen);
|
||||
/* Show hints if any. */
|
||||
refreshShowHints(&ab,l,pwidth);
|
||||
}
|
||||
|
||||
/* Erase to right */
|
||||
@@ -577,8 +942,8 @@ static void refreshSingleLine(struct linenoiseState *l, int flags) {
|
||||
abAppend(&ab,seq,strlen(seq));
|
||||
|
||||
if (flags & REFRESH_WRITE) {
|
||||
/* Move cursor to original position. */
|
||||
snprintf(seq,sizeof(seq),"\r\x1b[%dC", (int)(pos+plen));
|
||||
/* Move cursor to original position (using display column, not byte). */
|
||||
snprintf(seq,sizeof(seq),"\r\x1b[%dC", (int)(poscol+pwidth));
|
||||
abAppend(&ab,seq,strlen(seq));
|
||||
}
|
||||
|
||||
@@ -592,14 +957,18 @@ static void refreshSingleLine(struct linenoiseState *l, int flags) {
|
||||
* cursor position, and number of columns of the terminal.
|
||||
*
|
||||
* Flags is REFRESH_* macros. The function can just remove the old
|
||||
* prompt, just write it, or both. */
|
||||
* prompt, just write it, or both.
|
||||
*
|
||||
* This function is UTF-8 aware and uses display widths for positioning. */
|
||||
static void refreshMultiLine(struct linenoiseState *l, int flags) {
|
||||
char seq[64];
|
||||
int plen = strlen(l->prompt);
|
||||
int rows = (plen+l->len+l->cols-1)/l->cols; /* rows used by current buf. */
|
||||
int rpos = (plen+l->oldpos+l->cols)/l->cols; /* cursor relative row. */
|
||||
size_t pwidth = utf8StrWidth(l->prompt, l->plen); /* Prompt display width */
|
||||
size_t bufwidth = utf8StrWidth(l->buf, l->len); /* Buffer display width */
|
||||
size_t poswidth = utf8StrWidth(l->buf, l->pos); /* Cursor display width */
|
||||
int rows = (pwidth+bufwidth+l->cols-1)/l->cols; /* rows used by current buf. */
|
||||
int rpos = l->oldrpos; /* cursor relative row from previous refresh. */
|
||||
int rpos2; /* rpos after refresh. */
|
||||
int col; /* colum position, zero-based. */
|
||||
int col; /* column position, zero-based. */
|
||||
int old_rows = l->oldrows;
|
||||
int fd = l->ofd, j;
|
||||
struct abuf ab;
|
||||
@@ -634,22 +1003,26 @@ static void refreshMultiLine(struct linenoiseState *l, int flags) {
|
||||
|
||||
if (flags & REFRESH_WRITE) {
|
||||
/* Write the prompt and the current buffer content */
|
||||
abAppend(&ab,l->prompt,strlen(l->prompt));
|
||||
abAppend(&ab,l->prompt,l->plen);
|
||||
if (maskmode == 1) {
|
||||
unsigned int i;
|
||||
for (i = 0; i < l->len; i++) abAppend(&ab,"*",1);
|
||||
/* In mask mode, output one '*' per UTF-8 character, not byte */
|
||||
size_t i = 0;
|
||||
while (i < l->len) {
|
||||
abAppend(&ab,"*",1);
|
||||
i += utf8NextCharLen(l->buf, i, l->len);
|
||||
}
|
||||
} else {
|
||||
abAppend(&ab,l->buf,l->len);
|
||||
}
|
||||
|
||||
/* Show hits if any. */
|
||||
refreshShowHints(&ab,l,plen);
|
||||
/* Show hints if any. */
|
||||
refreshShowHints(&ab,l,pwidth);
|
||||
|
||||
/* If we are at the very end of the screen with our prompt, we need to
|
||||
* emit a newline and move the prompt to the first column. */
|
||||
if (l->pos &&
|
||||
l->pos == l->len &&
|
||||
(l->pos+plen) % l->cols == 0)
|
||||
(poswidth+pwidth) % l->cols == 0)
|
||||
{
|
||||
lndebug("<newline>");
|
||||
abAppend(&ab,"\n",1);
|
||||
@@ -660,10 +1033,10 @@ static void refreshMultiLine(struct linenoiseState *l, int flags) {
|
||||
}
|
||||
|
||||
/* Move cursor to right position. */
|
||||
rpos2 = (plen+l->pos+l->cols)/l->cols; /* Current cursor relative row */
|
||||
rpos2 = (pwidth+poswidth+l->cols)/l->cols; /* Current cursor relative row */
|
||||
lndebug("rpos2 %d", rpos2);
|
||||
|
||||
/* Go up till we reach the expected positon. */
|
||||
/* Go up till we reach the expected position. */
|
||||
if (rows-rpos2 > 0) {
|
||||
lndebug("go-up %d", rows-rpos2);
|
||||
snprintf(seq,64,"\x1b[%dA", rows-rpos2);
|
||||
@@ -671,7 +1044,7 @@ static void refreshMultiLine(struct linenoiseState *l, int flags) {
|
||||
}
|
||||
|
||||
/* Set column. */
|
||||
col = (plen+(int)l->pos) % (int)l->cols;
|
||||
col = (pwidth+poswidth) % l->cols;
|
||||
lndebug("set col %d", 1+col);
|
||||
if (col)
|
||||
snprintf(seq,64,"\r\x1b[%dC", col);
|
||||
@@ -682,6 +1055,7 @@ static void refreshMultiLine(struct linenoiseState *l, int flags) {
|
||||
|
||||
lndebug("\n");
|
||||
l->oldpos = l->pos;
|
||||
if (flags & REFRESH_WRITE) l->oldrpos = rpos2;
|
||||
|
||||
if (write(fd,ab.b,ab.len) == -1) {} /* Can't recover from write error. */
|
||||
abFree(&ab);
|
||||
@@ -718,29 +1092,37 @@ void linenoiseShow(struct linenoiseState *l) {
|
||||
}
|
||||
}
|
||||
|
||||
/* Insert the character 'c' at cursor current position.
|
||||
/* Insert the character(s) 'c' of length 'clen' at cursor current position.
|
||||
* This handles both single-byte ASCII and multi-byte UTF-8 sequences.
|
||||
*
|
||||
* On error writing to the terminal -1 is returned, otherwise 0. */
|
||||
int linenoiseEditInsert(struct linenoiseState *l, char c) {
|
||||
if (l->len < l->buflen) {
|
||||
int linenoiseEditInsert(struct linenoiseState *l, const char *c, size_t clen) {
|
||||
if (l->len + clen <= l->buflen) {
|
||||
if (l->len == l->pos) {
|
||||
l->buf[l->pos] = c;
|
||||
l->pos++;
|
||||
l->len++;
|
||||
/* Append at end of line. */
|
||||
memcpy(l->buf+l->pos, c, clen);
|
||||
l->pos += clen;
|
||||
l->len += clen;
|
||||
l->buf[l->len] = '\0';
|
||||
if ((!mlmode && l->plen+l->len < l->cols && !hintsCallback)) {
|
||||
/* Avoid a full update of the line in the
|
||||
* trivial case. */
|
||||
char d = (maskmode==1) ? '*' : c;
|
||||
if (write(l->ofd,&d,1) == -1) return -1;
|
||||
if ((!mlmode &&
|
||||
utf8StrWidth(l->prompt,l->plen)+utf8StrWidth(l->buf,l->len) < l->cols &&
|
||||
!hintsCallback)) {
|
||||
/* Avoid a full update of the line in the trivial case:
|
||||
* single-width char, no hints, fits in one line. */
|
||||
if (maskmode == 1) {
|
||||
if (write(l->ofd,"*",1) == -1) return -1;
|
||||
} else {
|
||||
if (write(l->ofd,c,clen) == -1) return -1;
|
||||
}
|
||||
} else {
|
||||
refreshLine(l);
|
||||
}
|
||||
} else {
|
||||
memmove(l->buf+l->pos+1,l->buf+l->pos,l->len-l->pos);
|
||||
l->buf[l->pos] = c;
|
||||
l->len++;
|
||||
l->pos++;
|
||||
/* Insert in the middle of the line. */
|
||||
memmove(l->buf+l->pos+clen, l->buf+l->pos, l->len-l->pos);
|
||||
memcpy(l->buf+l->pos, c, clen);
|
||||
l->len += clen;
|
||||
l->pos += clen;
|
||||
l->buf[l->len] = '\0';
|
||||
refreshLine(l);
|
||||
}
|
||||
@@ -748,18 +1130,18 @@ int linenoiseEditInsert(struct linenoiseState *l, char c) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Move cursor on the left. */
|
||||
/* Move cursor on the left. Moves by one UTF-8 character, not byte. */
|
||||
void linenoiseEditMoveLeft(struct linenoiseState *l) {
|
||||
if (l->pos > 0) {
|
||||
l->pos--;
|
||||
l->pos -= utf8PrevCharLen(l->buf, l->pos);
|
||||
refreshLine(l);
|
||||
}
|
||||
}
|
||||
|
||||
/* Move cursor on the right. */
|
||||
/* Move cursor on the right. Moves by one UTF-8 character, not byte. */
|
||||
void linenoiseEditMoveRight(struct linenoiseState *l) {
|
||||
if (l->pos != l->len) {
|
||||
l->pos++;
|
||||
l->pos += utf8NextCharLen(l->buf, l->pos, l->len);
|
||||
refreshLine(l);
|
||||
}
|
||||
}
|
||||
@@ -807,39 +1189,44 @@ void linenoiseEditHistoryNext(struct linenoiseState *l, int dir) {
|
||||
}
|
||||
|
||||
/* Delete the character at the right of the cursor without altering the cursor
|
||||
* position. Basically this is what happens with the "Delete" keyboard key. */
|
||||
* position. Basically this is what happens with the "Delete" keyboard key.
|
||||
* Now handles multi-byte UTF-8 characters. */
|
||||
void linenoiseEditDelete(struct linenoiseState *l) {
|
||||
if (l->len > 0 && l->pos < l->len) {
|
||||
memmove(l->buf+l->pos,l->buf+l->pos+1,l->len-l->pos-1);
|
||||
l->len--;
|
||||
size_t clen = utf8NextCharLen(l->buf, l->pos, l->len);
|
||||
memmove(l->buf+l->pos, l->buf+l->pos+clen, l->len-l->pos-clen);
|
||||
l->len -= clen;
|
||||
l->buf[l->len] = '\0';
|
||||
refreshLine(l);
|
||||
}
|
||||
}
|
||||
|
||||
/* Backspace implementation. */
|
||||
/* Backspace implementation. Deletes the UTF-8 character before the cursor. */
|
||||
void linenoiseEditBackspace(struct linenoiseState *l) {
|
||||
if (l->pos > 0 && l->len > 0) {
|
||||
memmove(l->buf+l->pos-1,l->buf+l->pos,l->len-l->pos);
|
||||
l->pos--;
|
||||
l->len--;
|
||||
size_t clen = utf8PrevCharLen(l->buf, l->pos);
|
||||
memmove(l->buf+l->pos-clen, l->buf+l->pos, l->len-l->pos);
|
||||
l->pos -= clen;
|
||||
l->len -= clen;
|
||||
l->buf[l->len] = '\0';
|
||||
refreshLine(l);
|
||||
}
|
||||
}
|
||||
|
||||
/* Delete the previosu word, maintaining the cursor at the start of the
|
||||
* current word. */
|
||||
/* Delete the previous word, maintaining the cursor at the start of the
|
||||
* current word. Handles UTF-8 by moving character-by-character. */
|
||||
void linenoiseEditDeletePrevWord(struct linenoiseState *l) {
|
||||
size_t old_pos = l->pos;
|
||||
size_t diff;
|
||||
|
||||
/* Skip spaces before the word (move backwards by UTF-8 chars). */
|
||||
while (l->pos > 0 && l->buf[l->pos-1] == ' ')
|
||||
l->pos--;
|
||||
l->pos -= utf8PrevCharLen(l->buf, l->pos);
|
||||
/* Skip non-space characters (move backwards by UTF-8 chars). */
|
||||
while (l->pos > 0 && l->buf[l->pos-1] != ' ')
|
||||
l->pos--;
|
||||
l->pos -= utf8PrevCharLen(l->buf, l->pos);
|
||||
diff = old_pos - l->pos;
|
||||
memmove(l->buf+l->pos,l->buf+old_pos,l->len-old_pos+1);
|
||||
memmove(l->buf+l->pos, l->buf+old_pos, l->len-old_pos+1);
|
||||
l->len -= diff;
|
||||
refreshLine(l);
|
||||
}
|
||||
@@ -886,6 +1273,7 @@ int linenoiseEditStart(struct linenoiseState *l, int stdin_fd, int stdout_fd, ch
|
||||
|
||||
l->cols = getColumns(stdin_fd, stdout_fd);
|
||||
l->oldrows = 0;
|
||||
l->oldrpos = 1; /* Cursor starts on row 1. */
|
||||
l->history_index = 0;
|
||||
|
||||
/* Buffer starts empty. */
|
||||
@@ -895,7 +1283,7 @@ int linenoiseEditStart(struct linenoiseState *l, int stdin_fd, int stdout_fd, ch
|
||||
/* If stdin is not a tty, stop here with the initialization. We
|
||||
* will actually just read a line from standard input in blocking
|
||||
* mode later, in linenoiseEditFeed(). */
|
||||
if (!isatty(l->ifd)) return 0;
|
||||
if (!isatty(l->ifd) && !getenv("LINENOISE_ASSUME_TTY")) return 0;
|
||||
|
||||
/* The latest history entry is always our current buffer, that
|
||||
* initially is just an empty string. */
|
||||
@@ -928,7 +1316,7 @@ char *linenoiseEditMore = "If you see this, you are misusing the API: when linen
|
||||
char *linenoiseEditFeed(struct linenoiseState *l) {
|
||||
/* Not a TTY, pass control to line reading without character
|
||||
* count limits. */
|
||||
if (!isatty(l->ifd)) return linenoiseNoTTY();
|
||||
if (!isatty(l->ifd) && !getenv("LINENOISE_ASSUME_TTY")) return linenoiseNoTTY();
|
||||
|
||||
char c;
|
||||
int nread;
|
||||
@@ -985,11 +1373,17 @@ char *linenoiseEditFeed(struct linenoiseState *l) {
|
||||
}
|
||||
break;
|
||||
case CTRL_T: /* ctrl-t, swaps current character with previous. */
|
||||
/* Handle UTF-8: swap the two UTF-8 characters around cursor. */
|
||||
if (l->pos > 0 && l->pos < l->len) {
|
||||
int aux = l->buf[l->pos-1];
|
||||
l->buf[l->pos-1] = l->buf[l->pos];
|
||||
l->buf[l->pos] = aux;
|
||||
if (l->pos != l->len-1) l->pos++;
|
||||
char tmp[32];
|
||||
size_t prevlen = utf8PrevCharLen(l->buf, l->pos);
|
||||
size_t currlen = utf8NextCharLen(l->buf, l->pos, l->len);
|
||||
size_t prevstart = l->pos - prevlen;
|
||||
/* Copy current char to tmp, move previous char right, paste tmp. */
|
||||
memcpy(tmp, l->buf + l->pos, currlen);
|
||||
memmove(l->buf + prevstart + currlen, l->buf + prevstart, prevlen);
|
||||
memcpy(l->buf + prevstart, tmp, currlen);
|
||||
if (l->pos + currlen <= l->len) l->pos += currlen;
|
||||
refreshLine(l);
|
||||
}
|
||||
break;
|
||||
@@ -1061,7 +1455,22 @@ char *linenoiseEditFeed(struct linenoiseState *l) {
|
||||
}
|
||||
break;
|
||||
default:
|
||||
if (linenoiseEditInsert(l,c)) return NULL;
|
||||
/* Handle UTF-8 multi-byte sequences. When we receive the first byte
|
||||
* of a multi-byte UTF-8 character, read the remaining bytes to
|
||||
* complete the sequence before inserting. */
|
||||
{
|
||||
char utf8[4];
|
||||
int utf8len = utf8ByteLen(c);
|
||||
utf8[0] = c;
|
||||
if (utf8len > 1) {
|
||||
/* Read remaining bytes of the UTF-8 sequence. */
|
||||
int i;
|
||||
for (i = 1; i < utf8len; i++) {
|
||||
if (read(l->ifd, utf8+i, 1) != 1) break;
|
||||
}
|
||||
}
|
||||
if (linenoiseEditInsert(l, utf8, utf8len)) return NULL;
|
||||
}
|
||||
break;
|
||||
case CTRL_U: /* Ctrl+u, delete the whole line. */
|
||||
l->buf[0] = '\0';
|
||||
@@ -1095,7 +1504,7 @@ char *linenoiseEditFeed(struct linenoiseState *l) {
|
||||
* returns something different than NULL. At this point the user input
|
||||
* is in the buffer, and we can restore the terminal in normal mode. */
|
||||
void linenoiseEditStop(struct linenoiseState *l) {
|
||||
if (!isatty(l->ifd)) return;
|
||||
if (!isatty(l->ifd) && !getenv("LINENOISE_ASSUME_TTY")) return;
|
||||
disableRawMode(l->ifd);
|
||||
printf("\n");
|
||||
}
|
||||
@@ -1193,7 +1602,7 @@ static char *linenoiseNoTTY(void) {
|
||||
char *linenoise(const char *prompt) {
|
||||
char buf[LINENOISE_MAX_LINE];
|
||||
|
||||
if (!isatty(STDIN_FILENO)) {
|
||||
if (!isatty(STDIN_FILENO) && !getenv("LINENOISE_ASSUME_TTY")) {
|
||||
/* Not a tty: read from file / pipe. In this mode we don't want any
|
||||
* limit to the line size, so we call a function to handle that. */
|
||||
return linenoiseNoTTY();
|
||||
|
||||
@@ -65,6 +65,7 @@ struct linenoiseState {
|
||||
size_t len; /* Current edited line length. */
|
||||
size_t cols; /* Number of columns in terminal. */
|
||||
size_t oldrows; /* Rows used by last refrehsed line (multiline mode) */
|
||||
int oldrpos; /* Cursor row from last refresh (for multiline clearing). */
|
||||
int history_index; /* The history index we are currently editing. */
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user