1
0
mirror of https://github.com/ohmyzsh/ohmyzsh.git synced 2026-04-10 16:32:00 +00:00

fix(git-lib): avoid regex error with non-ASCII branch names

_omz_git_prompt_status was emitting "regex matching error: illegal byte
sequence" when the git branch name contained non-ASCII characters (e.g.
Chinese). The zsh =~ operator uses locale-aware POSIX ERE, so character
classes like [^ ]+ reject multibyte sequences unless LC_ALL=C is set.
Fix by setting LC_ALL=C around all regex operations in the function and
restoring it afterwards.

Fixes #13330
This commit is contained in:
Green Orange
2026-04-06 00:27:33 +02:00
parent 887a864aba
commit 1d04379d4f
2 changed files with 81 additions and 0 deletions

View File

@@ -103,6 +103,11 @@ function _omz_git_prompt_status() {
local status_lines
status_lines=("${(@f)${status_text}}")
# Use C locale for regex matching to avoid "illegal byte sequence" errors
# when branch names or file paths contain non-ASCII characters (e.g. Chinese)
local _omz_lc_all=$LC_ALL
LC_ALL=C
# If the tracking line exists, get and parse it
if [[ "$status_lines[1]" =~ "^## [^ ]+ \[(.*)\]" ]]; then
local branch_statuses
@@ -126,6 +131,8 @@ function _omz_git_prompt_status() {
fi
done
LC_ALL=$_omz_lc_all
# Display the seen statuses in the order specified
local status_prompt
for status_constant in $status_constants; do

74
lib/tests/git.test.zsh Normal file
View File

@@ -0,0 +1,74 @@
#!/usr/bin/zsh -df
# Regression tests for lib/git.zsh
#
# Every check is reported on stderr by run_test; the number of failed checks
# is used as the exit status of this script.

# Integer tally of failed checks: bumped by run_test, handed to `exit` at the end.
typeset -i _failures=0
# Compare an actual value with the expected one and report the outcome.
#
# Arguments:
#   $1 - human-readable description of the check
#   $2 - value produced by the code under test
#   $3 - value the check expects
# Output:
#   Writes the description, a coloured Success/Error line and, on mismatch,
#   the shell-quoted expected and actual values to stderr.
# Globals:
#   _failures - incremented once for every mismatch
run_test() {
  local label="$1" actual="$2" wanted="$3"

  print -u2 "Test: $label"
  if [[ "$actual" != "$wanted" ]]; then
    print -u2 "\e[31mError\e[0m"
    # (q) quotes the values so whitespace and special characters stay visible.
    print -u2 " expected: ${(q)wanted}"
    print -u2 " got: ${(q)actual}"
    (( _failures++ ))
  else
    print -u2 "\e[32mSuccess\e[0m"
  fi
  # Blank separator line between consecutive reports.
  print -u2 ""
}
# ---------------------------------------------------------------------------
# Set up: source git.zsh and override __git_prompt_git with a controllable mock
# ---------------------------------------------------------------------------
# Resolve the script location to an absolute path first (:A) so that the
# parent directory is found even when the script is started through a short
# relative path such as ./git.test.zsh; with a plain ${0:h:h} that invocation
# collapses to "." and the wrong directory would be searched.
# stderr produced while sourcing is discarded.
source "${0:A:h:h}/git.zsh" 2>/dev/null
# Fail fast with a clear message when the function under test did not load,
# because the discarded stderr above would otherwise hide the reason.
if (( ! ${+functions[_omz_git_prompt_status]} )); then
  print -u2 "Cannot run tests: _omz_git_prompt_status is not defined after sourcing ${0:A:h:h}/git.zsh"
  exit 1
fi
# Mock replacement for __git_prompt_git.
#
# It answers exactly one request, `status --porcelain -b`, by printing the
# canned text held in _mock_status_output followed by a newline. Every other
# request fails with status 1 and prints nothing; that includes
# `config --get oh-my-zsh.hide-status` and `rev-parse --verify refs/stash`
# (so the stash lookup is denied).
#
# Callers assign _mock_status_output before calling _omz_git_prompt_status.
_mock_status_output=""
function __git_prompt_git() {
  local request="$*"

  if [[ "$request" == "status --porcelain -b" ]]; then
    printf "%s\n" "$_mock_status_output"
    return
  fi

  return 1
}
# ---------------------------------------------------------------------------
# Bug #13330: _omz_git_prompt_status emits "regex matching error: illegal byte
# sequence" when the git branch name contains non-ASCII characters (e.g. Chinese).
# Root cause: zsh's =~ operator with [^ ]+ is locale-aware and rejects multibyte
# sequences unless LC_ALL=C is set for the match.
#
# NOTE(review): these checks run under whatever locale the invoking environment
# provides; confirm the test environment uses a locale in which the original
# error actually reproduces.
# ---------------------------------------------------------------------------

# Feed one status header line ($2) to the mock, run _omz_git_prompt_status with
# its stdout discarded, and record a check named $1 that passes only when
# nothing at all was written to stderr.
expect_silent_stderr() {
  local description="$1"
  _mock_status_output="$2"
  stderr_output=$( { _omz_git_prompt_status; } 2>&1 1>/dev/null )
  run_test "$description" "${stderr_output}" ""
}

# Chinese branch with upstream tracking info
expect_silent_stderr \
  "no 'illegal byte sequence' error with Chinese branch name (bug #13330)" \
  "## 中文-1.0.0-中文...origin/中文-1.0.0-中文 [ahead 1]"

# Chinese branch with no tracking info (the regex should simply not match)
expect_silent_stderr \
  "no error when Chinese branch has no tracking info" \
  "## 中文-branch"

# Regression: ASCII branch names must still be parsed correctly
_mock_status_output="## main...origin/main [behind 3]"
ZSH_THEME_GIT_PROMPT_BEHIND="<"
output="$(_omz_git_prompt_status 2>/dev/null)"
run_test "ASCII branch with 'behind' tracking info still detected" "${output}" "<"

# The exit status is the number of failed checks (0 means all passed).
exit $_failures