From 1d04379d4f898e60e65bb2ef8856f8cdf6f0d2b9 Mon Sep 17 00:00:00 2001
From: Green Orange
Date: Mon, 6 Apr 2026 00:27:33 +0200
Subject: [PATCH] fix(git-lib): avoid regex error with non-ASCII branch names

_omz_git_prompt_status was emitting "regex matching error: illegal byte
sequence" when the git branch name contained non-ASCII characters (e.g.
Chinese).

The zsh =~ operator uses locale-aware POSIX ERE, so character classes
like [^ ]+ reject multibyte sequences unless LC_ALL=C is set.

Fix by setting LC_ALL=C around all regex operations in the function and
restoring it afterwards.

Fixes #13330
---
Reviewer note: the test script locates git.zsh through ${0:A:h:h} (absolute
path of the script, two directories up) so the suite also works when it is
launched from inside lib/tests. The blob id on the new file's index line
predates that one-token change.

 lib/git.zsh            |  7 ++++
 lib/tests/git.test.zsh | 74 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+)
 create mode 100644 lib/tests/git.test.zsh

diff --git a/lib/git.zsh b/lib/git.zsh
index 8d38f3268..0cecd118f 100644
--- a/lib/git.zsh
+++ b/lib/git.zsh
@@ -103,6 +103,11 @@ function _omz_git_prompt_status() {
   local status_lines
   status_lines=("${(@f)${status_text}}")
 
+  # Use C locale for regex matching to avoid "illegal byte sequence" errors
+  # when branch names or file paths contain non-ASCII characters (e.g. Chinese)
+  local _omz_lc_all=$LC_ALL
+  LC_ALL=C
+
   # If the tracking line exists, get and parse it
   if [[ "$status_lines[1]" =~ "^## [^ ]+ \[(.*)\]" ]]; then
     local branch_statuses
@@ -126,6 +131,8 @@ function _omz_git_prompt_status() {
     fi
   done
 
+  LC_ALL=$_omz_lc_all
+
   # Display the seen statuses in the order specified
   local status_prompt
   for status_constant in $status_constants; do
diff --git a/lib/tests/git.test.zsh b/lib/tests/git.test.zsh
new file mode 100644
index 000000000..6f2147ac1
--- /dev/null
+++ b/lib/tests/git.test.zsh
@@ -0,0 +1,74 @@
+#!/usr/bin/zsh -df
+
+# Regression tests for lib/git.zsh
+
+local -i _failures=0
+
+run_test() {
+  local description="$1"
+  local got="$2"
+  local expected="$3"
+
+  print -u2 "Test: $description"
+  if [[ "$got" == "$expected" ]]; then
+    print -u2 "\e[32mSuccess\e[0m"
+  else
+    print -u2 "\e[31mError\e[0m"
+    print -u2 " expected: ${(q)expected}"
+    print -u2 " got: ${(q)got}"
+    (( _failures++ ))
+  fi
+  print -u2 ""
+}
+
+# ---------------------------------------------------------------------------
+# Set up: source git.zsh and override __git_prompt_git with a controllable mock
+# ---------------------------------------------------------------------------
+
+source "${0:A:h:h}/git.zsh" 2>/dev/null
+
+# The mock returns canned `git status --porcelain -b` output and denies stash.
+# Callers set _mock_status_output before calling _omz_git_prompt_status.
+_mock_status_output=""
+function __git_prompt_git() {
+  case "$*" in
+    "config --get oh-my-zsh.hide-status") return 1 ;;
+    "rev-parse --verify refs/stash") return 1 ;;
+    "status --porcelain -b") printf "%s\n" "$_mock_status_output" ;;
+    *) return 1 ;;
+  esac
+}
+
+# ---------------------------------------------------------------------------
+# Bug #13330: _omz_git_prompt_status emits "regex matching error: illegal byte
+# sequence" when the git branch name contains non-ASCII characters (e.g. Chinese).
+# Root cause: zsh's =~ operator with [^ ]+ is locale-aware and rejects multibyte
+# sequences unless LC_ALL=C is set for the match.
+# ---------------------------------------------------------------------------
+
+# Chinese branch with upstream tracking info
+_mock_status_output="## 中文-1.0.0-中文...origin/中文-1.0.0-中文 [ahead 1]"
+stderr_output=$( { _omz_git_prompt_status } 2>&1 1>/dev/null )
+run_test \
+  "no 'illegal byte sequence' error with Chinese branch name (bug #13330)" \
+  "${stderr_output}" \
+  ""
+
+# Chinese branch with no tracking info (the regex should simply not match)
+_mock_status_output="## 中文-branch"
+stderr_output=$( { _omz_git_prompt_status } 2>&1 1>/dev/null )
+run_test \
+  "no error when Chinese branch has no tracking info" \
+  "${stderr_output}" \
+  ""
+
+# Regression: ASCII branch names must still be parsed correctly
+_mock_status_output="## main...origin/main [behind 3]"
+ZSH_THEME_GIT_PROMPT_BEHIND="<"
+output=$( _omz_git_prompt_status 2>/dev/null )
+run_test \
+  "ASCII branch with 'behind' tracking info still detected" \
+  "${output}" \
+  "<"
+
+exit $_failures