diff options
| author | Daniel Smith <daniel.smith@qt.io> | 2025-10-27 08:50:01 +0100 |
|---|---|---|
| committer | Daniel Smith <daniel.smith@qt.io> | 2025-10-28 10:37:52 +0000 |
| commit | e2bf9392470a0efa6ca4e8c1799eaaef9fe03d5f (patch) | |
| tree | c4561c391e6ae555eefcf35d57747bff0189937f | |
| parent | ff94fcf7077b349cbc0dbad1c37c83b78cbf3495 (diff) | |
To protect against potential security risks associated with
invisible Unicode characters, this patch adds a check that is
applied to both commit messages and the contents of changed files.
It blocks the most dangerous invisible characters while allowing:
- U+200C (ZWNJ) and U+200D (ZWJ): Essential for emoji and complex scripts
- U+FE00-FE0F (variation selectors): Used with emoji
- U+FEFF (BOM): Only at file start (position 0)
Always blocked:
- U+200B (zero-width space): Primary stealth attack vector
- U+2060-2069 (invisible operators/separators): Stealth attack vectors
- U+E0100-E01EF (variation selectors supplement): Rarely legitimate
See: https://www.koi.ai/blog/glassworm-first-self-propagating-worm-using-invisible-code-hits-openvsx-marketplace
Task-number: QTQAINFRA-7514
Change-Id: Ibcff19797d8217fe8d81141ed60f430783358357
Reviewed-by: Daniel Smith <daniel.smith@qt.io>
Reviewed-by: Oswald Buddenhagen <oswald.buddenhagen@gmx.de>
| -rwxr-xr-x | git-hooks/sanitize-commit | 43 |
1 file changed, 43 insertions, 0 deletions
diff --git a/git-hooks/sanitize-commit b/git-hooks/sanitize-commit index 7e8d8bd..85d7d3c 100755 --- a/git-hooks/sanitize-commit +++ b/git-hooks/sanitize-commit @@ -293,6 +293,41 @@ sub check_apple_terminology() } } +sub check_invisible_unicode() +{ + # Check for invisible Unicode characters that can hide malicious code. + # This protects against GlassWorm-style stealth payloads that exploit + # the gap between what humans see in editors and what machines execute. + # See: https://www.firetail.ai/blog/glassworm-when-invisible-worms-meet-invisible-payloads + # See: https://www.koi.ai/blog/glassworm-first-self-propagating-worm-using-invisible-code-hits-openvsx-marketplace + # + # Blocks the most dangerous invisible characters while allowing: + # - U+200C (ZWNJ) and U+200D (ZWJ): Essential for emoji and complex scripts + # - U+FE00-FE0F (variation selectors): Used with emoji + # - U+FEFF (BOM): Only at file start (position 0) + # + # Always blocked: + # - U+200B (zero-width space): Primary stealth attack vector + # - U+2060-2069 (invisible operators/separators): Stealth attack vectors + # - U+E0100-E01EF (variation selectors supplement): Rarely legitimate + # + # This check cannot be bypassed via configuration. Legitimate uses of blocked + # characters (e.g., in Unicode test data) require manual admin approval. + if (/[\x{200B}] # Zero-width space + |[\x{2060}-\x{2069}] # Invisible operators, word joiner, separators + |[\x{E0100}-\x{E01EF}]/x) { # Variation selectors supplement + complain_ln("Invisible Unicode characters detected (possible stealth payload)", + "stealth", 1); + } + # BOM (U+FEFF) is only allowed at position 0 of the file + if (/\x{FEFF}/) { + if ($lineno != 1 || !/^\x{FEFF}/) { + complain_ln("Invisible Unicode characters detected (possible stealth payload)", + "stealth", 1); + } + } +} + # The hard-coded fallbacks could be avoided by init-repository setting things up. 
my $with_pickbot = parse_bool($config{'with-pickbot'} // "false"); my @LTS = split(/\s+/, $config{'lts-branch'} || "5.6 5.9 5.12 5.15 6.2 6.5"); @@ -423,6 +458,7 @@ my $prevline = ""; my $changelog3rdparty = 0; my $reopens = 0; open MSG, "git cat-file -p ".$sha1." |" or die "cannot run git: $!"; +binmode(MSG, ":utf8") or die "cannot set encoding: $!"; while (<MSG>) { last if ($_ eq "\n"); if (/^parent /) { @@ -610,6 +646,7 @@ while (<MSG>) { } check_spelling() if ($spell_check); check_apple_terminology(); + check_invisible_unicode(); styleFail("Trailing whitespace") if (s/[ \t]+\r?$//); styleFail("Space indent followed by a TAB character") if (/^ +\t/); styleFail("TAB character in non-leading whitespace") if (/\S *\t/); @@ -840,10 +877,12 @@ my $eof_check; my $ctlkw_check; my $apple_check; my $notobjc_check; +my $stealth_check; my $attribution_changed; open DIFF, "git diff-tree --minimal --no-commit-id --diff-filter=ACMR --ignore-submodules " . "--src-prefix=\@old\@/ --dst-prefix=\@new\@/ --full-index -r -U100000 --cc -C -l1000 " . "--root ".$sha1." |" or die "cannot run git: $!"; +binmode(DIFF, ":utf8"); while (<DIFF>) { if (/^-/) { if ($mixws_check) { @@ -950,6 +989,7 @@ while (<DIFF>) { complain_ln("__OBJC__ will never be defined for non-Objective-C/C++ source files", "objc"); } } + check_invisible_unicode() if ($stealth_check); if ($clike && /\bQ_CLANG_QDOC\b/) { complain_ln("Using deprecated define Q_CLANG_QDOC; use Q_QDOC instead", "qdoc"); } @@ -1029,6 +1069,9 @@ while (<DIFF>) { $spell_check = !defined($cfg{spell}) && !$foreign && ($file !~ /\.ts$/i); $apple_check = !$foreign && ($file !~ /\.ts$/i); $notobjc_check = $apple_check && ($file =~ /\.(c|cc|cpp|c\+\+|cxx)$/i); + # Invisible Unicode detection is always enabled. File-type exclusions + # can be added here if needed (e.g., for Unicode test data files). + $stealth_check = 1; $conflict_fail = $is_bin || defined($cfg{conflict}); $braces = 0; $check_gen = 0; |
