summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-xgit-hooks/sanitize-commit43
1 files changed, 43 insertions, 0 deletions
diff --git a/git-hooks/sanitize-commit b/git-hooks/sanitize-commit
index 7e8d8bd..85d7d3c 100755
--- a/git-hooks/sanitize-commit
+++ b/git-hooks/sanitize-commit
@@ -293,6 +293,41 @@ sub check_apple_terminology()
}
}
+sub check_invisible_unicode()
+{
+    # Scan the current line ($_) for invisible Unicode characters that can
+    # hide malicious code, and report each offending line once through
+    # complain_ln(). Takes no arguments; reads $_ and $lineno.
+    #
+    # This protects against GlassWorm-style stealth payloads that exploit
+    # the gap between what humans see in editors and what machines execute.
+    # See: https://www.firetail.ai/blog/glassworm-when-invisible-worms-meet-invisible-payloads
+    # See: https://www.koi.ai/blog/glassworm-first-self-propagating-worm-using-invisible-code-hits-openvsx-marketplace
+    #
+    # Deliberately allowed:
+    # - U+200C (ZWNJ) and U+200D (ZWJ): Essential for emoji and complex scripts
+    # - U+FE00-FE0F (variation selectors): Used with emoji
+    # - U+FEFF (BOM): A single one, only as the first character of line 1
+    #
+    # Always blocked:
+    # - U+200B (zero-width space): Primary stealth attack vector
+    # - U+2060-2064 (word joiner, invisible math operators/separator)
+    # - U+2065 (unassigned) and U+2066-2069 (bidi isolate controls)
+    # - U+E0100-E01EF (variation selectors supplement): Rarely legitimate
+    #
+    # This check cannot be bypassed via configuration. Legitimate uses of blocked
+    # characters (e.g., in Unicode test data) require manual admin approval.
+    #
+    # The input handle must deliver decoded characters (see the binmode calls
+    # on the handles being read); on raw bytes none of these classes match.
+    my $msg = "Invisible Unicode characters detected (possible stealth payload)";
+
+    if (/[\x{200B}\x{2060}-\x{2069}\x{E0100}-\x{E01EF}]/) {
+        complain_ln($msg, "stealth", 1);
+        # One report per line is enough; a misplaced BOM on the same line
+        # would only repeat the identical message.
+        return;
+    }
+
+    # tr with an empty replacement list counts matches without modifying $_.
+    my $bom_count = tr/\x{FEFF}//;
+    return if (!$bom_count);
+
+    # Exactly one BOM is tolerated, and only as the very first character of
+    # the first line. Counting (instead of merely testing for a leading BOM)
+    # also catches further BOMs hidden later on that same first line.
+    # NOTE(review): the ^ anchor assumes $_ does not still carry a diff
+    # prefix ('+' or ' ') when this is called from the diff loop - confirm.
+    my $allowed = ($lineno == 1 && /^\x{FEFF}/) ? 1 : 0;
+    if ($bom_count > $allowed) {
+        complain_ln($msg, "stealth", 1);
+    }
+}
+
# The hard-coded fallbacks could be avoided by init-repository setting things up.
my $with_pickbot = parse_bool($config{'with-pickbot'} // "false");
my @LTS = split(/\s+/, $config{'lts-branch'} || "5.6 5.9 5.12 5.15 6.2 6.5");
@@ -423,6 +458,7 @@ my $prevline = "";
my $changelog3rdparty = 0;
my $reopens = 0;
open MSG, "git cat-file -p ".$sha1." |" or die "cannot run git: $!";
+binmode(MSG, ":utf8") or die "cannot set encoding: $!";
while (<MSG>) {
last if ($_ eq "\n");
if (/^parent /) {
@@ -610,6 +646,7 @@ while (<MSG>) {
}
check_spelling() if ($spell_check);
check_apple_terminology();
+ check_invisible_unicode();
styleFail("Trailing whitespace") if (s/[ \t]+\r?$//);
styleFail("Space indent followed by a TAB character") if (/^ +\t/);
styleFail("TAB character in non-leading whitespace") if (/\S *\t/);
@@ -840,10 +877,12 @@ my $eof_check;
my $ctlkw_check;
my $apple_check;
my $notobjc_check;
+my $stealth_check;
my $attribution_changed;
open DIFF, "git diff-tree --minimal --no-commit-id --diff-filter=ACMR --ignore-submodules " .
"--src-prefix=\@old\@/ --dst-prefix=\@new\@/ --full-index -r -U100000 --cc -C -l1000 " .
"--root ".$sha1." |" or die "cannot run git: $!";
+# Decode the diff as UTF-8 so the invisible-character checks see code points;
+# fail loudly if the layer cannot be applied, as is done for the MSG handle.
+binmode(DIFF, ":utf8") or die "cannot set encoding: $!";
while (<DIFF>) {
if (/^-/) {
if ($mixws_check) {
@@ -950,6 +989,7 @@ while (<DIFF>) {
complain_ln("__OBJC__ will never be defined for non-Objective-C/C++ source files", "objc");
}
}
+ check_invisible_unicode() if ($stealth_check);
if ($clike && /\bQ_CLANG_QDOC\b/) {
complain_ln("Using deprecated define Q_CLANG_QDOC; use Q_QDOC instead", "qdoc");
}
@@ -1029,6 +1069,9 @@ while (<DIFF>) {
$spell_check = !defined($cfg{spell}) && !$foreign && ($file !~ /\.ts$/i);
$apple_check = !$foreign && ($file !~ /\.ts$/i);
$notobjc_check = $apple_check && ($file =~ /\.(c|cc|cpp|c\+\+|cxx)$/i);
+ # Invisible Unicode detection is always enabled. File-type exclusions
+ # can be added here if needed (e.g., for Unicode test data files).
+ $stealth_check = 1;
$conflict_fail = $is_bin || defined($cfg{conflict});
$braces = 0;
$check_gen = 0;