N+1 iterations with loop unrolling

In the following code, the loop iterates one time.

#include <stdio.h>

int main() {
    volatile int sum = 0;
    
    for (int i = 1; i < 2; i++) {
        sum +=2;
    }
    printf("Sum is %d\n", sum);
    return 0;
}

With the following commands

# Step 1: emit unoptimized textual LLVM IR (-S) without the optnone attribute,
# so the opt passes below are not skipped for @main.
# Note: -emit-llvm is passed twice; the second occurrence is redundant.
clang -O0 -emit-llvm -S -Xclang -disable-O0-optnone -emit-llvm $FILE.c -o $FILE.ll
# Step 2: run the mem2reg pass on the IR from step 1.
opt -passes='mem2reg' -S $FILE.ll -o $FILE-m2r.ll
# Step 3: run the loop unroller with a forced unroll count of 10.
opt -passes='loop-unroll' -unroll-count=10 -S $FILE-m2r.ll -o $FILE-unrolled.ll

when I look at the final IR, I see two “add” instructions.

; ModuleID = 'loop-m2r.ll'
source_filename = "loop.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@.str = private unnamed_addr constant [11 x i8] c"Sum is %d\0A\00", align 1

; Function Attrs: noinline nounwind uwtable
define dso_local i32 @main() #0 {
  ; %1 is the stack slot for the volatile `sum`; it is initialized to 0.
  %1 = alloca i32, align 4
  store volatile i32 0, ptr %1, align 4
  br label %2

2:                                                ; preds = %0
  br label %3

; First copy of the loop body: sum += 2. This is the only copy that executes.
3:                                                ; preds = %2
  %4 = load volatile i32, ptr %1, align 4
  %5 = add nsw i32 %4, 2
  store volatile i32 %5, ptr %1, align 4
  br label %6

; `i1` is the type of the branch condition and `false` is its constant value,
; so this branch always goes to %11 and never to %7.
6:                                                ; preds = %3
  br i1 false, label %7, label %11

; Second copy of the loop body. Its only predecessor is the never-taken edge
; from %6, so this block is dead code.
7:                                                ; preds = %6
  %8 = load volatile i32, ptr %1, align 4
  %9 = add nsw i32 %8, 2
  store volatile i32 %9, ptr %1, align 4
  br label %10

10:                                               ; preds = %7
  unreachable

; Loop exit: reload sum and print it.
11:                                               ; preds = %6
  %12 = load volatile i32, ptr %1, align 4
  %13 = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %12)
  ret i32 0
}

declare i32 @printf(ptr noundef, ...) #1

attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }

!llvm.module.flags = !{!0, !1, !2, !3, !4}
!llvm.ident = !{!5}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 8, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 2}
!4 = !{i32 7, !"frame-pointer", i32 2}
!5 = !{!"clang version 21.0.0git (https://github.com/llvm/llvm-project d0c973a7a0149db3b71767d4c5a20a31e6a8ed5b)"}

I expected to see one add instruction. Isn’t that the case?
The line br i1 false, label %7, label %11 looks a bit odd to me, because I cannot find a definition for i1 anywhere in the IR file. My guess is that the compiler has added one extra iteration, something like a wrong-path copy, that is guarded by a condition. But it is not clear to me why it was added or how that condition is controlled. Any ideas?

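I think the IR is correct. i1 is not a value that needs a definition: it is a valid integer type, the 1-bit integer (see LangRef), and false is a constant of that type used as the branch condition. The instruction br i1 false, label %7, label %11 therefore always branches to label %11, so the addition is only executed once. In this case, running other passes such as simplifycfg will clean up the unreachable blocks.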

Thanks. As you said, simplifycfg works.
I wonder why the pass added that unreachable code, since the number of iterations is known to LLVM.

The loop unroller copies whole loop iterations (from the header to the backedge). Teaching it to skip cloning parts of a loop iteration would be harder than just cloning the whole iteration and cleaning up later. So it occasionally clones unreachable code in cases like this.

Usually we run loop rotation before unrolling, so this doesn’t happen for simple loops.