N+1 iterations with loop unrolling

mahmoodn · April 21, 2025, 10:21am

In the following code, the loop iterates one time.

#include <stdio.h>

/* Minimal reproducer: a counted loop whose body executes exactly once. */
int main() {
    /* sum is volatile, so each update of it shows up in the IR as an
       explicit volatile load, add, and volatile store. */
    volatile int sum = 0;
    
    /* i starts at 1 and the bound is 2, so the body runs a single time. */
    for (int i = 1; i < 2; i++) {
        sum +=2;
    }
    printf("Sum is %d\n", sum);
    return 0;
}

With the following commands

clang -O0 -emit-llvm -S -Xclang -disable-O0-optnone -emit-llvm $FILE.c -o $FILE.ll
opt -passes='mem2reg' -S $FILE.ll -o $FILE-m2r.ll
opt -passes='loop-unroll' -unroll-count=10 -S $FILE-m2r.ll -o $FILE-unrolled.ll

When I look at the final IR code, I see two “add” instructions.

; ModuleID = 'loop-m2r.ll'
source_filename = "loop.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@.str = private unnamed_addr constant [11 x i8] c"Sum is %d\0A\00", align 1

; Function Attrs: noinline nounwind uwtable
; main() after mem2reg and loop-unroll. The loop body (load, add 2, store)
; appears twice, but only the first copy is reachable.
define dso_local i32 @main() #0 {
  ; %1 is the stack slot holding `sum`; every access to it is volatile.
  %1 = alloca i32, align 4
  store volatile i32 0, ptr %1, align 4
  br label %2

2:                                                ; preds = %0
  br label %3

; First copy of the loop body: sum += 2.
3:                                                ; preds = %2
  %4 = load volatile i32, ptr %1, align 4
  %5 = add nsw i32 %4, 2
  store volatile i32 %5, ptr %1, align 4
  br label %6

; The branch condition is the constant `false`, so control always goes to
; the second label (%11) and never to %7.
6:                                                ; preds = %3
  br i1 false, label %7, label %11

; Second copy of the loop body. Its only predecessor is %6, whose branch
; never targets it, so this block is never executed.
7:                                                ; preds = %6
  %8 = load volatile i32, ptr %1, align 4
  %9 = add nsw i32 %8, 2
  store volatile i32 %9, ptr %1, align 4
  br label %10

; Only reachable from %7, so it is never executed either.
10:                                               ; preds = %7
  unreachable

; Loop exit: reload sum, print it, and return 0.
11:                                               ; preds = %6
  %12 = load volatile i32, ptr %1, align 4
  %13 = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %12)
  ret i32 0
}

declare i32 @printf(ptr noundef, ...) #1

attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }

!llvm.module.flags = !{!0, !1, !2, !3, !4}
!llvm.ident = !{!5}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 8, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 2}
!4 = !{i32 7, !"frame-pointer", i32 2}
!5 = !{!"clang version 21.0.0git (https://github.com/llvm/llvm-project d0c973a7a0149db3b71767d4c5a20a31e6a8ed5b)"}

I expected to see one add instruction. Isn’t that the case?
The line br i1 false, label %7, label %11 looks a bit odd to me, because there is no definition of i1 in the IR file. So I am guessing that the compiler has added one more iteration, like a not-taken path guarded by a condition. But why it was added, and how that condition is controlled, is not clear to me. Any ideas?

kasuga-fj · April 21, 2025, 1:02pm

I think the IR is correct. i1 is a valid integer type — a 1-bit integer (see LangRef) — and false is a constant of that type, so nothing in the file needs to define it. The instruction br i1 false, label %7, label %11 always branches to label %11, so the addition is only executed once. In this case, running other passes such as simplifycfg will clean up the unreachable blocks.

mahmoodn · April 21, 2025, 2:15pm

Thanks. As you said, simplifycfg works.
I wonder why that unreachable code was added by the pass, given that the number of iterations is known to LLVM.

efriedma-quic · April 21, 2025, 5:50pm

The loop unroller copies whole loop iterations (from the header to the backedge). Teaching it to skip cloning parts of a loop iteration would be harder than just cloning the whole iteration and cleaning up later. So it occasionally clones unreachable code in cases like this.

Usually we run loop rotation before unrolling, so this doesn’t happen for simple loops.

Topic		Replies	Views
loop unrolling introduces conditional branch LLVM Dev List Archives	14	150	August 22, 2015
question about loop unrolling LLVM Dev List Archives	1	144	June 26, 2018
Fail to unroll loop on simple examples. LLVM Dev List Archives	2	114	July 1, 2010
question about loop unrolling Using Clang	1	228	June 28, 2018
Perform only loop unroll using opt Beginners clang , llvm-ir	3	262	March 23, 2024

N+1 iterations with loop unrolling

Related topics