Certainly. In C++ the generated code is identical for the array-subscript and pointer-arithmetic forms [and it would be in C too]:
#include <stdio.h>
// Declarations only: the array definitions (and their real sizes) are not in
// this file.
extern int arr1[], arr2[];
// Element count used as the bound of both loops below.
const int n = 1000000;
// Accumulates the element-wise products of arr1 and arr2 twice -- first with
// array subscripts, then with explicit pointer arithmetic -- and prints the
// running total after each loop, so the code generated for the two forms can
// be compared side by side.
int main()
{
int sum = 0;
// Form 1: array subscripting.
for (int i = 0; i < n; i++) // n is size of an array
{
sum += arr1[i] * arr2[i];
}
printf("sum=%d\n", sum);
// Form 2: the same traversal written as pointer + offset dereferences.
int* arrP1 = arr1;
int* arrP2 = arr2;
// NOTE: sum is not reset here, so this loop adds on top of the first loop's
// total and the second printf shows the combined value.
for (int i = 0; i < n; i++) // n is size of an array
{
sum += *(arrP1 + i) * *(arrP2 + i);
}
printf("sum=%d\n", sum);
}
Compiling with clang++ -O2 -S arrptr.cpp gives this for the first loop:
# clang -O2 excerpt (AT&T syntax) for the subscript loop, vectorized with SSE2.
# xmm0 and xmm1 are two independent accumulators of 4 x int32 each; one trip
# through the loop body consumes 64 bytes (16 ints) from each array.
pxor %xmm0, %xmm0
# rax = -(1000000 * 4): a negative byte offset that counts up towards zero, so
# the addq at the bottom sets ZF itself and no separate compare is needed.
movq $-4000000, %rax # imm = 0xFFFFFFFFFFC2F700
pxor %xmm1, %xmm1
.align 16, 0x90
.LBB0_1: # %vector.body
# =>This Inner Loop Header: Depth=1
# Load ints [0..3] and [4..7] of the current 64-byte chunk from both arrays.
movdqu arr1+4000000(%rax), %xmm3
movdqu arr1+4000016(%rax), %xmm4
movdqu arr2+4000000(%rax), %xmm2
movdqu arr2+4000016(%rax), %xmm5
# Lane-wise 32-bit multiply assembled from pmuludq, which only multiplies
# lanes 0 and 2 (pmulld, the direct form, needs SSE4.1): multiply the even
# lanes, then the odd lanes (moved into even positions by pshufd $245), keep
# the low 32 bits of each product (pshufd $232) and interleave the two halves
# back into element order (punpckldq).
pshufd $245, %xmm2, %xmm6 # xmm6 = xmm2[1,1,3,3]
pmuludq %xmm3, %xmm2
pshufd $232, %xmm2, %xmm2 # xmm2 = xmm2[0,2,2,3]
pshufd $245, %xmm3, %xmm3 # xmm3 = xmm3[1,1,3,3]
pmuludq %xmm6, %xmm3
pshufd $232, %xmm3, %xmm3 # xmm3 = xmm3[0,2,2,3]
punpckldq %xmm3, %xmm2 # xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
# Same multiply sequence for ints [4..7]; the products end up in xmm3.
pshufd $245, %xmm5, %xmm6 # xmm6 = xmm5[1,1,3,3]
pmuludq %xmm4, %xmm5
pshufd $232, %xmm5, %xmm3 # xmm3 = xmm5[0,2,2,3]
pshufd $245, %xmm4, %xmm4 # xmm4 = xmm4[1,1,3,3]
pmuludq %xmm6, %xmm4
pshufd $232, %xmm4, %xmm4 # xmm4 = xmm4[0,2,2,3]
punpckldq %xmm4, %xmm3 # xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
# Add the running accumulators into the fresh products; the partial sums now
# live in xmm2/xmm3, which frees xmm0/xmm1 for the second half.
paddd %xmm0, %xmm2
paddd %xmm1, %xmm3
# Second half of the unrolled body: ints [8..11] and [12..15] of the chunk.
movdqu arr1+4000032(%rax), %xmm1
movdqu arr1+4000048(%rax), %xmm4
movdqu arr2+4000032(%rax), %xmm0
movdqu arr2+4000048(%rax), %xmm5
pshufd $245, %xmm0, %xmm6 # xmm6 = xmm0[1,1,3,3]
pmuludq %xmm1, %xmm0
pshufd $232, %xmm0, %xmm0 # xmm0 = xmm0[0,2,2,3]
pshufd $245, %xmm1, %xmm1 # xmm1 = xmm1[1,1,3,3]
pmuludq %xmm6, %xmm1
pshufd $232, %xmm1, %xmm1 # xmm1 = xmm1[0,2,2,3]
punpckldq %xmm1, %xmm0 # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
pshufd $245, %xmm5, %xmm6 # xmm6 = xmm5[1,1,3,3]
pmuludq %xmm4, %xmm5
pshufd $232, %xmm5, %xmm1 # xmm1 = xmm5[0,2,2,3]
pshufd $245, %xmm4, %xmm4 # xmm4 = xmm4[1,1,3,3]
pmuludq %xmm6, %xmm4
pshufd $232, %xmm4, %xmm4 # xmm4 = xmm4[0,2,2,3]
punpckldq %xmm4, %xmm1 # xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
# The accumulators are back in xmm0/xmm1, ready for the next iteration.
paddd %xmm2, %xmm0
paddd %xmm3, %xmm1
# Advance by 64 bytes and loop until the offset reaches zero.
addq $64, %rax
jne .LBB0_1
The second loop compiles to:
# clang -O2 excerpt (AT&T syntax) for the pointer-arithmetic loop, vectorized
# with SSE2. xmm0 and xmm1 are two accumulators of 4 x int32 each; one trip
# through the loop body consumes 64 bytes (16 ints) from each array.
pxor %xmm1, %xmm1
pxor %xmm0, %xmm0
# Lane 0 of xmm0 is seeded from a value reloaded from the stack instead of
# starting at zero -- presumably the sum carried over from the preceding loop,
# which the C++ source does not reset (the spill itself is outside this
# excerpt; TODO confirm).
movaps (%rsp), %xmm2 # 16-byte Reload
movss %xmm2, %xmm0 # xmm0 = xmm2[0],xmm0[1,2,3]
# rax = -(1000000 * 4): a negative byte offset that counts up towards zero, so
# the addq at the bottom sets ZF itself and no separate compare is needed.
movq $-4000000, %rax # imm = 0xFFFFFFFFFFC2F700
.align 16, 0x90
.LBB0_3: # %vector.body45
# =>This Inner Loop Header: Depth=1
# Load ints [0..3] and [4..7] of the current 64-byte chunk from both arrays.
movdqu arr1+4000000(%rax), %xmm3
movdqu arr1+4000016(%rax), %xmm4
movdqu arr2+4000000(%rax), %xmm2
movdqu arr2+4000016(%rax), %xmm5
# Lane-wise 32-bit multiply assembled from pmuludq, which only multiplies
# lanes 0 and 2 (pmulld, the direct form, needs SSE4.1): multiply the even
# lanes, then the odd lanes (moved into even positions by pshufd $245), keep
# the low 32 bits of each product (pshufd $232) and interleave the two halves
# back into element order (punpckldq).
pshufd $245, %xmm2, %xmm6 # xmm6 = xmm2[1,1,3,3]
pmuludq %xmm3, %xmm2
pshufd $232, %xmm2, %xmm2 # xmm2 = xmm2[0,2,2,3]
pshufd $245, %xmm3, %xmm3 # xmm3 = xmm3[1,1,3,3]
pmuludq %xmm6, %xmm3
pshufd $232, %xmm3, %xmm3 # xmm3 = xmm3[0,2,2,3]
punpckldq %xmm3, %xmm2 # xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
# Same multiply sequence for ints [4..7]; the products end up in xmm3.
pshufd $245, %xmm5, %xmm6 # xmm6 = xmm5[1,1,3,3]
pmuludq %xmm4, %xmm5
pshufd $232, %xmm5, %xmm3 # xmm3 = xmm5[0,2,2,3]
pshufd $245, %xmm4, %xmm4 # xmm4 = xmm4[1,1,3,3]
pmuludq %xmm6, %xmm4
pshufd $232, %xmm4, %xmm4 # xmm4 = xmm4[0,2,2,3]
punpckldq %xmm4, %xmm3 # xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
# Add the running accumulators into the fresh products; the partial sums now
# live in xmm2/xmm3, which frees xmm0/xmm1 for the second half.
paddd %xmm0, %xmm2
paddd %xmm1, %xmm3
# Second half of the unrolled body: ints [8..11] and [12..15] of the chunk.
movdqu arr1+4000032(%rax), %xmm1
movdqu arr1+4000048(%rax), %xmm4
movdqu arr2+4000032(%rax), %xmm0
movdqu arr2+4000048(%rax), %xmm5
pshufd $245, %xmm0, %xmm6 # xmm6 = xmm0[1,1,3,3]
pmuludq %xmm1, %xmm0
pshufd $232, %xmm0, %xmm0 # xmm0 = xmm0[0,2,2,3]
pshufd $245, %xmm1, %xmm1 # xmm1 = xmm1[1,1,3,3]
pmuludq %xmm6, %xmm1
pshufd $232, %xmm1, %xmm1 # xmm1 = xmm1[0,2,2,3]
punpckldq %xmm1, %xmm0 # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
pshufd $245, %xmm5, %xmm6 # xmm6 = xmm5[1,1,3,3]
pmuludq %xmm4, %xmm5
pshufd $232, %xmm5, %xmm1 # xmm1 = xmm5[0,2,2,3]
pshufd $245, %xmm4, %xmm4 # xmm4 = xmm4[1,1,3,3]
pmuludq %xmm6, %xmm4
pshufd $232, %xmm4, %xmm4 # xmm4 = xmm4[0,2,2,3]
punpckldq %xmm4, %xmm1 # xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
# The accumulators are back in xmm0/xmm1, ready for the next iteration.
paddd %xmm2, %xmm0
paddd %xmm3, %xmm1
# Advance by 64 bytes and loop until the offset reaches zero.
addq $64, %rax
jne .LBB0_3
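This is identical to the first loop, aside from the loop labels and a couple of instructions before the actual loop (which appear to seed the accumulator with the `sum` carried over from the first loop, since the code never resets it).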
g++ -O2 -S likewise produces two loops that are identical to each other, but without SSE vectorization or unrolling, so the loops are simpler:
# g++ -O2 excerpt (AT&T syntax): both source loops become the same scalar loop.
# Inside each loop rax is the byte offset into the arrays and ebx is the
# running sum; ebx is never cleared between the loops in this excerpt, so the
# second loop keeps adding to the first loop's total.
.L2:
# edx = arr1 element at the current offset.
movl arr1(%rax), %edx
addq $4, %rax
# rax has already moved on to the next element, hence the -4 displacement to
# reach the arr2 element that pairs with the arr1 load above.
imull arr2-4(%rax), %edx
addl %edx, %ebx
# 4000000 = 1000000 ints * 4 bytes: stop once the offset reaches the end.
cmpq $4000000, %rax
jne .L2
# printf(.LC0, sum): esi carries the sum, edi the address of .LC0 (presumably
# the "sum=%d\n" format string; its definition is outside this excerpt).
movl %ebx, %esi
movl $.LC0, %edi
# al = 0: no vector registers are used by this variadic call (SysV AMD64).
xorl %eax, %eax
call printf
# Restart the byte offset at zero for the second loop.
xorl %eax, %eax
.p2align 4,,10
.p2align 3
.L3:
# Same body as .L2: one multiply-accumulate per element.
movl arr1(%rax), %edx
addq $4, %rax
imull arr2-4(%rax), %edx
addl %edx, %ebx
cmpq $4000000, %rax
jne .L3
# Second printf(.LC0, sum), set up exactly like the first call.
movl %ebx, %esi
movl $.LC0, %edi
xorl %eax, %eax
call printf
# eax = 0 is left as the return value; rbx is then restored (the matching
# push is outside this excerpt).
xorl %eax, %eax
popq %rbx
.cfi_def_cfa_offset 8
ret