Skip to content

Commit 58176a5

Browse files
davecramerCommitfest Bot
authored andcommitted
Enable the Microsoft Windows ARM64/MSVC platform
Add support for the ARM64 architecture on Windows 11 using the MSVC compiler, addressing build issues and implementing proper memory synchronization semantics for this platform. * Implement spin_delay() with the __isb(_ARM64_BARRIER_SY) intrinsic to emit the "ISB SY" instruction, which matches the GCC/Clang approach to spinloop delay and the empirical evidence that it out-scales the YIELD instruction in practice. * Unconditionally choose to use the MSVC-supplied intrinsic for CRC32 on ARM64. * Implement the S_UNLOCK() macro using the InterlockedExchange() intrinsic. Author: Greg Burd <greg@burd.me> Author: Dave Cramer <davecramer@gmail.com> Discussion: https://postgr.es/m/3c576ad7-d2da-4137-b791-5821da7cc370%40app.fastmail.com
1 parent 795e94c commit 58176a5

File tree

5 files changed

+67
-18
lines changed

5 files changed

+67
-18
lines changed

doc/src/sgml/installation.sgml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3967,7 +3967,7 @@ configure ... LDFLAGS="-R /usr/sfw/lib:/opt/sfw/lib:/usr/local/lib"
39673967
<sect3 id="install-windows-full-64-bit">
39683968
<title>Special Considerations for 64-Bit Windows</title>
39693969
<para>
3970-
PostgreSQL will only build for the x64 architecture on 64-bit Windows.
3970+
PostgreSQL will only build for the x64 and ARM64 architectures on 64-bit Windows.
39713971
</para>
39723972
<para>
39733973
Mixing 32- and 64-bit versions in the same build tree is not supported.

meson.build

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2527,7 +2527,12 @@ int main(void)
25272527
}
25282528
'''
25292529

2530-
if cc.links(prog, name: '__crc32cb, __crc32ch, __crc32cw, and __crc32cd without -march=armv8-a+crc',
2530+
# Check first for an MSVC/ARM64 combo because the test prog above won't
2531+
# compile there (it doesn't '#ifdef _MSC_VER #include <intrin.h>'). That
2532+
# is okay: we know for a fact that this platform combo supports the ARM64
2533+
# CRC intrinsics the test exercises, so use them unconditionally.
2534+
if (host_cpu == 'aarch64' and cc.get_id() == 'msvc') or \
2535+
cc.links(prog, name: '__crc32cb, __crc32ch, __crc32cw, and __crc32cd without -march=armv8-a+crc',
25312536
args: test_c_args)
25322537
# Use ARM CRC Extension unconditionally
25332538
cdata.set('USE_ARMV8_CRC32C', 1)
@@ -2546,7 +2551,7 @@ int main(void)
25462551
cdata.set('USE_ARMV8_CRC32C', false)
25472552
cdata.set('USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK', 1)
25482553
have_optimized_crc = true
2549-
endif
2554+
endif
25502555

25512556
elif host_cpu == 'loongarch64'
25522557

src/include/storage/s_lock.h

Lines changed: 49 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -594,42 +594,80 @@ tas(volatile slock_t *lock)
594594

595595
#if !defined(HAS_TEST_AND_SET) /* We didn't trigger above, let's try here */
596596

597-
#ifdef _MSC_VER
597+
/* When compiling for Microsoft Windows using MSVC */
598+
#if defined(_MSC_VER)
598599
typedef LONG slock_t;
599600

600601
#define HAS_TEST_AND_SET
601602
#define TAS(lock) (InterlockedCompareExchange(lock, 1, 0))
602603

603604
#define SPIN_DELAY() spin_delay()
604605

605-
/* If using Visual C++ on Win64, inline assembly is unavailable.
606-
* Use a _mm_pause intrinsic instead of rep nop.
606+
/*
607+
* InterlockedExchange() generates a full memory barrier (or release
608+
* semantics), which ensures that all prior memory operations are visible to
609+
* other cores before the lock is released.
610+
*/
611+
#define S_UNLOCK(lock) (InterlockedExchange(lock, 0))
612+
613+
#if defined(_WIN64) /* Microsoft Windows x64 */
614+
615+
#if defined(_M_ARM64) /* aarch64 */
616+
/*
617+
* While there is support for a __yield() intrinsic for MSVC/ARM64[1], a
618+
* wealth of real-world testing across databases and languages, as well as
619+
* a blog post by ARM[2], suggests that ISB is the most scalable and power-
620+
* friendly instruction to use for spinlock delay loops. So we use the only
621+
* supported intrinsic/flag combination available for this platform combo[3].
622+
* This matches what we do above when compiling with either GCC or Clang.
623+
*
624+
* [1] https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics
625+
* [2] https://developer.arm.com/community/arm-community-blogs/b/architectures-and-processors-blog/posts/multi-threaded-applications-arm
626+
* [3] https://github.com/MicrosoftDocs/cpp-docs/blob/main/docs/intrinsics/arm64-intrinsics.md
627+
*/
628+
static __forceinline void
629+
spin_delay(void)
630+
{
631+
__isb(_ARM64_BARRIER_SY);
632+
}
633+
634+
#elif defined(_M_X64) /* x86-64 */
635+
636+
/*
637+
* Use _mm_pause() intrinsic for x86-64. This emits the PAUSE instruction,
638+
* which improves performance in spin-wait loops by preventing pipeline flush
639+
* on Hyper-Threading systems.
607640
*/
608-
#if defined(_WIN64)
609641
static __forceinline void
610642
spin_delay(void)
611643
{
612644
_mm_pause();
613645
}
614-
#else
646+
647+
#endif /* defined(_M_ARM64|_M_X64) */
648+
649+
#else /* !defined(_WIN64) */
650+
651+
#ifdef _M_IX86 /* x86-specific */
652+
653+
/* Use "rep nop" via inline assembly for MSVC 32-bit x86 */
615654
static __forceinline void
616655
spin_delay(void)
617656
{
618657
/* See comment for gcc code. Same code, MASM syntax */
619658
__asm rep nop;
620659
}
621-
#endif
622660

623661
#include <intrin.h>
624662
#pragma intrinsic(_ReadWriteBarrier)
625663

626-
#define S_UNLOCK(lock) \
664+
#define S_UNLOCK(lock) \
627665
do { _ReadWriteBarrier(); (*(lock)) = 0; } while (0)
628666

629-
#endif
630-
631-
632-
#endif /* !defined(HAS_TEST_AND_SET) */
667+
#endif /* defined(_M_IX86) */
668+
#endif /* defined(_WIN64) */
669+
#endif /* defined(_MSC_VER) */
670+
#endif /* !defined(HAS_TEST_AND_SET) */
633671

634672

635673
/* Blow up if we didn't have any way to do spinlocks */

src/port/pg_crc32c_armv8.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,13 @@
1414
*/
1515
#include "c.h"
1616

17+
#ifdef _MSC_VER
18+
/* MSVC ARM64 intrinsics */
19+
#include <intrin.h>
20+
#else
21+
/* GCC/Clang: Use ACLE intrinsics from arm_acle.h */
1722
#include <arm_acle.h>
23+
#endif
1824

1925
#include "port/pg_crc32c.h"
2026

src/tools/msvc_gendef.pl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -118,9 +118,9 @@ sub writedef
118118
{
119119
my $isdata = $def->{$f} eq 'data';
120120

121-
# Strip the leading underscore for win32, but not x64
121+
# Strip the leading underscore for win32, but not x64 and aarch64
122122
$f =~ s/^_//
123-
unless ($arch eq "x86_64");
123+
unless ($arch eq "x86_64" || $arch eq "aarch64");
124124

125125
# Emit just the name if it's a function symbol, or emit the name
126126
# decorated with the DATA option for variables.
@@ -141,7 +141,7 @@ sub writedef
141141
sub usage
142142
{
143143
die("Usage: msvc_gendef.pl --arch <arch> --deffile <deffile> --tempdir <tempdir> files-or-directories\n"
144-
. " arch: x86 | x86_64\n"
144+
. " arch: x86 | x86_64 | aarch64\n"
145145
. " deffile: path of the generated file\n"
146146
. " tempdir: directory for temporary files\n"
147147
. " files or directories: object files or directory containing object files\n"
@@ -158,7 +158,7 @@ sub usage
158158
'tempdir:s' => \$tempdir,) or usage();
159159

160160
usage("arch: $arch")
161-
unless ($arch eq 'x86' || $arch eq 'x86_64');
161+
unless ($arch eq 'x86' || $arch eq 'x86_64' || $arch eq 'aarch64');
162162

163163
my @files;
164164

0 commit comments

Comments
 (0)