Skip to content

Commit 5a6c580

Browse files
committed
runtime: refactor cpu feature detection for 386 & amd64
Changes all cpu features to be detected and stored in bools in rt0_go.

Updates: #15403

Change-Id: I5a9961cdec789b331d09c44d86beb53833d5dc3e
Reviewed-on: https://go-review.googlesource.com/41950
Run-TryBot: Martin Möhrmann <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Ilya Tocar <[email protected]>
Reviewed-by: Keith Randall <[email protected]>
1 parent 1f85d3a commit 5a6c580

9 files changed

+243
-94
lines changed

src/runtime/alg.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -283,9 +283,9 @@ func alginit() {
283283
// Install aes hash algorithm if we have the instructions we need
284284
if (GOARCH == "386" || GOARCH == "amd64") &&
285285
GOOS != "nacl" &&
286-
cpuid_ecx&(1<<25) != 0 && // aes (aesenc)
287-
cpuid_ecx&(1<<9) != 0 && // sse3 (pshufb)
288-
cpuid_ecx&(1<<19) != 0 { // sse4.1 (pinsr{d,q})
286+
support_aes && // AESENC
287+
support_ssse3 && // PSHUFB
288+
support_sse41 { // PINSR{D,Q}
289289
useAeshash = true
290290
algarray[alg_MEM32].hash = aeshash32
291291
algarray[alg_MEM64].hash = aeshash64

src/runtime/asm_386.s

Lines changed: 68 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -75,24 +75,81 @@ notintel:
7575
MOVL $1, AX
7676
CPUID
7777
MOVL CX, DI // Move to global variable clobbers CX when generating PIC
78-
MOVL AX, runtime·cpuid_eax(SB)
78+
MOVL AX, runtime·processorVersionInfo(SB)
7979
MOVL DI, runtime·cpuid_ecx(SB)
8080
MOVL DX, runtime·cpuid_edx(SB)
8181

8282
// Check for MMX support
83-
TESTL $(1<<23), DX // MMX
84-
JZ bad_proc
83+
TESTL $(1<<23), DX // MMX
84+
JZ bad_proc
8585

86+
TESTL $(1<<26), DX // SSE2
87+
SETNE runtime·support_sse2(SB)
88+
89+
TESTL $(1<<9), DI // SSSE3
90+
SETNE runtime·support_ssse3(SB)
91+
92+
TESTL $(1<<19), DI // SSE4.1
93+
SETNE runtime·support_sse41(SB)
94+
95+
TESTL $(1<<20), DI // SSE4.2
96+
SETNE runtime·support_sse42(SB)
97+
98+
TESTL $(1<<23), DI // POPCNT
99+
SETNE runtime·support_popcnt(SB)
100+
101+
TESTL $(1<<25), DI // AES
102+
SETNE runtime·support_aes(SB)
103+
104+
TESTL $(1<<27), DI // OSXSAVE
105+
SETNE runtime·support_osxsave(SB)
106+
107+
// If OS support for XMM and YMM is not present
108+
// support_avx will be set back to false later.
109+
TESTL $(1<<28), DI // AVX
110+
SETNE runtime·support_avx(SB)
111+
112+
eax7:
86113
// Load EAX=7/ECX=0 cpuid flags
87114
CMPL SI, $7
88-
JLT nocpuinfo
115+
JLT osavx
89116
MOVL $7, AX
90117
MOVL $0, CX
91118
CPUID
92119
MOVL BX, runtime·cpuid_ebx7(SB)
93120

94-
nocpuinfo:
121+
TESTL $(1<<3), BX // BMI1
122+
SETNE runtime·support_bmi1(SB)
123+
124+
// If OS support for XMM and YMM is not present
125+
// support_avx2 will be set back to false later.
126+
TESTL $(1<<5), BX
127+
SETNE runtime·support_avx2(SB)
128+
129+
TESTL $(1<<8), BX // BMI2
130+
SETNE runtime·support_bmi2(SB)
131+
132+
TESTL $(1<<9), BX // ERMS
133+
SETNE runtime·support_erms(SB)
134+
135+
osavx:
136+
// nacl does not support XGETBV to test
137+
// for XMM and YMM OS support.
138+
#ifndef GOOS_nacl
139+
CMPB runtime·support_osxsave(SB), $1
140+
JNE noavx
141+
MOVL $0, CX
142+
// For XGETBV, OSXSAVE bit is required and sufficient
143+
XGETBV
144+
ANDL $6, AX
145+
CMPL AX, $6 // Check for OS support of XMM and YMM registers.
146+
JE nocpuinfo
147+
#endif
148+
noavx:
149+
MOVB $0, runtime·support_avx(SB)
150+
MOVB $0, runtime·support_avx2(SB)
95151

152+
nocpuinfo:
96153
// if there is an _cgo_init, call it to let it
97154
// initialize and to set up GS. if not,
98155
// we set up GS ourselves.
@@ -803,8 +860,8 @@ TEXT runtime·getcallerpc(SB),NOSPLIT,$4-8
803860

804861
// func cputicks() int64
805862
TEXT runtime·cputicks(SB),NOSPLIT,$0-8
806-
TESTL $0x4000000, runtime·cpuid_edx(SB) // no sse2, no mfence
807-
JEQ done
863+
CMPB runtime·support_sse2(SB), $1
864+
JNE done
808865
CMPB runtime·lfenceBeforeRdtsc(SB), $1
809866
JNE mfence
810867
BYTE $0x0f; BYTE $0xae; BYTE $0xe8 // LFENCE
@@ -1311,8 +1368,8 @@ TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
13111368
hugeloop:
13121369
CMPL BX, $64
13131370
JB bigloop
1314-
TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2
1315-
JE bigloop
1371+
CMPB runtime·support_sse2(SB), $1
1372+
JNE bigloop
13161373
MOVOU (SI), X0
13171374
MOVOU (DI), X1
13181375
MOVOU 16(SI), X2
@@ -1455,8 +1512,8 @@ TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
14551512
JEQ allsame
14561513
CMPL BP, $4
14571514
JB small
1458-
TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2
1459-
JE mediumloop
1515+
CMPB runtime·support_sse2(SB), $1
1516+
JNE mediumloop
14601517
largeloop:
14611518
CMPL BP, $16
14621519
JB mediumloop

src/runtime/asm_amd64.s

Lines changed: 62 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0
2626
MOVQ SP, (g_stack+stack_hi)(DI)
2727

2828
// find out information about the processor we're on
29-
MOVQ $0, AX
29+
MOVL $0, AX
3030
CPUID
31-
MOVQ AX, SI
32-
CMPQ AX, $0
31+
MOVL AX, SI
32+
CMPL AX, $0
3333
JE nocpuinfo
3434

3535
// Figure out how to serialize RDTSC.
@@ -46,62 +46,75 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0
4646
notintel:
4747

4848
// Load EAX=1 cpuid flags
49-
MOVQ $1, AX
49+
MOVL $1, AX
5050
CPUID
51-
MOVL AX, runtime·cpuid_eax(SB)
51+
MOVL AX, runtime·processorVersionInfo(SB)
5252
MOVL CX, runtime·cpuid_ecx(SB)
5353
MOVL DX, runtime·cpuid_edx(SB)
5454

55+
TESTL $(1<<26), DX // SSE2
56+
SETNE runtime·support_sse2(SB)
57+
58+
TESTL $(1<<9), CX // SSSE3
59+
SETNE runtime·support_ssse3(SB)
60+
61+
TESTL $(1<<19), CX // SSE4.1
62+
SETNE runtime·support_sse41(SB)
63+
64+
TESTL $(1<<20), CX // SSE4.2
65+
SETNE runtime·support_sse42(SB)
66+
67+
TESTL $(1<<23), CX // POPCNT
68+
SETNE runtime·support_popcnt(SB)
69+
70+
TESTL $(1<<25), CX // AES
71+
SETNE runtime·support_aes(SB)
72+
73+
TESTL $(1<<27), CX // OSXSAVE
74+
SETNE runtime·support_osxsave(SB)
75+
76+
// If OS support for XMM and YMM is not present
77+
// support_avx will be set back to false later.
78+
TESTL $(1<<28), CX // AVX
79+
SETNE runtime·support_avx(SB)
80+
81+
eax7:
5582
// Load EAX=7/ECX=0 cpuid flags
56-
CMPQ SI, $7
57-
JLT no7
83+
CMPL SI, $7
84+
JLT osavx
5885
MOVL $7, AX
5986
MOVL $0, CX
6087
CPUID
6188
MOVL BX, runtime·cpuid_ebx7(SB)
62-
no7:
63-
// Detect AVX and AVX2 as per 14.7.1 Detection of AVX2 chapter of [1]
64-
// [1] 64-ia-32-architectures-software-developer-manual-325462.pdf
65-
// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
66-
MOVL runtime·cpuid_ecx(SB), CX
67-
ANDL $0x18000000, CX // check for OSXSAVE and AVX bits
68-
CMPL CX, $0x18000000
69-
JNE noavx
70-
MOVL $0, CX
89+
90+
TESTL $(1<<3), BX // BMI1
91+
SETNE runtime·support_bmi1(SB)
92+
93+
// If OS support for XMM and YMM is not present
94+
// support_avx2 will be set back to false later.
95+
TESTL $(1<<5), BX
96+
SETNE runtime·support_avx2(SB)
97+
98+
TESTL $(1<<8), BX // BMI2
99+
SETNE runtime·support_bmi2(SB)
100+
101+
TESTL $(1<<9), BX // ERMS
102+
SETNE runtime·support_erms(SB)
103+
104+
osavx:
105+
CMPB runtime·support_osxsave(SB), $1
106+
JNE noavx
107+
MOVL $0, CX
71108
// For XGETBV, OSXSAVE bit is required and sufficient
72109
XGETBV
73-
ANDL $6, AX
74-
CMPL AX, $6 // Check for OS support of YMM registers
75-
JNE noavx
76-
MOVB $1, runtime·support_avx(SB)
77-
TESTL $(1<<5), runtime·cpuid_ebx7(SB) // check for AVX2 bit
78-
JEQ noavx2
79-
MOVB $1, runtime·support_avx2(SB)
80-
JMP testbmi1
110+
ANDL $6, AX
111+
CMPL AX, $6 // Check for OS support of XMM and YMM registers.
112+
JE nocpuinfo
81113
noavx:
82-
MOVB $0, runtime·support_avx(SB)
83-
noavx2:
84-
MOVB $0, runtime·support_avx2(SB)
85-
testbmi1:
86-
// Detect BMI1 and BMI2 extensions as per
87-
// 5.1.16.1 Detection of VEX-encoded GPR Instructions,
88-
// LZCNT and TZCNT, PREFETCHW chapter of [1]
89-
MOVB $0, runtime·support_bmi1(SB)
90-
TESTL $(1<<3), runtime·cpuid_ebx7(SB) // check for BMI1 bit
91-
JEQ testbmi2
92-
MOVB $1, runtime·support_bmi1(SB)
93-
testbmi2:
94-
MOVB $0, runtime·support_bmi2(SB)
95-
TESTL $(1<<8), runtime·cpuid_ebx7(SB) // check for BMI2 bit
96-
JEQ testpopcnt
97-
MOVB $1, runtime·support_bmi2(SB)
98-
testpopcnt:
99-
MOVB $0, runtime·support_popcnt(SB)
100-
TESTL $(1<<23), runtime·cpuid_ecx(SB) // check for POPCNT bit
101-
JEQ nocpuinfo
102-
MOVB $1, runtime·support_popcnt(SB)
103-
nocpuinfo:
104-
114+
MOVB $0, runtime·support_avx(SB)
115+
MOVB $0, runtime·support_avx2(SB)
116+
117+
nocpuinfo:
105118
// if there is an _cgo_init, call it.
106119
MOVQ _cgo_init(SB), AX
107120
TESTQ AX, AX
@@ -1942,9 +1955,8 @@ success_avx2:
19421955
VZEROUPPER
19431956
JMP success
19441957
sse42:
1945-
MOVL runtime·cpuid_ecx(SB), CX
1946-
ANDL $0x100000, CX
1947-
JZ no_sse42
1958+
CMPB runtime·support_sse42(SB), $1
1959+
JNE no_sse42
19481960
CMPQ AX, $12
19491961
// PCMPESTRI is slower than normal compare,
19501962
// so using it makes sense only if we advance 4+ bytes per compare

src/runtime/asm_amd64p32.s

Lines changed: 74 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,9 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0
2828
MOVL SP, (g_stack+stack_hi)(DI)
2929

3030
// find out information about the processor we're on
31-
MOVQ $0, AX
31+
MOVL $0, AX
3232
CPUID
33-
CMPQ AX, $0
33+
CMPL AX, $0
3434
JE nocpuinfo
3535

3636
CMPL BX, $0x756E6547 // "Genu"
@@ -42,13 +42,81 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0
4242
MOVB $1, runtime·isIntel(SB)
4343
notintel:
4444

45-
MOVQ $1, AX
45+
// Load EAX=1 cpuid flags
46+
MOVL $1, AX
4647
CPUID
47-
MOVL AX, runtime·cpuid_eax(SB)
48+
MOVL AX, runtime·processorVersionInfo(SB)
4849
MOVL CX, runtime·cpuid_ecx(SB)
4950
MOVL DX, runtime·cpuid_edx(SB)
50-
nocpuinfo:
51-
51+
52+
TESTL $(1<<26), DX // SSE2
53+
SETNE runtime·support_sse2(SB)
54+
55+
TESTL $(1<<9), CX // SSSE3
56+
SETNE runtime·support_ssse3(SB)
57+
58+
TESTL $(1<<19), CX // SSE4.1
59+
SETNE runtime·support_sse41(SB)
60+
61+
TESTL $(1<<20), CX // SSE4.2
62+
SETNE runtime·support_sse42(SB)
63+
64+
TESTL $(1<<23), CX // POPCNT
65+
SETNE runtime·support_popcnt(SB)
66+
67+
TESTL $(1<<25), CX // AES
68+
SETNE runtime·support_aes(SB)
69+
70+
TESTL $(1<<27), CX // OSXSAVE
71+
SETNE runtime·support_osxsave(SB)
72+
73+
// If OS support for XMM and YMM is not present
74+
// support_avx will be set back to false later.
75+
TESTL $(1<<28), CX // AVX
76+
SETNE runtime·support_avx(SB)
77+
78+
eax7:
79+
// Load EAX=7/ECX=0 cpuid flags
80+
CMPL SI, $7
81+
JLT osavx
82+
MOVL $7, AX
83+
MOVL $0, CX
84+
CPUID
85+
MOVL BX, runtime·cpuid_ebx7(SB)
86+
87+
TESTL $(1<<3), BX // BMI1
88+
SETNE runtime·support_bmi1(SB)
89+
90+
// If OS support for XMM and YMM is not present
91+
// support_avx2 will be set back to false later.
92+
TESTL $(1<<5), BX
93+
SETNE runtime·support_avx2(SB)
94+
95+
TESTL $(1<<8), BX // BMI2
96+
SETNE runtime·support_bmi2(SB)
97+
98+
TESTL $(1<<9), BX // ERMS
99+
SETNE runtime·support_erms(SB)
100+
101+
osavx:
102+
// nacl does not support XGETBV to test
103+
// for XMM and YMM OS support.
104+
#ifndef GOOS_nacl
105+
CMPB runtime·support_osxsave(SB), $1
106+
JNE noavx
107+
MOVL $0, CX
108+
// For XGETBV, OSXSAVE bit is required and sufficient
109+
XGETBV
110+
ANDL $6, AX
111+
CMPL AX, $6 // Check for OS support of XMM and YMM registers.
112+
JE nocpuinfo
113+
#endif
114+
noavx:
115+
MOVB $0, runtime·support_avx(SB)
116+
MOVB $0, runtime·support_avx2(SB)
117+
118+
nocpuinfo:
119+
52120
needtls:
53121
LEAL runtime·m0+m_tls(SB), DI
54122
CALL runtime·settls(SB)

src/runtime/cpuflags_amd64.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,13 @@ var useAVXmemmove bool
88

99
func init() {
1010
// Let's remove stepping and reserved fields
11-
processorVersionInfo := cpuid_eax & 0x0FFF3FF0
11+
processor := processorVersionInfo & 0x0FFF3FF0
1212

1313
isIntelBridgeFamily := isIntel &&
14-
(processorVersionInfo == 0x206A0 ||
15-
processorVersionInfo == 0x206D0 ||
16-
processorVersionInfo == 0x306A0 ||
17-
processorVersionInfo == 0x306E0)
14+
processor == 0x206A0 ||
15+
processor == 0x206D0 ||
16+
processor == 0x306A0 ||
17+
processor == 0x306E0
1818

1919
useAVXmemmove = support_avx && !isIntelBridgeFamily
2020
}

0 commit comments

Comments
 (0)