@@ -402,9 +402,9 @@ define void @merge_loads_i16(i32 %count, ptr noalias nocapture %q, ptr noalias n
402
402
define void @no_merge_loads (i32 %count , ptr noalias nocapture %q , ptr noalias nocapture %p ) nounwind uwtable noinline ssp {
403
403
; X86-BWON-LABEL: no_merge_loads:
404
404
; X86-BWON: # %bb.0:
405
- ; X86-BWON-NEXT: pushl %ebx
405
+ ; X86-BWON-NEXT: pushl %esi
406
406
; X86-BWON-NEXT: .cfi_def_cfa_offset 8
407
- ; X86-BWON-NEXT: .cfi_offset %ebx , -8
407
+ ; X86-BWON-NEXT: .cfi_offset %esi , -8
408
408
; X86-BWON-NEXT: movl {{[0-9]+}}(%esp), %eax
409
409
; X86-BWON-NEXT: testl %eax, %eax
410
410
; X86-BWON-NEXT: jle .LBB5_3
@@ -414,23 +414,21 @@ define void @no_merge_loads(i32 %count, ptr noalias nocapture %q, ptr noalias no
414
414
; X86-BWON-NEXT: .p2align 4
415
415
; X86-BWON-NEXT: .LBB5_2: # %a4
416
416
; X86-BWON-NEXT: # =>This Inner Loop Header: Depth=1
417
- ; X86-BWON-NEXT: movzbl (%edx), %ebx
418
- ; X86-BWON-NEXT: movb %bl, (%ecx)
419
- ; X86-BWON-NEXT: movzbl 1(%edx), %ebx
420
- ; X86-BWON-NEXT: movb %bl, 1(%ecx)
417
+ ; X86-BWON-NEXT: movzwl (%edx), %esi
418
+ ; X86-BWON-NEXT: movw %si, (%ecx)
421
419
; X86-BWON-NEXT: addl $8, %ecx
422
420
; X86-BWON-NEXT: decl %eax
423
421
; X86-BWON-NEXT: jne .LBB5_2
424
422
; X86-BWON-NEXT: .LBB5_3: # %._crit_edge
425
- ; X86-BWON-NEXT: popl %ebx
423
+ ; X86-BWON-NEXT: popl %esi
426
424
; X86-BWON-NEXT: .cfi_def_cfa_offset 4
427
425
; X86-BWON-NEXT: retl
428
426
;
429
427
; X86-BWOFF-LABEL: no_merge_loads:
430
428
; X86-BWOFF: # %bb.0:
431
- ; X86-BWOFF-NEXT: pushl %ebx
429
+ ; X86-BWOFF-NEXT: pushl %esi
432
430
; X86-BWOFF-NEXT: .cfi_def_cfa_offset 8
433
- ; X86-BWOFF-NEXT: .cfi_offset %ebx , -8
431
+ ; X86-BWOFF-NEXT: .cfi_offset %esi , -8
434
432
; X86-BWOFF-NEXT: movl {{[0-9]+}}(%esp), %eax
435
433
; X86-BWOFF-NEXT: testl %eax, %eax
436
434
; X86-BWOFF-NEXT: jle .LBB5_3
@@ -440,15 +438,13 @@ define void @no_merge_loads(i32 %count, ptr noalias nocapture %q, ptr noalias no
440
438
; X86-BWOFF-NEXT: .p2align 4
441
439
; X86-BWOFF-NEXT: .LBB5_2: # %a4
442
440
; X86-BWOFF-NEXT: # =>This Inner Loop Header: Depth=1
443
- ; X86-BWOFF-NEXT: movb (%edx), %bl
444
- ; X86-BWOFF-NEXT: movb %bl, (%ecx)
445
- ; X86-BWOFF-NEXT: movb 1(%edx), %bl
446
- ; X86-BWOFF-NEXT: movb %bl, 1(%ecx)
441
+ ; X86-BWOFF-NEXT: movw (%edx), %si
442
+ ; X86-BWOFF-NEXT: movw %si, (%ecx)
447
443
; X86-BWOFF-NEXT: addl $8, %ecx
448
444
; X86-BWOFF-NEXT: decl %eax
449
445
; X86-BWOFF-NEXT: jne .LBB5_2
450
446
; X86-BWOFF-NEXT: .LBB5_3: # %._crit_edge
451
- ; X86-BWOFF-NEXT: popl %ebx
447
+ ; X86-BWOFF-NEXT: popl %esi
452
448
; X86-BWOFF-NEXT: .cfi_def_cfa_offset 4
453
449
; X86-BWOFF-NEXT: retl
454
450
;
@@ -459,10 +455,8 @@ define void @no_merge_loads(i32 %count, ptr noalias nocapture %q, ptr noalias no
459
455
; X64-BWON-NEXT: .p2align 4
460
456
; X64-BWON-NEXT: .LBB5_1: # %a4
461
457
; X64-BWON-NEXT: # =>This Inner Loop Header: Depth=1
462
- ; X64-BWON-NEXT: movzbl (%rsi), %eax
463
- ; X64-BWON-NEXT: movb %al, (%rdx)
464
- ; X64-BWON-NEXT: movzbl 1(%rsi), %eax
465
- ; X64-BWON-NEXT: movb %al, 1(%rdx)
458
+ ; X64-BWON-NEXT: movzwl (%rsi), %eax
459
+ ; X64-BWON-NEXT: movw %ax, (%rdx)
466
460
; X64-BWON-NEXT: addq $8, %rdx
467
461
; X64-BWON-NEXT: decl %edi
468
462
; X64-BWON-NEXT: jne .LBB5_1
@@ -476,10 +470,8 @@ define void @no_merge_loads(i32 %count, ptr noalias nocapture %q, ptr noalias no
476
470
; X64-BWOFF-NEXT: .p2align 4
477
471
; X64-BWOFF-NEXT: .LBB5_1: # %a4
478
472
; X64-BWOFF-NEXT: # =>This Inner Loop Header: Depth=1
479
- ; X64-BWOFF-NEXT: movb (%rsi), %al
480
- ; X64-BWOFF-NEXT: movb %al, (%rdx)
481
- ; X64-BWOFF-NEXT: movb 1(%rsi), %al
482
- ; X64-BWOFF-NEXT: movb %al, 1(%rdx)
473
+ ; X64-BWOFF-NEXT: movw (%rsi), %ax
474
+ ; X64-BWOFF-NEXT: movw %ax, (%rdx)
483
475
; X64-BWOFF-NEXT: addq $8, %rdx
484
476
; X64-BWOFF-NEXT: decl %edi
485
477
; X64-BWOFF-NEXT: jne .LBB5_1
@@ -858,26 +850,26 @@ define void @MergeLoadStoreBaseIndexOffsetComplicated(ptr %a, ptr %b, ptr %c, i6
858
850
; X86-BWON-NEXT: .cfi_offset %edi, -16
859
851
; X86-BWON-NEXT: .cfi_offset %ebx, -12
860
852
; X86-BWON-NEXT: .cfi_offset %ebp, -8
861
- ; X86-BWON-NEXT: xorl %eax, %eax
862
- ; X86-BWON-NEXT: movl {{[0-9]+}}(%esp), %esi
853
+ ; X86-BWON-NEXT: xorl %esi, %esi
863
854
; X86-BWON-NEXT: movl {{[0-9]+}}(%esp), %edi
864
855
; X86-BWON-NEXT: movl {{[0-9]+}}(%esp), %ebx
865
856
; X86-BWON-NEXT: xorl %ebp, %ebp
866
857
; X86-BWON-NEXT: .p2align 4
867
858
; X86-BWON-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1
868
859
; X86-BWON-NEXT: movsbl (%edi), %ecx
869
- ; X86-BWON-NEXT: movzbl (%esi,%ecx), %edx
870
- ; X86-BWON-NEXT: movzbl 1(%esi,%ecx), %ecx
871
- ; X86-BWON-NEXT: movb %dl, (%ebx,%eax)
872
- ; X86-BWON-NEXT: movl %eax, %edx
873
- ; X86-BWON-NEXT: orl $1, %edx
874
- ; X86-BWON-NEXT: movb %cl, (%ebx,%edx)
860
+ ; X86-BWON-NEXT: movl {{[0-9]+}}(%esp), %eax
861
+ ; X86-BWON-NEXT: movzbl (%eax,%ecx), %edx
862
+ ; X86-BWON-NEXT: movzbl 1(%eax,%ecx), %ecx
863
+ ; X86-BWON-NEXT: movl %esi, %eax
864
+ ; X86-BWON-NEXT: orl $1, %eax
865
+ ; X86-BWON-NEXT: movb %cl, (%ebx,%eax)
866
+ ; X86-BWON-NEXT: movb %dl, (%ebx,%esi)
875
867
; X86-BWON-NEXT: incl %edi
876
- ; X86-BWON-NEXT: addl $2, %eax
868
+ ; X86-BWON-NEXT: addl $2, %esi
877
869
; X86-BWON-NEXT: adcl $0, %ebp
878
- ; X86-BWON-NEXT: cmpl {{[0-9]+}}(%esp), %eax
879
- ; X86-BWON-NEXT: movl %ebp, %ecx
880
- ; X86-BWON-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
870
+ ; X86-BWON-NEXT: cmpl {{[0-9]+}}(%esp), %esi
871
+ ; X86-BWON-NEXT: movl %ebp, %eax
872
+ ; X86-BWON-NEXT: sbbl {{[0-9]+}}(%esp), %eax
881
873
; X86-BWON-NEXT: jl .LBB10_1
882
874
; X86-BWON-NEXT: # %bb.2:
883
875
; X86-BWON-NEXT: popl %esi
@@ -904,26 +896,26 @@ define void @MergeLoadStoreBaseIndexOffsetComplicated(ptr %a, ptr %b, ptr %c, i6
904
896
; X86-BWOFF-NEXT: .cfi_offset %edi, -16
905
897
; X86-BWOFF-NEXT: .cfi_offset %ebx, -12
906
898
; X86-BWOFF-NEXT: .cfi_offset %ebp, -8
907
- ; X86-BWOFF-NEXT: xorl %eax, %eax
908
- ; X86-BWOFF-NEXT: movl {{[0-9]+}}(%esp), %esi
899
+ ; X86-BWOFF-NEXT: xorl %esi, %esi
909
900
; X86-BWOFF-NEXT: movl {{[0-9]+}}(%esp), %edi
910
901
; X86-BWOFF-NEXT: movl {{[0-9]+}}(%esp), %ebx
911
902
; X86-BWOFF-NEXT: xorl %ebp, %ebp
912
903
; X86-BWOFF-NEXT: .p2align 4
913
904
; X86-BWOFF-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1
914
905
; X86-BWOFF-NEXT: movsbl (%edi), %ecx
915
- ; X86-BWOFF-NEXT: movb (%esi,%ecx), %dl
916
- ; X86-BWOFF-NEXT: movb 1(%esi,%ecx), %cl
917
- ; X86-BWOFF-NEXT: movb %dl, (%ebx,%eax)
918
- ; X86-BWOFF-NEXT: movl %eax, %edx
919
- ; X86-BWOFF-NEXT: orl $1, %edx
920
- ; X86-BWOFF-NEXT: movb %cl, (%ebx,%edx)
906
+ ; X86-BWOFF-NEXT: movl {{[0-9]+}}(%esp), %eax
907
+ ; X86-BWOFF-NEXT: movb (%eax,%ecx), %dl
908
+ ; X86-BWOFF-NEXT: movb 1(%eax,%ecx), %cl
909
+ ; X86-BWOFF-NEXT: movl %esi, %eax
910
+ ; X86-BWOFF-NEXT: orl $1, %eax
911
+ ; X86-BWOFF-NEXT: movb %cl, (%ebx,%eax)
912
+ ; X86-BWOFF-NEXT: movb %dl, (%ebx,%esi)
921
913
; X86-BWOFF-NEXT: incl %edi
922
- ; X86-BWOFF-NEXT: addl $2, %eax
914
+ ; X86-BWOFF-NEXT: addl $2, %esi
923
915
; X86-BWOFF-NEXT: adcl $0, %ebp
924
- ; X86-BWOFF-NEXT: cmpl {{[0-9]+}}(%esp), %eax
925
- ; X86-BWOFF-NEXT: movl %ebp, %ecx
926
- ; X86-BWOFF-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
916
+ ; X86-BWOFF-NEXT: cmpl {{[0-9]+}}(%esp), %esi
917
+ ; X86-BWOFF-NEXT: movl %ebp, %eax
918
+ ; X86-BWOFF-NEXT: sbbl {{[0-9]+}}(%esp), %eax
927
919
; X86-BWOFF-NEXT: jl .LBB10_1
928
920
; X86-BWOFF-NEXT: # %bb.2:
929
921
; X86-BWOFF-NEXT: popl %esi
0 commit comments