| // |
| // Generated by NVIDIA NVVM Compiler |
| // |
| // Compiler Build ID: CL-23083092 |
| // Cuda compilation tools, release 9.1, V9.1.85 |
| // Based on LLVM 3.4svn |
| // |
| |
// Module-level PTX directives.
// .version: PTX ISA version this module is written against (6.1).
| .version 6.1 |
// .target: architecture the module is generated for (sm_30).
| .target sm_30 |
// .address_size: addresses are 64 bits wide throughout the module; this matches
// the .u64 pointer parameters and .b64 address registers used by the kernel.
| .address_size 64 |
| |
| // .globl _Z6vecAddPfS_S_i |
| |
// Kernel entry point _Z6vecAddPfS_S_i; the Itanium-mangled name decodes to
// vecAdd(float*, float*, float*, int).
//
// Each thread computes one index i = %ntid.x * %ctaid.x + %tid.x and, when
// i < param_3 (signed compare), performs
//     param_2[i] = param_0[i] + param_1[i]
// on 32-bit floats in the global address space. Threads with i >= param_3
// branch straight to the return.
//
// Parameters:
//   param_0 (.u64)  address of the first f32 input array
//   param_1 (.u64)  address of the second f32 input array
//   param_2 (.u64)  address of the f32 output array
//   param_3 (.u32)  element count, compared as a signed 32-bit value
//
// Only the .x components of the thread/block special registers are read.
// NOTE(review): the index is formed with mad.lo.s32, i.e. only the low 32 bits
// of the product-plus-offset are kept, so it wraps for launches whose flat
// index does not fit in a signed 32-bit integer.
//
// The ".loc 1 <line> <col>" directives are debug line info pointing into
// file 1 of the .file table at the end of the module.
| .visible .entry _Z6vecAddPfS_S_i( |
| .param .u64 _Z6vecAddPfS_S_i_param_0, |
| .param .u64 _Z6vecAddPfS_S_i_param_1, |
| .param .u64 _Z6vecAddPfS_S_i_param_2, |
| .param .u32 _Z6vecAddPfS_S_i_param_3 |
| ) |
| { |
// Virtual register declarations: "%p<2>" declares %p0..%p1, and likewise for
// the f32, b32 and b64 register sets below.
| .reg .pred %p<2>; |
| .reg .f32 %f<4>; |
| .reg .b32 %r<6>; |
| .reg .b64 %rd<11>; |
| |
| |
// Load the three array addresses and the element count from parameter space.
| ld.param.u64 %rd1, [_Z6vecAddPfS_S_i_param_0]; |
| ld.param.u64 %rd2, [_Z6vecAddPfS_S_i_param_1]; |
| ld.param.u64 %rd3, [_Z6vecAddPfS_S_i_param_2]; |
| ld.param.u32 %r2, [_Z6vecAddPfS_S_i_param_3]; |
| .loc 1 9 12 |
// Flat thread index: %r1 = %ntid.x * %ctaid.x + %tid.x (low 32 bits).
| mov.u32 %r3, %ctaid.x; |
| mov.u32 %r4, %ntid.x; |
| mov.u32 %r5, %tid.x; |
| mad.lo.s32 %r1, %r4, %r3, %r5; |
| .loc 1 12 5 |
// Bounds guard: if index >= count (signed), skip the body and return.
| setp.ge.s32 %p1, %r1, %r2; |
| @%p1 bra BB0_2; |
| |
| .loc 1 9 12 |
// Convert the param_0 generic address to a global-space address.
| cvta.to.global.u64 %rd4, %rd1; |
| .loc 1 13 9 |
// Byte offset of element i: widen the signed 32-bit index to 64 bits while
// multiplying by 4 (size of an f32). %rd5 is reused for all three arrays.
| mul.wide.s32 %rd5, %r1, 4; |
| add.s64 %rd6, %rd4, %rd5; |
| .loc 1 9 12 |
// Same conversion for param_1.
| cvta.to.global.u64 %rd7, %rd2; |
| .loc 1 13 9 |
// Load both operands (param_1[i] into %f1, param_0[i] into %f2) and add them.
| add.s64 %rd8, %rd7, %rd5; |
| ld.global.f32 %f1, [%rd8]; |
| ld.global.f32 %f2, [%rd6]; |
| add.f32 %f3, %f2, %f1; |
| .loc 1 9 12 |
// Same conversion for the output address param_2.
| cvta.to.global.u64 %rd9, %rd3; |
| .loc 1 13 9 |
// Store the sum to param_2[i].
| add.s64 %rd10, %rd9, %rd5; |
| st.global.f32 [%rd10], %f3; |
| |
// Common exit: reached by fall-through after the store and by the
// out-of-range branch above.
| BB0_2: |
| .loc 1 14 1 |
| ret; |
| } |
| |
// Debug source-file table. Each entry is: file index, path, then two optional
// integers (timestamp and file size). The index is what the first operand of
// a ".loc" directive refers to.
// File 1 is the file referenced by every ".loc" directive in this listing.
| .file 1 "/tmp/moo.cu", 1525722349, 2016 |
// File 2 is declared here, but no ".loc" directive in this listing refers to it.
| .file 2 "/opt/compiler-explorer/gcc-6.4.0/include/c++/6.4.0/cmath", 1517261630, 47676 |