|  | // | 
|  | // Generated by NVIDIA NVVM Compiler | 
|  | // | 
|  | // Compiler Build ID: CL-23083092 | 
|  | // Cuda compilation tools, release 9.1, V9.1.85 | 
|  | // Based on LLVM 3.4svn | 
|  | // | 
|  |  | 
|  | // Module preamble: PTX ISA version this file is written against. | 
|  | .version 6.1 | 
|  | // Target architecture the code is generated for (sm_30). | 
|  | .target sm_30 | 
|  | // Addresses are 64 bits wide; the kernel's pointer parameters are declared .u64 to match. | 
|  | .address_size 64 | 
|  |  | 
|  | // .globl       _Z6vecAddPfS_S_i | 
|  |  | 
|  | // Kernel entry _Z6vecAddPfS_S_i -- the mangled name decodes to vecAdd(float*, float*, float*, int). | 
|  | // | 
|  | // Parameters (all read with ld.param): | 
|  | //   param_0 (.u64)  address of the first f32 input, read only | 
|  | //   param_1 (.u64)  address of the second f32 input, read only | 
|  | //   param_2 (.u64)  address of the f32 output, written only | 
|  | //   param_3 (.u32)  element count, compared as a signed 32-bit value | 
|  | // | 
|  | // Each thread computes idx = %ntid.x * %ctaid.x + %tid.x in 32-bit arithmetic. | 
|  | // If idx < param_3 it stores param_0[idx] + param_1[idx] into param_2[idx]; | 
|  | // otherwise it jumps straight to the exit label without touching global memory. | 
|  | .visible .entry _Z6vecAddPfS_S_i( | 
|  | .param .u64 _Z6vecAddPfS_S_i_param_0, | 
|  | .param .u64 _Z6vecAddPfS_S_i_param_1, | 
|  | .param .u64 _Z6vecAddPfS_S_i_param_2, | 
|  | .param .u32 _Z6vecAddPfS_S_i_param_3 | 
|  | ) | 
|  | { | 
|  | // Virtual registers, named by role. | 
|  | .reg .pred      %p_inrange; | 
|  | .reg .f32       %f_a, %f_b, %f_sum; | 
|  | .reg .b32       %r_idx, %r_n, %r_cta, %r_ntid, %r_tid; | 
|  | .reg .b64       %rd_a, %rd_b, %rd_c; | 
|  | .reg .b64       %rd_ga, %rd_gb, %rd_gc; | 
|  | .reg .b64       %rd_off, %rd_pa, %rd_pb, %rd_pc; | 
|  |  | 
|  | // Fetch the four kernel arguments from parameter space. | 
|  | ld.param.u64    %rd_a, [_Z6vecAddPfS_S_i_param_0]; | 
|  | ld.param.u64    %rd_b, [_Z6vecAddPfS_S_i_param_1]; | 
|  | ld.param.u64    %rd_c, [_Z6vecAddPfS_S_i_param_2]; | 
|  | ld.param.u32    %r_n, [_Z6vecAddPfS_S_i_param_3]; | 
|  | // idx = ntid.x * ctaid.x + tid.x (mad.lo keeps the low 32 bits of the product). | 
|  | .loc 1 9 12 | 
|  | mov.u32         %r_ntid, %ntid.x; | 
|  | mov.u32         %r_cta, %ctaid.x; | 
|  | mov.u32         %r_tid, %tid.x; | 
|  | mad.lo.s32      %r_idx, %r_ntid, %r_cta, %r_tid; | 
|  | // Guard: only threads with idx < n (signed) fall through to the body. | 
|  | .loc 1 12 5 | 
|  | setp.lt.s32     %p_inrange, %r_idx, %r_n; | 
|  | @!%p_inrange bra L_vecAdd_exit; | 
|  |  | 
|  | // Convert the three generic pointers to global-space addresses. | 
|  | .loc 1 9 12 | 
|  | cvta.to.global.u64      %rd_ga, %rd_a; | 
|  | cvta.to.global.u64      %rd_gb, %rd_b; | 
|  | cvta.to.global.u64      %rd_gc, %rd_c; | 
|  | // Byte offset = idx * 4 (one f32 per element), sign-extended to 64 bits, | 
|  | // then applied to each of the three base addresses. | 
|  | .loc 1 13 9 | 
|  | mul.wide.s32    %rd_off, %r_idx, 4; | 
|  | add.s64         %rd_pa, %rd_ga, %rd_off; | 
|  | add.s64         %rd_pb, %rd_gb, %rd_off; | 
|  | add.s64         %rd_pc, %rd_gc, %rd_off; | 
|  | // Load both operands, add them, store the result. | 
|  | ld.global.f32   %f_a, [%rd_pa]; | 
|  | ld.global.f32   %f_b, [%rd_pb]; | 
|  | add.f32         %f_sum, %f_a, %f_b; | 
|  | st.global.f32   [%rd_pc], %f_sum; | 
|  |  | 
|  | // Common exit for in-range and out-of-range threads. | 
|  | L_vecAdd_exit: | 
|  | .loc 1 14 1 | 
|  | ret; | 
|  | } | 
|  |  | 
|  | // Debug source-file table. The first operand is the file index that .loc directives refer to; | 
|  | // the two trailing integers are, per the PTX .file syntax, a timestamp and a file size. | 
|  | // File 1 is the index used by every .loc directive in the kernel above. | 
|  | .file   1 "/tmp/moo.cu", 1525722349, 2016 | 
|  | // File 2 is not referenced by any .loc directive visible in this listing. | 
|  | .file   2 "/opt/compiler-explorer/gcc-6.4.0/include/c++/6.4.0/cmath", 1517261630, 47676 |