branch: master
test.py
602 bytesRaw
#!/usr/bin/env python3
import numpy as np
from tinygrad.runtime.ops_cuda import CUDAProgram, RawCUDABuffer

if __name__ == "__main__":
  # Hand-written PTX for a kernel named `test` taking one 64-bit parameter `x`.
  # It loads `x`, converts it to a global-space address, and stores the 32-bit
  # pattern 0x40000000 (the IEEE-754 encoding of float32 2.0) at that address,
  # i.e. only the first 4 bytes of the pointed-to memory are written.
  ptx_source = """
  .version 7.8
  .target sm_86
  .address_size 64
  .visible .entry test(.param .u64 x) {
    .reg .b32       %r<2>;
    .reg .b64       %rd<3>;

    ld.param.u64    %rd1, [x];
    cvta.to.global.u64      %rd2, %rd1;
    mov.u32         %r1, 0x40000000; // 2.0 in float
    st.global.u32   [%rd2], %r1;
    ret;
  }"""

  # Buffer built from ten float32 zeros; passed to the kernel as its `x` argument.
  device_buf = RawCUDABuffer.fromCPU(np.zeros(10, dtype=np.float32))

  # NOTE(review): binary=True presumably tells CUDAProgram the source is already
  # PTX and needs no compilation step -- confirm against ops_cuda.
  kernel = CUDAProgram("test", ptx_source, binary=True)

  # NOTE(review): the two [1] lists look like the global and local launch sizes
  # (a single thread) -- confirm against CUDAProgram.__call__.
  kernel([1], [1], device_buf)

  # Read the buffer back and show it; element 0 is expected to be 2.0, the rest 0.0.
  print(device_buf.toCPU())