# branch: master
# test_zero_copy.py
# 901 bytes
import unittest
from tinygrad import Tensor, Device
import time

def time_tensor_numpy(out:Tensor):
  """Return the fastest of five timed `as_buffer(allow_zero_copy=True)` calls.

  Each run times the full `out.lazydata.base.realized.as_buffer(...)` expression
  with `time.perf_counter`, so the result is in seconds.
  """
  best = None
  for _ in range(5):
    begin = time.perf_counter()
    out.lazydata.base.realized.as_buffer(allow_zero_copy=True)
    elapsed = time.perf_counter() - begin
    # keep only the quickest run; slower ones are treated as noise
    if best is None or elapsed < best:
      best = elapsed
  return best

# side length of the square N x N tensor created by the zero-copy test below
N = 4096
class TestZeroCopy(unittest.TestCase):
  """Timing-based check that fetching a realized buffer does not copy its data."""

  @unittest.skipIf(Device.DEFAULT not in {"CPU", "LLVM", "METAL"}, "device isn't zero copy")
  def test_zero_copy_from_default_to_cpu(self):
    # a single-element tensor measures the fixed cost of the call itself
    tiny = Tensor.rand(1).realize()
    base_time = time_tensor_numpy(tiny)

    # the N x N tensor is the one whose access time is under test
    big = Tensor.rand(N, N).realize()
    big_time = time_tensor_numpy(big)

    # clamp the extra time so a run at or below the baseline neither divides by zero nor goes negative
    extra = max(big_time-base_time, 1e-10)
    rate = big.nbytes()*1e-9/extra
    print(f"time(base): {base_time*1e3:.2f} ms, time(copy): {big_time*1e3:.2f} ms :  copy speed {rate:.2f} GB/s")

    # an apparent rate above 600 GB/s is taken to mean no copy happened
    self.assertGreater(rate, 600)

# when executed as a script, run this module's tests through unittest with verbosity=2
if __name__ == '__main__':
  unittest.main(verbosity=2)