"""Tests for tinygrad image dtypes (``dtypes.imagef`` / ``dtypes.imageh``).

Every test class in this module is skipped unless ``Device.DEFAULT`` is
"QCOM" or "GPU".
"""
import unittest

import numpy as np

from tinygrad import Device, dtypes, Tensor, Context
from tinygrad.dtype import ImageDType
from tinygrad.engine.realize import lower_schedule
from tinygrad.helpers import prod, unwrap


@unittest.skipIf(Device.DEFAULT not in ("QCOM", "GPU"), "only images on GPU")
class TestImageCopy(unittest.TestCase):
    """Copy data out of (and into) the buffers behind image-cast tensors."""

    # arange(4) cast to a (1,1,4) image must read back unchanged via the raw buffer
    def test_image_copyout_1x1(self, img_type=dtypes.imagef):
        it = Tensor.arange(4).cast(img_type((1,1,4))).realize()
        buf = it.lazydata.buffer
        out = buf.as_buffer()
        np.testing.assert_equal(out.cast(it.dtype.fmt).tolist(), np.arange(4))

    # same check as above, using imageh instead of imagef
    def test_imageh_copyout_1x1(self):
        self.test_image_copyout_1x1(img_type=dtypes.imageh)

    # arange(4) cast to a (1,1,4) image must read back unchanged via .numpy()
    def test_image_numpy_1x1(self, img_type=dtypes.imagef):
        it = Tensor.arange(4).cast(img_type((1,1,4))).realize()
        np.testing.assert_equal(it.numpy(), np.arange(4))

    # same check as above, using imageh instead of imagef
    def test_imageh_numpy_1x1(self):
        self.test_image_numpy_1x1(img_type=dtypes.imageh)

    # a (2,3,4) imagef must read back as the 24 original values, in order
    def test_image_copyout_2x3(self):
        it = Tensor.arange(2*3*4).cast(dtypes.imagef((2,3,4))).realize()
        buf = it.lazydata.buffer
        out = buf.as_buffer()
        np.testing.assert_equal(out.cast('f').tolist(), np.arange(2*3*4))

    # bytes copied out of one image buffer and copied into another must make the tensors equal
    def test_image_roundtrip(self):
        sz = (4,2,4)
        it = Tensor.rand(prod(sz)).cast(dtypes.imagef(sz)).realize()
        buf = it.lazydata.buffer
        out = buf.as_buffer()

        it2 = Tensor.rand(prod(sz)).cast(dtypes.imagef(sz)).realize()
        buf2 = it2.lazydata.buffer
        buf2.copyin(out)

        # every element must compare equal after the copyin
        self.assertEqual((it == it2).sum().item(), prod(sz))


@unittest.skipIf(Device.DEFAULT not in ("QCOM", "GPU"), "only images on GPU")
class TestImageDType(unittest.TestCase):
    """Check which dtype/buffer a tensor ends up with after casting to an image dtype."""

    # cast + contiguous realizes into an ImageDType buffer and keeps the values
    def test_image_and_back(self):
        data = Tensor.randn(9*27*4).realize()
        tst = data.numpy()
        it = data.cast(dtypes.imagef((9,27,4))).contiguous().realize()
        self.assertIsInstance(it.lazydata.base.realized.dtype, ImageDType)
        np.testing.assert_equal(tst, it.numpy())

    @unittest.expectedFailure  # this isn't supported anymore, CAST to ImageDType stays ImageDType
    def test_image_cast_and_back_collapses(self):
        data = Tensor.randn(9*27*4).realize()
        tst = data.numpy()
        it = data.cast(dtypes.imagef((9,27,4))).realize()
        # the underlying UOp is identical
        self.assertIs(it.lazydata.base.realized, data.lazydata.base.realized)
        np.testing.assert_equal(tst, it.numpy())

    # the image shape (9,12,4) differs from the 9*27*4 input: the realized dtype must not be an image
    def test_image_and_back_wrong_shape(self):
        data = Tensor.randn(9*27*4).realize()
        tst = data.numpy()
        it = data.cast(dtypes.imagef((9,12,4))).realize()
        self.assertNotIsInstance(it.lazydata.base.realized.dtype, ImageDType)
        np.testing.assert_equal(tst, it.numpy())

    # slicing an image tensor must give the same values as slicing its numpy copy
    def test_shrink_load_float(self):
        it = Tensor.randn(4).cast(dtypes.imagef((1,1,4))).realize()
        imgv = it.numpy()
        np.testing.assert_equal(imgv[0:2], it[0:2].numpy())

    def test_mul_stays_image(self):
        # NOTE: contiguous is needed otherwise this folds
        it = Tensor.randn(4).cast(dtypes.imagef((1,1,4))).contiguous().realize()
        out = (it*2).realize()
        self.assertIsInstance(out.lazydata.base.realized.dtype, ImageDType)

    # sum over an image tensor matches numpy's sum to rtol=1e-6
    def test_sum(self):
        it = Tensor.rand(8).cast(dtypes.imagef((1,2,4))).realize()
        itn = it.numpy()
        np.testing.assert_allclose(np.sum(itn), it.sum().numpy(), rtol=1e-6)

    # relu of a 3-element slice of an image tensor matches np.maximum(..., 0)
    def test_shrink_max(self):
        it = Tensor.randn(8).cast(dtypes.imagef((1,2,4))).realize()
        imgv = it.numpy()
        np.testing.assert_equal(np.maximum(imgv[0:3], 0), it[0:3].relu().numpy())

    # relu of column 0 of a (4, 4) image-cast tensor matches np.maximum(..., 0)
    def test_shrink_to_float(self):
        it = Tensor.randn(4, 4).cast(dtypes.imagef((1,4,4))).realize()
        imgv = it.numpy()
        np.testing.assert_equal(np.maximum(imgv[:, 0], 0), it[:, 0].relu().numpy())

    # after `del it`, repeating the identical cast+realize must yield the same underlying _buf
    # (presumably through the allocator's LRU cache, going by the test name -- confirm)
    def test_lru_alloc(self):
        data = Tensor.randn(9*27*4).realize()
        it = data.cast(dtypes.imagef((9,27,4))).realize()
        b1 = it.lazydata.base.realized._buf
        del it
        it = data.cast(dtypes.imagef((9,27,4))).realize()
        self.assertEqual(it.lazydata.base.realized._buf, b1)

    # a cast to a different image shape must not hand back the _buf of the deleted image
    def test_no_lru_alloc(self):
        data = Tensor.randn(9*27*4).realize()
        it = data.cast(dtypes.imagef((9,27,4))).contiguous().realize()
        b1 = it.lazydata.base.realized._buf
        del it
        it = data.cast(dtypes.imagef((10,27,4))).contiguous().realize()
        self.assertNotEqual(it.lazydata.base.realized._buf, b1)

    # a cast to imageh must not hand back the _buf of the deleted imagef of the same shape
    def test_no_lru_alloc_dtype(self):
        data = Tensor.randn(9*27*4).realize()
        it = data.cast(dtypes.imagef((9,27,4))).contiguous().realize()
        b1 = it.lazydata.base.realized._buf
        del it
        it = data.cast(dtypes.imageh((9,27,4))).realize()
        self.assertNotEqual(it.lazydata.base.realized._buf, b1)

    # issue caused by: don't realize image to image casts. this is part of a larger problem
    # NOTE: this test used to carry @unittest.expectedFailure
    # update: passing after tensor_map
    def test_lil_model(self):
        with Context(IMAGE=2):
            x = Tensor.zeros(1, 1)
            w1 = Tensor.zeros(1, 8, requires_grad=True)
            w2 = Tensor.zeros(8, 2)
            loss = x.image_dot(w1).image_dot(w2).float().max()
            loss.backward()
            sched = unwrap(w1.grad).schedule()
            # lower_schedule receives a copy of the list (sched[:]); sched itself is reused below
            for s,(_,ei) in zip(sched, lower_schedule(sched[:])):
                ei.run()
                if s.bufs[0].dtype == dtypes.float:
                    lst = s.bufs[0].as_buffer().cast("f").tolist()
                    # report the values only when the check fails instead of printing on every run
                    if np.any(np.isnan(lst)):
                        self.fail(f"NaN in realized float buffer: {lst}")
            # NOTE: the w1 grad must realize to a separate kernel
            if not w1.grad.lazydata.is_realized:
                self.fail(f"never realized {w1.grad}")
            self.assertEqual(w1.grad.lazydata.base.buffer.dtype, dtypes.float32)
            self.assertEqual(len(sched), 10)


@unittest.skipIf(Device.DEFAULT not in ("QCOM", "GPU"), "only images on GPU")
class TestImageRealization(unittest.TestCase):
    """Check the reported tensor dtype before and after realizing image-derived tensors."""

    # expanding an image to (9,27,4,4) and realizing it contiguously reports float32
    def test_image_dtype_expand(self):
        data = Tensor.randn(9*27*4).realize()
        it = data.cast(dtypes.imagef((9,27,4))).contiguous().realize()
        self.assertEqual(it.dtype, dtypes.imagef((9,27,4)))
        it_expanded = it.reshape((9,27,4,1)).expand((9,27,4,4)).contiguous().realize()
        self.assertEqual(it_expanded.dtype, dtypes.float32)

    # expanding and then summing the added axis away reports the original image dtype again
    def test_image_dtype_expand_and_back(self):
        data = Tensor.randn(9*27*4).realize()
        it = data.cast(dtypes.imagef((9,27,4))).contiguous().realize()
        self.assertEqual(it.dtype, dtypes.imagef((9,27,4)))
        it_expanded = it.reshape((9,27,4,1)).expand((9,27,4,4))
        it2 = it_expanded.sum(3).realize()
        self.assertEqual(it2.dtype, dtypes.imagef((9,27,4)))

    # dtype of two ALU children built on an expanded image, before and after each realize
    def test_image_alu_children(self):
        data = Tensor.randn(9*27*4).realize()
        it = data.cast(dtypes.imagef((9,27,4))).contiguous().realize()
        self.assertEqual(it.dtype, dtypes.imagef((9,27,4)))
        it_expanded = it.reshape((9,27,4,1)).expand((9,27,4,4)).contiguous()
        alu1 = it_expanded+1
        alu2 = it_expanded.sum(3)
        it_expanded.realize()
        # NOTE: the parent becomes float, but the alu child will stay image until its output cannot fit the image
        self.assertEqual(alu1.dtype, dtypes.imagef((9,27,4)))
        alu1.realize()
        self.assertEqual(alu1.dtype, dtypes.float32)
        # alu2 is back in image because it fits the dtype again
        self.assertEqual(alu2.dtype, dtypes.imagef((9,27,4)))
        alu2.realize()
        self.assertEqual(alu2.dtype, dtypes.imagef((9,27,4)))


if __name__ == '__main__':
    unittest.main()