# branch: master
# test_image_dtype.py (7106 bytes)
import unittest
import numpy as np
from tinygrad import Device, dtypes, Tensor, Context
from tinygrad.dtype import ImageDType
from tinygrad.engine.realize import lower_schedule
from tinygrad.helpers import prod, unwrap
@unittest.skipIf(Device.DEFAULT not in ("QCOM", "GPU"), "only images on GPU")
class TestImageCopy(unittest.TestCase):
  # Round-trips raw data through image-typed buffers: copyout, .numpy(), and copyin.
  def test_image_copyout_1x1(self, img_type=dtypes.imagef):
    # copying a 1x1x4 image buffer out must yield the original values
    tensor = Tensor.arange(4).cast(img_type((1,1,4))).realize()
    raw = tensor.lazydata.buffer.as_buffer()
    np.testing.assert_equal(raw.cast(tensor.dtype.fmt).tolist(), np.arange(4))
  def test_imageh_copyout_1x1(self): self.test_image_copyout_1x1(img_type=dtypes.imageh)
  def test_image_numpy_1x1(self, img_type=dtypes.imagef):
    # .numpy() on an image-typed tensor must return the original values
    tensor = Tensor.arange(4).cast(img_type((1,1,4))).realize()
    np.testing.assert_equal(tensor.numpy(), np.arange(4))
  def test_imageh_numpy_1x1(self): self.test_image_numpy_1x1(img_type=dtypes.imageh)
  def test_image_copyout_2x3(self):
    # same as the 1x1 copyout, but with a non-trivial 2x3x4 image shape
    tensor = Tensor.arange(2*3*4).cast(dtypes.imagef((2,3,4))).realize()
    raw = tensor.lazydata.buffer.as_buffer()
    np.testing.assert_equal(raw.cast('f').tolist(), np.arange(2*3*4))
  def test_image_roundtrip(self):
    # data copied out of one image buffer and into another must survive intact
    sz = (4,2,4)
    src = Tensor.rand(prod(sz)).cast(dtypes.imagef(sz)).realize()
    raw = src.lazydata.buffer.as_buffer()
    dst = Tensor.rand(prod(sz)).cast(dtypes.imagef(sz)).realize()
    dst.lazydata.buffer.copyin(raw)
    assert (src == dst).sum().item() == prod(sz)
@unittest.skipIf(Device.DEFAULT not in ("QCOM", "GPU"), "only images on GPU")
class TestImageDType(unittest.TestCase):
  # Checks when a realized buffer keeps an ImageDType and when it decays to a
  # plain float buffer, plus allocator-reuse behavior for image buffers.
  def test_image_and_back(self):
    # a contiguous cast to an image dtype must realize as an actual image buffer
    data = Tensor.randn(9*27*4).realize()
    tst = data.numpy()
    it = data.cast(dtypes.imagef((9,27,4))).contiguous().realize()
    assert isinstance(it.lazydata.base.realized.dtype, ImageDType)
    np.testing.assert_equal(tst, it.numpy())
  @unittest.expectedFailure # this isn't supported anymore, CAST to ImageDType stays ImageDType
  def test_image_cast_and_back_collapses(self):
    data = Tensor.randn(9*27*4).realize()
    tst = data.numpy()
    it = data.cast(dtypes.imagef((9,27,4))).realize()
    # the underlying UOp is identical
    self.assertIs(it.lazydata.base.realized, data.lazydata.base.realized)
    np.testing.assert_equal(tst, it.numpy())
  def test_image_and_back_wrong_shape(self):
    # (9,12,4) holds 432 elements, not 9*27*4=972 — presumably the mismatched
    # image shape forces fallback to a non-image buffer (verify against scheduler)
    data = Tensor.randn(9*27*4).realize()
    tst = data.numpy()
    it = data.cast(dtypes.imagef((9,12,4))).realize()
    assert not isinstance(it.lazydata.base.realized.dtype, ImageDType)
    np.testing.assert_equal(tst, it.numpy())
  def test_shrink_load_float(self):
    # slicing an image-typed tensor must read the same values as the full copy
    it = Tensor.randn(4).cast(dtypes.imagef((1,1,4))).realize()
    imgv = it.numpy()
    np.testing.assert_equal(imgv[0:2], it[0:2].numpy())
  def test_mul_stays_image(self):
    # NOTE: contiguous is needed otherwise this folds
    it = Tensor.randn(4).cast(dtypes.imagef((1,1,4))).contiguous().realize()
    out = (it*2).realize()
    assert isinstance(out.lazydata.base.realized.dtype, ImageDType)
  def test_sum(self):
    # reductions over an image-typed tensor agree with numpy (float tolerance)
    it = Tensor.rand(8).cast(dtypes.imagef((1,2,4))).realize()
    itn = it.numpy()
    np.testing.assert_allclose(np.sum(itn), it.sum().numpy(), rtol=1e-6)
  def test_shrink_max(self):
    # shrink followed by an ALU op (relu) on an image-typed tensor
    it = Tensor.randn(8).cast(dtypes.imagef((1,2,4))).realize()
    imgv = it.numpy()
    np.testing.assert_equal(np.maximum(imgv[0:3], 0), it[0:3].relu().numpy())
  def test_shrink_to_float(self):
    # a column slice of a 2D view over an image buffer
    it = Tensor.randn(4, 4).cast(dtypes.imagef((1,4,4))).realize()
    imgv = it.numpy()
    np.testing.assert_equal(np.maximum(imgv[:, 0], 0), it[:, 0].relu().numpy())
  def test_lru_alloc(self):
    # freeing and re-requesting an identically-shaped image should reuse the
    # same underlying allocation (LRU allocator hit) — compared via _buf identity
    data = Tensor.randn(9*27*4).realize()
    it = data.cast(dtypes.imagef((9,27,4))).realize()
    b1 = it.lazydata.base.realized._buf
    del it
    it = data.cast(dtypes.imagef((9,27,4))).realize()
    assert it.lazydata.base.realized._buf == b1
  def test_no_lru_alloc(self):
    # a different image shape must NOT reuse the freed allocation
    data = Tensor.randn(9*27*4).realize()
    it = data.cast(dtypes.imagef((9,27,4))).contiguous().realize()
    b1 = it.lazydata.base.realized._buf
    del it
    it = data.cast(dtypes.imagef((10,27,4))).contiguous().realize()
    assert it.lazydata.base.realized._buf != b1
  def test_no_lru_alloc_dtype(self):
    # same shape but a different image dtype (half vs float) must not reuse either
    data = Tensor.randn(9*27*4).realize()
    it = data.cast(dtypes.imagef((9,27,4))).contiguous().realize()
    b1 = it.lazydata.base.realized._buf
    del it
    it = data.cast(dtypes.imageh((9,27,4))).realize()
    assert it.lazydata.base.realized._buf != b1
  # issue caused by: don't realize image to image casts. this is part of a larger problem
  #@unittest.expectedFailure
  # update: passing after tensor_map
  def test_lil_model(self):
    # tiny two-layer model under IMAGE=2: backward pass must produce NaN-free
    # float buffers and realize the w1 gradient in its own kernel
    with Context(IMAGE=2):
      x = Tensor.zeros(1, 1)
      w1 = Tensor.zeros(1, 8, requires_grad=True)
      w2 = Tensor.zeros(8, 2)
      loss = x.image_dot(w1).image_dot(w2).float().max()
      loss.backward()
      sched = unwrap(w1.grad).schedule()
      # run each lowered kernel and inspect every float output buffer for NaNs
      for s,(_,ei) in zip(sched, lower_schedule(sched[:])):
        ei.run()
        if s.bufs[0].dtype == dtypes.float:
          lst = s.bufs[0].as_buffer().cast("f").tolist()
          print(lst)
          assert not np.any(np.isnan(lst))
      # NOTE: the w1 grad must realize to a separate kernel
      assert w1.grad.lazydata.is_realized, f"never realized {w1.grad}"
      self.assertEqual(w1.grad.lazydata.base.buffer.dtype, dtypes.float32)
      self.assertEqual(len(sched), 10)
@unittest.skipIf(Device.DEFAULT not in ("QCOM", "GPU"), "only images on GPU")
class TestImageRealization(unittest.TestCase):
  # How image dtypes survive (or decay to float32) across expand/ALU realization.
  def test_image_dtype_expand(self):
    base = Tensor.randn(9*27*4).realize()
    img = base.cast(dtypes.imagef((9,27,4))).contiguous().realize()
    self.assertEqual(img.dtype, dtypes.imagef((9,27,4)))
    # expanding past the image's element count realizes as plain float32
    expanded = img.reshape((9,27,4,1)).expand((9,27,4,4)).contiguous().realize()
    self.assertEqual(expanded.dtype, dtypes.float32)
  def test_image_dtype_expand_and_back(self):
    base = Tensor.randn(9*27*4).realize()
    img = base.cast(dtypes.imagef((9,27,4))).contiguous().realize()
    self.assertEqual(img.dtype, dtypes.imagef((9,27,4)))
    # reducing the expanded axis brings the output back to the image's size
    reduced = img.reshape((9,27,4,1)).expand((9,27,4,4)).sum(3).realize()
    self.assertEqual(reduced.dtype, dtypes.imagef((9,27,4)))
  def test_image_alu_children(self):
    base = Tensor.randn(9*27*4).realize()
    img = base.cast(dtypes.imagef((9,27,4))).contiguous().realize()
    self.assertEqual(img.dtype, dtypes.imagef((9,27,4)))
    expanded = img.reshape((9,27,4,1)).expand((9,27,4,4)).contiguous()
    alu_add = expanded+1
    alu_sum = expanded.sum(3)
    expanded.realize()
    # NOTE: the parent becomes float, but the alu child will stay image until its output cannot fit the image
    self.assertEqual(alu_add.dtype, dtypes.imagef((9,27,4)))
    alu_add.realize()
    self.assertEqual(alu_add.dtype, dtypes.float32)
    # the sum child is back in image because its output fits the dtype again
    self.assertEqual(alu_sum.dtype, dtypes.imagef((9,27,4)))
    alu_sum.realize()
    self.assertEqual(alu_sum.dtype, dtypes.imagef((9,27,4)))
# Run the test suite when this file is executed directly.
if __name__ == '__main__':
  unittest.main()