Implement Numpy fancy indexing #434

mratsim · 2020-04-19T20:16:17Z

This implements fancy indexing and closes #400.

This work has been partly sponsored by Base2Genomics

Checklist

fancy indexing via an array/tensor of axisID

Arraymancer/tests/tensor/test_fancy_indexing.nim

Lines 29 to 46 in 46d32d9

    
           test "Index selection via fancy indexing": 
        
             block: # print(x[:, [0, 2]]) 
        
               let r = x[_, [0, 2]] 
        
               let exp = [[ 4,  2], 
        
                          [ 3, 99], 
        
                          [ 1,  7], 
        
                          [ 8,  8]].toTensor() 
        
               check: r == exp 
        
             block: # print(x[[1, 3], :]) 
        
               let r = x[[1, 3], _] 
        
               let exp = [[3, 4, 99], 
        
                          [8, 6,  8]].toTensor() 
        
               check: r == exp

fancy indexing via a boolean mask (result collated in a flattened tensor)

Arraymancer/tests/tensor/test_fancy_indexing.nim

Lines 48 to 57 in 934658b

    
           test "Masked selection via fancy indexing": 
        
             block: 
        
               let r = x[x >. 50] 
        
               let exp = [99, 99].toTensor() 
        
               check: r == exp 
        
             block: 
        
               let r = x[x <. 50] 
        
               let exp = [4, 2, 3, 4, 1, 8, 7, 8, 6, 8].toTensor() 
        
               check: r == exp

fancy indexing via a boolean mask of axisID

Arraymancer/tests/tensor/test_fancy_indexing.nim

Lines 48 to 65 in 46d32d9

    
           test "Masked selection via fancy indexing": 
        
             block: # print('x[:, np.sum(x, axis = 0) > 50]') 
        
               let r = x[_, x.sum(axis = 0) >. 50] 
        
               let exp = [[99, 2], 
        
                          [ 4, 99], 
        
                          [ 8, 7], 
        
                          [ 6, 8]].toTensor() 
        
               check: r == exp 
        
             block: # print('x[np.sum(x, axis = 1) > 50, :]') 
        
               let r = x[x.sum(axis = 1) >. 50, _] 
        
               let exp = [[4, 99, 2], 
        
                          [3, 4, 99]].toTensor() 
        
               check: r == exp

fancy assign a broadcasted value via an array/tensor of axisID

Arraymancer/tests/tensor/test_fancy_indexing.nim

Lines 78 to 99 in f48b409

    
           test "Index assign value via fancy indexing": 
        
             block: # y[:, [0, 2]] = -100 
        
               var y = x.clone() 
        
               y[_, [0, 2]] = -100 
        
               let exp = [[-100, 99, -100], 
        
                          [-100,  4, -100], 
        
                          [-100,  8, -100], 
        
                          [-100,  6, -100]].toTensor() 
        
               check: y == exp 
        
             block: # y[[1, 3], :] = -100 
        
               var y = x.clone() 
        
               y[[1, 3], _] = -100 
        
               let exp = [[   4,   99,    2], 
        
                          [-100, -100, -100], 
        
                          [   1,    8,    7], 
        
                          [-100, -100, -100]].toTensor() 
        
               check: y == exp

fancy assign a broadcasted value via a boolean mask (the tensor is modified in place and keeps its shape)

Arraymancer/tests/tensor/test_fancy_indexing.nim

Lines 101 to 122 in f48b409

    
           test "Masked assign value via fancy indexing": 
        
             block: # y[y > 50] = -100 
        
               var y = x.clone() 
        
               y[y >. 50] = -100 
        
               let exp = [[ 4, -100,    2], 
        
                          [ 3,    4, -100], 
        
                          [ 1,    8,    7], 
        
                          [ 8,    6,    8]].toTensor() 
        
               check: y == exp 
        
             block: # y[y < 50] = -100 
        
               var y = x.clone() 
        
               y[y <. 50] = -100 
        
               let exp = [[ -100,   99, -100], 
        
                          [ -100, -100,   99], 
        
                          [ -100, -100, -100], 
        
                          [ -100, -100, -100]].toTensor() 
        
               check: y == exp

fancy assign a broadcasted value via a boolean mask of axisID

Arraymancer/tests/tensor/test_fancy_indexing.nim

Lines 124 to 145 in f48b409

    
           test "Masked axis assign value via fancy indexing": 
        
             block: # y[:, y.sum(axis = 0) > 50] = -100 
        
               var y = x.clone() 
        
               y[_, y.sum(axis = 0) >. 50] = -100 
        
               let exp = [[  4, -100, -100], 
        
                          [  3, -100, -100], 
        
                          [  1, -100, -100], 
        
                          [  8, -100, -100]].toTensor() 
        
               check: y == exp 
        
             block: # y[y.sum(axis = 1) > 50, :] = -100 
        
               var y = x.clone() 
        
               y[y.sum(axis = 1) >. 50, _] = -100 
        
               let exp = [[-100, -100, -100], 
        
                          [-100, -100, -100], 
        
                          [   1,    8,    7], 
        
                          [   8,    6,    8]].toTensor() 
        
               check: y == exp

fancy assign a broadcastable "1d" tensor via a boolean mask of axisID

Arraymancer/tests/tensor/test_fancy_indexing.nim

Lines 154 to 175 in f48b409

    
           test "Masked axis assign broadcastable 1d tensor via fancy indexing": 
        
             block: # y[:, y.sum(axis = 0) > 50] = np.array([[10], [20], [30], [40]]) 
        
               var y = x.clone() 
        
               y[_, y.sum(axis = 0) >. 50] = [[10], [20], [30], [40]].toTensor() 
        
               let exp = [[  4, 10, 10], 
        
                          [  3, 20, 20], 
        
                          [  1, 30, 30], 
        
                          [  8, 40, 40]].toTensor() 
        
               check: y == exp 
        
             block: # y[y.sum(axis = 1) > 50, :] = np.array([-10, -20, -30]) 
        
               var y = x.clone() 
        
               y[y.sum(axis = 1) >. 50, _] = [[-10, -20, -30]].toTensor() 
        
               let exp = [[-10, -20, -30], 
        
                          [-10, -20, -30], 
        
                          [  1,   8,   7], 
        
                          [  8,   6,   8]].toTensor() 
        
               check: y == exp

Limitations

Only fancy indexing along a single axis is supported; it cannot be combined with other slicing or indexing syntaxes. Expressions such as x[[True, True, False, False], [1,2]] or x[1 .. 2, [1,2]] are not allowed at the moment.
Assignment works for a restricted subset:

broadcasting a scalar value

broadcasting a "1d" tensor; the tensor must already have the proper dimension unsqueezed (e.g. shape [4, 1], not [4], to fill columns of a [4, 3] tensor), for example:

Arraymancer/src/tensor/selectors.nim

Lines 256 to 279 in f48b409

    
           func masked_axis_fill*[T](t: var Tensor[T], mask: Tensor[bool], axis: int, value: T or Tensor[T]) = 
        
             ## Take a 1D boolean mask tensor with size equal to the `t.shape[axis]` 
        
             ## The axis index that are set to true in the mask will be filled with `value` 
        
             ## 
        
             ## Limitation: 
        
             ##   If value is a Tensor, only filling via broadcastable tensors is supported at the moment 
        
             ##   for example if filling axis of a tensor `t` of shape [4, 3] the corresponding shapes are valid 
        
             ##     [4, 3].masked_axis_fill(mask = [1, 3], axis = 1, value = [4, 1]) 
        
             ## 
        
             ##   with values 
        
             ##     t = [[ 4, 99,  2], 
        
             ##          [ 3,  4, 99], 
        
             ##          [ 1,  8,  7], 
        
             ##          [ 8,  6,  8]].toTensor() 
        
             ##     mask = [false, true, true] 
        
             ##     value = [[10], 
        
             ##              [20], 
        
             ##              [30], 
        
             ##              [40]].toTensor() 
        
             ## 
        
             ##     result = [[  4, 10, 10], 
        
             ##               [  3, 20, 20], 
        
             ##               [  1, 30, 30], 
        
             ##               [  8, 40, 40]].toTensor()

Unfortunately, Numpy is inconsistent with respect to broadcasting, and I don't want to add more ergonomic syntax now that new code might come to depend on, only to have to introduce backwards-incompatible changes later to be more Numpy-like on the fancy indexing syntax (though this one is probably safe):

Arraymancer/tests/manual_checks/fancy_indexing.py

Lines 121 to 137 in f48b409

    
           def masked_axis_fill_tensor_invalid_1(): 
        
               # Counter-example: numpy rejects this masked-axis assignment. 
               # For the matrix below the column sums are 16, 117 and 116, so the 
               # mask `y.sum(axis = 0) > 50` is [False, True, True] and targets 
               # two columns of four rows. A flat value of shape (4,) does not 
               # broadcast onto that selection. 
               # 
               # Error reported by numpy: 
               #   ValueError: shape mismatch: 
               #   value array of shape (4,) could not be broadcast 
               #   to indexing result of shape (2,4) 
        
               print('Masked axis fill with tensor - invalid numpy syntax') 
        
               print('--------------------------') 
        
               x = np.array([[ 4, 99,  2], 
        
                           [ 3,  4, 99], 
        
                           [ 1,  8,  7], 
        
                           [ 8,  6,  8]]) 
        
               print(x) 
        
               print('--------------------------') 
        
               # Work on a copy so the reference matrix `x` is left untouched. 
               y = x.copy() 
        
               # Echo the statement before running it, so the output shows what failed. 
               print('y[:, y.sum(axis = 0) > 50] = np.array([10, 20, 30, 40])') 
        
               # Expected to raise the ValueError quoted above; in that case the 
               # final print below is not reached. 
               y[:, y.sum(axis = 0) > 50] = np.array([10, 20, 30, 40]) 
        
               print(y)

Additional fixes included

index_select "allowed" indexing via floats because its index type was SomeNumber instead of SomeInteger (a float index would still have been caught by the compiler, but with a poorer error message)
index_select, masked_select, masked_fill, masked_axis_select, masked_axis_fill have been overloaded for arrays and sequences
The broadcast operators introduced in [RFC] Masked select and Masked fill #429 were using a deprecated syntax

Upstream bug

This PR highlighted another upstream bug when working with types in macros: nim-lang/Nim#14021

…g/Nim#14021

mratsim added 17 commits April 19, 2020 17:15

index_select should use SomeInteger not SOmeNumber

16d7bcc

Overload index_select for arrays and sequences

ea71952

Masked Selector overload for openarrays

6867374

Add masked overload for regular arrays and sequences

80b7d00

Initial support of Numpy fancy indexing: index select

db3f637

Fix broadcast operators from #429 using deprecated syntax

bde1880

Stash dispatcher, working with types in macros is a minefield nim-lan…

927f1de

…g/Nim#14021

Masked indexing: closes #400, workaround nim-lang/Nim#14021

46d32d9

Test for full masked fancy indexing

934658b

Add index_fill

6223e33

Tensor mutation via fancy indexing

b64ea0c

Add tests for index mutation via fancy indexing

092e15b

Fancy indexing: supports broadcasting a value to a masked assignation

155b5d3

Detect wrong mask or tensor axis length

e5f5604

masked axis assign value test

38000f5

Add masked assign of broadcastable tensor

f48b409

Tag for changelog [skip ci]

8d5f7f1

mratsim merged commit b2620e6 into master Apr 19, 2020

mratsim deleted the fancy-indexing branch April 20, 2020 10:44

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Implement Numpy fancy indexing #434

Implement Numpy fancy indexing #434

mratsim commented Apr 19, 2020 •

edited

Loading

	test "Index selection via fancy indexing":
	block: # print(x[:, [0, 2]])
	let r = x[_, [0, 2]]

	let exp = [[ 4, 2],
	[ 3, 99],
	[ 1, 7],
	[ 8, 8]].toTensor()

	check: r == exp

	block: # print(x[[1, 3], :])
	let r = x[[1, 3], _]

	let exp = [[3, 4, 99],
	[8, 6, 8]].toTensor()

	check: r == exp

	test "Masked selection via fancy indexing":
	block:
	let r = x[x >. 50]
	let exp = [99, 99].toTensor()
	check: r == exp

	block:
	let r = x[x <. 50]
	let exp = [4, 2, 3, 4, 1, 8, 7, 8, 6, 8].toTensor()
	check: r == exp

	test "Masked selection via fancy indexing":
	block: # print('x[:, np.sum(x, axis = 0) > 50]')
	let r = x[_, x.sum(axis = 0) >. 50]

	let exp = [[99, 2],
	[ 4, 99],
	[ 8, 7],
	[ 6, 8]].toTensor()

	check: r == exp

	block: # print('x[np.sum(x, axis = 1) > 50, :]')
	let r = x[x.sum(axis = 1) >. 50, _]

	let exp = [[4, 99, 2],
	[3, 4, 99]].toTensor()

	check: r == exp

	test "Index assign value via fancy indexing":
	block: # y[:, [0, 2]] = -100
	var y = x.clone()
	y[_, [0, 2]] = -100

	let exp = [[-100, 99, -100],
	[-100, 4, -100],
	[-100, 8, -100],
	[-100, 6, -100]].toTensor()

	check: y == exp

	block: # y[[1, 3], :] = -100
	var y = x.clone()
	y[[1, 3], _] = -100

	let exp = [[ 4, 99, 2],
	[-100, -100, -100],
	[ 1, 8, 7],
	[-100, -100, -100]].toTensor()

	check: y == exp

	test "Masked assign value via fancy indexing":
	block: # y[y > 50] = -100
	var y = x.clone()
	y[y >. 50] = -100

	let exp = [[ 4, -100, 2],
	[ 3, 4, -100],
	[ 1, 8, 7],
	[ 8, 6, 8]].toTensor()

	check: y == exp

	block: # y[y < 50] = -100
	var y = x.clone()
	y[y <. 50] = -100

	let exp = [[ -100, 99, -100],
	[ -100, -100, 99],
	[ -100, -100, -100],
	[ -100, -100, -100]].toTensor()

	check: y == exp

	test "Masked axis assign value via fancy indexing":
	block: # y[:, y.sum(axis = 0) > 50] = -100
	var y = x.clone()
	y[_, y.sum(axis = 0) >. 50] = -100

	let exp = [[ 4, -100, -100],
	[ 3, -100, -100],
	[ 1, -100, -100],
	[ 8, -100, -100]].toTensor()

	check: y == exp

	block: # y[y.sum(axis = 1) > 50, :] = -100
	var y = x.clone()
	y[y.sum(axis = 1) >. 50, _] = -100

	let exp = [[-100, -100, -100],
	[-100, -100, -100],
	[ 1, 8, 7],
	[ 8, 6, 8]].toTensor()

	check: y == exp

	test "Masked axis assign broadcastable 1d tensor via fancy indexing":
	block: # y[:, y.sum(axis = 0) > 50] = np.array([[10], [20], [30], [40]])
	var y = x.clone()
	y[_, y.sum(axis = 0) >. 50] = [[10], [20], [30], [40]].toTensor()

	let exp = [[ 4, 10, 10],
	[ 3, 20, 20],
	[ 1, 30, 30],
	[ 8, 40, 40]].toTensor()

	check: y == exp

	block: # y[y.sum(axis = 1) > 50, :] = np.array([-10, -20, -30])
	var y = x.clone()
	y[y.sum(axis = 1) >. 50, _] = [[-10, -20, -30]].toTensor()

	let exp = [[-10, -20, -30],
	[-10, -20, -30],
	[ 1, 8, 7],
	[ 8, 6, 8]].toTensor()

	check: y == exp

	func masked_axis_fill*[T](t: var Tensor[T], mask: Tensor[bool], axis: int, value: T or Tensor[T]) =
	## Take a 1D boolean mask tensor with size equal to the `t.shape[axis]`
	## The axis index that are set to true in the mask will be filled with `value`
	##
	## Limitation:
	## If value is a Tensor, only filling via broadcastable tensors is supported at the moment
	## for example if filling axis of a tensor `t` of shape [4, 3] the corresponding shapes are valid
	## [4, 3].masked_axis_fill(mask = [1, 3], axis = 1, value = [4, 1])
	##
	## with values
	## t = [[ 4, 99, 2],
	## [ 3, 4, 99],
	## [ 1, 8, 7],
	## [ 8, 6, 8]].toTensor()
	## mask = [false, true, true]
	## value = [[10],
	## [20],
	## [30],
	## [40]].toTensor()
	##
	## result = [[ 4, 10, 10],
	## [ 3, 20, 20],
	## [ 1, 30, 30],
	## [ 8, 40, 40]].toTensor()

	def masked_axis_fill_tensor_invalid_1():
	# ValueError: shape mismatch:
	# value array of shape (4,) could not be broadcast
	# to indexing result of shape (2,4)
	print('Masked axis fill with tensor - invalid numpy syntax')
	print('--------------------------')
	x = np.array([[ 4, 99, 2],
	[ 3, 4, 99],
	[ 1, 8, 7],
	[ 8, 6, 8]])

	print(x)
	print('--------------------------')
	y = x.copy()
	print('y[:, y.sum(axis = 0) > 50] = np.array([10, 20, 30, 40])')
	y[:, y.sum(axis = 0) > 50] = np.array([10, 20, 30, 40])
	print(y)

Implement Numpy fancy indexing #434

Implement Numpy fancy indexing #434

Conversation

mratsim commented Apr 19, 2020 • edited Loading

Checklist

Limitations

Additional fixes included

Upstream bug

mratsim commented Apr 19, 2020 •

edited

Loading