@@ -212,6 +212,116 @@ def retrieve_non_zero_diagonal_blocks(
212
212
column_charges , return_counts = True )
213
213
common_charges = np .intersect1d (
214
214
unique_row_charges , - unique_column_charges , assume_unique = True )
215
+
216
+ row_degeneracies = dict (zip (unique_row_charges , row_dims ))
217
+ column_degeneracies = dict (zip (unique_column_charges , column_dims ))
218
+
219
+ mask = np .isin (column_charges , - common_charges )
220
+ masked_charges = column_charges [mask ]
221
+ degeneracy_vector = np .empty (len (masked_charges ), dtype = np .int64 )
222
+ masks = {}
223
+ for c in common_charges :
224
+ mask = masked_charges == - c
225
+ masks [c ] = mask
226
+ degeneracy_vector [mask ] = row_degeneracies [c ]
227
+ summed_degeneracies = np .cumsum (degeneracy_vector )
228
+ blocks = {}
229
+
230
+ for c in common_charges :
231
+ a = np .expand_dims (summed_degeneracies [masks [c ]] - row_degeneracies [c ], 0 )
232
+ b = np .expand_dims (np .arange (row_degeneracies [c ]), 1 )
233
+ if not return_data :
234
+ blocks [c ] = [a + b , (row_degeneracies [c ], column_degeneracies [- c ])]
235
+ else :
236
+ blocks [c ] = np .reshape (data [a + b ],
237
+ (row_degeneracies [c ], column_degeneracies [- c ]))
238
+ return blocks
239
+
240
+
241
+ def retrieve_non_zero_diagonal_blocks_deprecated (
242
+ data : np .ndarray ,
243
+ charges : List [np .ndarray ],
244
+ flows : List [Union [bool , int ]],
245
+ return_data : Optional [bool ] = True ) -> Dict :
246
+ """
247
+ Given the meta data and underlying data of a symmetric matrix, compute
248
+ all diagonal blocks and return them in a dict.
249
+ Args:
250
+ data: An np.ndarray of the data. The number of elements in `data`
251
+ has to match the number of non-zero elements defined by `charges`
252
+ and `flows`
253
+ charges: List of np.ndarray, one for each leg.
254
+ Each np.ndarray `charges[leg]` is of shape `(D[leg],)`.
255
+ The bond dimension `D[leg]` can vary on each leg.
256
+ flows: A list of integers, one for each leg,
257
+ with values `1` or `-1`, denoting the flow direction
258
+ of the charges on each leg. `1` is inflowing, `-1` is outflowing
259
+ charge.
260
+ return_data: If `True`, the return dictionary maps quantum numbers `q` to
261
+ actual `np.ndarray` with the data. This involves a copy of data.
262
+ If `False`, the returned dict maps quantum numbers of a list
263
+ [locations, shape], where `locations` is an np.ndarray of type np.int64
264
+ containing the locations of the tensor elements within A.data, i.e.
265
+ `A.data[locations]` contains the elements belonging to the tensor with
266
+ quantum numbers `(q,q). `shape` is the shape of the corresponding array.
267
+
268
+ Returns:
269
+ dict: Dictionary mapping quantum numbers (integers) to either an np.ndarray
270
+ or a python list of locations and shapes, depending on the value of `return_data`.
271
+ """
272
+ #TODO: this is currently way too slow!!!!
273
+ #Run the following benchmark for testing (typical MPS use case)
274
+ #retrieving the blocks is ~ 10 times as slow as multiplying them
275
+
276
+ # D=4000
277
+ # B=10
278
+ # q1 = np.random.randint(0,B,D)
279
+ # q2 = np.asarray([0,1])
280
+ # q3 = np.random.randint(0,B,D)
281
+ # i1 = Index(charges=q1,flow=1)
282
+ # i2 = Index(charges=q2,flow=1)
283
+ # i3 = Index(charges=q3,flow=-1)
284
+ # indices=[i1,i2,i3]
285
+ # A = BlockSparseTensor.random(indices=indices, dtype=np.complex128)
286
+ # A.reshape((D*2, D))
287
+ # def multiply_blocks(blocks):
288
+ # for b in blocks.values():
289
+ # np.dot(b.T, b)
290
+ # t1s=[]
291
+ # t2s=[]
292
+ # for n in range(10):
293
+ # print(n)
294
+ # t1 = time.time()
295
+ # b = A.get_diagonal_blocks()
296
+ # t1s.append(time.time() - t1)
297
+ # t1 = time.time()
298
+ # multiply_blocks(b)
299
+ # t2s.append(time.time() - t1)
300
+ # print('average retrieval time', np.average(t1s))
301
+ # print('average multiplication time',np.average(t2s))
302
+
303
+ if len (charges ) != 2 :
304
+ raise ValueError ("input has to be a two-dimensional symmetric matrix" )
305
+ check_flows (flows )
306
+ if len (flows ) != len (charges ):
307
+ raise ValueError ("`len(flows)` is different from `len(charges)`" )
308
+
309
+ row_charges = flows [0 ] * charges [0 ] # a list of charges on each row
310
+ column_charges = flows [1 ] * charges [1 ] # a list of charges on each column
311
+
312
+ #get the unique charges
313
+ t1 = time .time ()
314
+ unique_row_charges , row_dims = np .unique (row_charges , return_counts = True )
315
+ # print('finding unique row charges', time.time() - t1)
316
+ # t1 = time.time()
317
+ unique_column_charges , column_dims = np .unique (
318
+ column_charges , return_counts = True )
319
+ # print('finding unique column charges', time.time() - t1)
320
+ # t1 = time.time()
321
+ common_charges = np .intersect1d (
322
+ unique_row_charges , - unique_column_charges , assume_unique = True )
323
+ # print('finding unique intersections', time.time() - t1)
324
+ # t1 = time.time()
215
325
#common_charges = np.intersect1d(row_charges, -column_charges)
216
326
217
327
# for each matrix column find the number of non-zero elements in it
@@ -223,21 +333,61 @@ def retrieve_non_zero_diagonal_blocks(
223
333
column_degeneracies = dict (zip (unique_column_charges , column_dims ))
224
334
225
335
number_of_seen_elements = 0
226
- idxs = {c : [] for c in common_charges }
336
+ #idxs = {c: [] for c in common_charges}
337
+ idxs = {
338
+ c : np .empty (
339
+ row_degeneracies [c ] * column_degeneracies [- c ], dtype = np .int64 )
340
+ for c in common_charges
341
+ }
342
+ idxs_stops = {c : 0 for c in common_charges }
343
+ t1 = time .time ()
227
344
mask = np .isin (column_charges , - common_charges )
228
- for charge in column_charges [mask ]:
229
- idxs [- charge ].append (
230
- np .arange (number_of_seen_elements ,
231
- row_degeneracies [- charge ] + number_of_seen_elements ))
345
+ masked_charges = column_charges [mask ]
346
+ print ('finding mask' , time .time () - t1 )
347
+ # print(len(column_charges), len(masked_charges))
348
+ t1 = time .time ()
349
+ elements = {c : np .arange (row_degeneracies [c ]) for c in common_charges }
350
+ for charge in masked_charges :
351
+ # idxs[-charge].append((number_of_seen_elements,
352
+ # row_degeneracies[-charge] + number_of_seen_elements))
353
+
354
+ idxs [- charge ][
355
+ idxs_stops [- charge ]:idxs_stops [- charge ] +
356
+ row_degeneracies [- charge ]] = number_of_seen_elements + elements [- charge ]
357
+
358
+ # np.arange(
359
+ # number_of_seen_elements,
360
+ # row_degeneracies[-charge] + number_of_seen_elements)
361
+
232
362
number_of_seen_elements += row_degeneracies [- charge ]
363
+ idxs_stops [- charge ] += row_degeneracies [- charge ]
364
+ print ('getting start and stop' , time .time () - t1 )
365
+ # t1 = time.time()
366
+ # for charge in masked_charges:
367
+ # tmp = np.arange(number_of_seen_elements,
368
+ # row_degeneracies[-charge] + number_of_seen_elements)
369
+ # number_of_seen_elements += row_degeneracies[-charge]
370
+ # print('running the partial loop', time.time() - t1)
371
+
372
+ #######################################################################################
373
+ #looks like this takes pretty long for rectangular matrices where shape[1] >> shape[0]
374
+ #it's mostly np.arange that causes the overhead.
375
+ # t1 = time.time()
376
+ # for charge in masked_charges:
377
+ # idxs[-charge].append(
378
+ # np.arange(number_of_seen_elements,
379
+ # row_degeneracies[-charge] + number_of_seen_elements))
380
+ # number_of_seen_elements += row_degeneracies[-charge]
381
+ # print('running the full loop', time.time() - t1)
382
+ #######################################################################################
233
383
234
384
blocks = {}
235
385
if not return_data :
236
386
for c , idx in idxs .items ():
237
- num_elements = np .sum ([len (t ) for t in idx ])
238
- indexes = np .empty (num_elements , dtype = np .int64 )
239
- np .concatenate (idx , out = indexes )
240
- blocks [c ] = [indexes , (row_degeneracies [c ], column_degeneracies [- c ])]
387
+ # num_elements = np.sum([len(t) for t in idx])
388
+ # indexes = np.empty(num_elements, dtype=np.int64)
389
+ # np.concatenate(idx, out=indexes)
390
+ blocks [c ] = [idx , (row_degeneracies [c ], column_degeneracies [- c ])]
241
391
return blocks
242
392
243
393
for c , idx in idxs .items ():
@@ -246,7 +396,6 @@ def retrieve_non_zero_diagonal_blocks(
246
396
np .concatenate (idx , out = indexes )
247
397
blocks [c ] = np .reshape (data [indexes ],
248
398
(row_degeneracies [c ], column_degeneracies [- c ]))
249
-
250
399
return blocks
251
400
252
401
0 commit comments