From caab476a87e6cbc4a80f35b675cf4d2ae9c29b06 Mon Sep 17 00:00:00 2001 From: Matt Seddon Date: Fri, 18 Oct 2024 12:47:42 +1100 Subject: [PATCH] update order_by docstring --- src/datachain/lib/dc.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/datachain/lib/dc.py b/src/datachain/lib/dc.py index 138e2a13..56e7dd9d 100644 --- a/src/datachain/lib/dc.py +++ b/src/datachain/lib/dc.py @@ -953,10 +953,22 @@ def _extend_to_data_model(self, method_name, *args, **kwargs): @resolve_columns def order_by(self, *args, descending: bool = False) -> "Self": - """Orders by specified set of signals. + """Orders by specified set of columns. Parameters: descending (bool): Whether to sort in descending order or not. + + Example: + ```py + dc.order_by("similarity_score", descending=True).limit(10) + ``` + + Note: + Order is not guaranteed when steps are added after an `order_by` statement. + For example, when using `from_dataset`, an `order_by` statement should be used if + the order of the records in the chain is important. + Using `order_by` directly before `limit` will give expected results. + See https://github.com/iterative/datachain/issues/477 for further details. """ if descending: args = tuple(sqlalchemy.desc(a) for a in args)