@@ -1,9 +1,9 @@
+import logging
 import threading
 import time
 import uuid
 from typing import Dict, Iterator, List, Optional

-from ray.data._internal.dataset_logger import DatasetLogger
 from ray.data._internal.execution.autoscaling_requester import (
     get_or_create_autoscaling_requester_actor,
 )
@@ -30,11 +30,12 @@
     select_operator_to_run,
     update_operator_states,
 )
+from ray.data._internal.logging import get_log_path
 from ray.data._internal.progress_bar import ProgressBar
 from ray.data._internal.stats import DatasetStats, StatsManager
 from ray.data.context import DataContext

-logger = DatasetLogger(__name__)
+logger = logging.getLogger(__name__)

 # Force a progress bar update after this many events processed. This avoids the
 # progress bar seeming to stall for very large scale workloads.
@@ -102,22 +103,16 @@ def execute(
         self._start_time = time.perf_counter()

         if not isinstance(dag, InputDataBuffer):
-            stdout_logger = logger.get_logger()
-            log_path = logger.get_datasets_log_path()
-            message = "Starting execution of Dataset."
-            if log_path is not None:
-                message += f" Full log is in {log_path}"
-            stdout_logger.info(message)
-            stdout_logger.info("Execution plan of Dataset: %s\n", dag)
-            logger.get_logger(log_to_stdout=False).info(
-                "Execution config: %s", self._options
-            )
-            if not self._options.verbose_progress:
-                logger.get_logger(log_to_stdout=False).info(
-                    "Tip: For detailed progress reporting, run "
-                    "`ray.data.DataContext.get_current()."
-                    "execution_options.verbose_progress = True`"
-                )
+            context = DataContext.get_current()
+            if context.print_on_execution_start:
+                message = "Starting execution of Dataset."
+                log_path = get_log_path()
+                if log_path is not None:
+                    message += f" Full log is in {log_path}"
+                logger.info(message)
+                logger.info(f"Execution plan of Dataset: {dag}")
+
+            logger.debug("Execution config: %s", self._options)

         # Setup the streaming DAG topology and start the runner thread.
         self._topology, _ = build_streaming_topology(dag, self._options)
@@ -171,7 +166,7 @@ def shutdown(self, execution_completed: bool = True):
         with self._shutdown_lock:
             if not self._execution_started or self._shutdown:
                 return
-            logger.get_logger(log_to_stdout=False).debug(f"Shutting down {self}.")
+            logger.debug(f"Shutting down {self}.")
             _num_shutdown += 1
             self._shutdown = True
             # Give the scheduling loop some time to finish processing.
@@ -190,9 +185,8 @@ def shutdown(self, execution_completed: bool = True):
             stats_summary_string = self._final_stats.to_summary().to_string(
                 include_parent=False
             )
-            logger.get_logger(log_to_stdout=context.enable_auto_log_stats).info(
-                stats_summary_string,
-            )
+            if context.enable_auto_log_stats:
+                logger.info(stats_summary_string)
             # Close the progress bars from top to bottom to avoid them jumping
             # around in the console after completion.
             if self._global_info:
@@ -325,7 +319,7 @@ def _scheduling_loop_step(self, topology: Topology) -> bool:
                     f"Operator {op} completed. "
                     f"Operator Metrics:\n{op._metrics.as_dict()}"
                 )
-                logger.get_logger(log_to_stdout=False).info(log_str)
+                logger.debug(log_str)
                 self._has_op_completed[op] = True

         # Keep going until all operators run to completion.
@@ -436,13 +430,12 @@ def _debug_dump_topology(topology: Topology, resource_manager: ResourceManager)
         topology: The topology to debug.
         resource_manager: The resource manager for this topology.
     """
-    logger.get_logger(log_to_stdout=False).info("Execution Progress:")
+    logger.debug("Execution Progress:")
     for i, (op, state) in enumerate(topology.items()):
-        logger.get_logger(log_to_stdout=False).info(
+        logger.debug(
            f"{i}: {state.summary_str(resource_manager)}, "
            f"Blocks Outputted: {state.num_completed_tasks}/{op.num_outputs_total()}"
        )
-    logger.get_logger(log_to_stdout=False).info("")


 def _log_op_metrics(topology: Topology) -> None:
@@ -454,4 +447,4 @@ def _log_op_metrics(topology: Topology) -> None:
     log_str = "Operator Metrics:\n"
     for op in topology:
         log_str += f"{op.name}: {op.metrics.as_dict()}\n"
-    logger.get_logger(log_to_stdout=False).info(log_str)
+    logger.debug(log_str)
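
The diff swaps Ray Data's custom DatasetLogger for a plain stdlib logger and demotes the formerly file-only chatter (operator metrics, topology dumps, execution config) from INFO to DEBUG. A minimal sketch of how a user might surface that DEBUG detail after this change; the pipeline is illustrative, and only the standard logging API plus public ray.data calls are assumed:

import logging

import ray

# Raise the "ray.data" logger to DEBUG so records emitted via logger.debug()
# (operator metrics, topology dumps, execution config) become visible --
# assuming the attached handlers pass DEBUG records through.
logging.getLogger("ray.data").setLevel(logging.DEBUG)

# Illustrative pipeline: executing it should log "Starting execution of
# Dataset." at INFO, plus the DEBUG-level detail enabled above.
ds = ray.data.range(1000).map(lambda row: {"id": row["id"] * 2})
ds.materialize()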