|
29 | 29 | from thrift.transport.TTransport import TTransportException
|
30 | 30 | from thrift.Thrift import TApplicationException
|
31 | 31 | from thrift.protocol.TBinaryProtocol import TBinaryProtocolAccelerated
|
| 32 | +from impala._thrift_gen.ExecStats.ttypes import TExecStats |
32 | 33 | from impala._thrift_gen.TCLIService.ttypes import (
|
33 | 34 | TOpenSessionReq, TFetchResultsReq, TCloseSessionReq,
|
34 | 35 | TExecuteStatementReq, TGetInfoReq, TGetInfoType, TTypeId,
|
|
44 | 45 | from impala.compat import (Decimal, _xrange as xrange)
|
45 | 46 | from impala.error import (NotSupportedError, OperationalError,
|
46 | 47 | ProgrammingError, HiveServer2Error, HttpError)
|
| 48 | +from impala.exec_summary import build_exec_summary_table |
47 | 49 | from impala.interface import Connection, Cursor, _bind_parameters
|
48 | 50 | from impala.util import get_logger_and_init_null
|
49 | 51 |
|
@@ -727,8 +729,9 @@ def get_summary(self):
|
727 | 729 |
|
def build_summary_table(self, summary, output, idx=0,
                        is_fragment_root=False, indent_level=0):
    """Append the exec-summary rows for ``summary`` to ``output``.

    Thin wrapper around ``build_exec_summary_table``. It forwards the
    arguments it was given, always passing ``is_prettyprint=True`` and
    ``separate_prefix_column=False``.

    summary: the exec summary object to walk

    output: the list that receives the generated rows

    idx: index of the node to start from (defaults to 0)

    is_fragment_root: whether the starting node is the root of a fragment

    indent_level: indentation level of the starting node

    Returns whatever ``build_exec_summary_table`` returns.
    """
    # The helper takes its positional arguments in a different order than
    # this method: (summary, idx, indent_level, is_fragment_root, output).
    options = {"is_prettyprint": True, "separate_prefix_column": False}
    return build_exec_summary_table(
        summary, idx, indent_level, is_fragment_root, output, **options)
732 | 735 |
|
733 | 736 | def get_databases(self):
|
734 | 737 | def op():
|
@@ -1550,122 +1553,3 @@ def get_result_schema(self):
|
1550 | 1553 | log.debug('get_result_schema: schema=%s', schema)
|
1551 | 1554 |
|
1552 | 1555 | return schema
|
1553 |
| - |
1554 |
| - |
1555 |
def build_summary_table(summary, idx, is_fragment_root, indent_level, output):
    """Recursively build a list of rows of summary statistics, one per node.

    Direct translation of Coordinator::PrintExecSummary().

    summary: the TExecSummary object that contains all the summary data

    idx: the index of the node to print

    is_fragment_root: true if the node to print is the root of a fragment
        (and therefore feeds into an exchange)

    indent_level: the number of indentation steps to print before writing
        the node's label, to give the appearance of a tree. The 0th child of
        a node has the same indent_level as its parent. All other children
        have an indent_level of one greater than their parent.

    output: the list of rows into which to append the rows produced for this
        node and its children.

    Each row is a list of: prefixed label, number of exec_stats entries,
    average latency, maximum latency, cardinality, estimated cardinality,
    peak memory, estimated peak memory, and the node's label detail.

    Returns the index of the next node in summary.nodes that should be
    processed, used internally to this function only.
    """
    attrs = ("latency_ns", "cpu_time_ns", "cardinality", "memory_used")

    node = summary.nodes[idx]
    # Treat an unset (None) exec_stats the same as an empty list instead of
    # failing on iteration / len().
    exec_stats = node.exec_stats or []
    num_instances = len(exec_stats)

    # Aggregate (sum) and maximum of every attribute over all instances;
    # attributes that are not set on an instance are skipped.
    agg_stats = dict.fromkeys(attrs, 0)
    max_stats = dict.fromkeys(attrs, 0)
    for stats in exec_stats:
        for attr in attrs:
            val = getattr(stats, attr)
            if val is not None:
                agg_stats[attr] += val
                max_stats[attr] = max(max_stats[attr], val)

    if num_instances > 0:
        avg_time = agg_stats["latency_ns"] / num_instances
    else:
        avg_time = 0

    # If the node is a broadcast-receiving exchange node, the cardinality of
    # rows produced is the max over all instances (which should all have
    # received the same number of rows). Otherwise, the cardinality is the sum
    # over all instances which process disjoint partitions.
    if node.is_broadcast and is_fragment_root:
        cardinality = max_stats["cardinality"]
    else:
        cardinality = agg_stats["cardinality"]

    est_stats = node.estimated_stats

    label_prefix = ""
    if indent_level > 0:
        label_prefix = "|"
        if is_fragment_root:
            # NOTE(review): one space per level does not line up with the
            # two-character "--" used for non-root nodes; confirm whether
            # this literal was meant to be two spaces.
            label_prefix += " " * indent_level
        else:
            label_prefix += "--" * indent_level

    output.append([
        label_prefix + node.label,
        num_instances,
        _prettyprint_time(avg_time),
        _prettyprint_time(max_stats["latency_ns"]),
        _prettyprint_units(cardinality),
        _prettyprint_units(est_stats.cardinality),
        _prettyprint_bytes(max_stats["memory_used"]),
        _prettyprint_bytes(est_stats.memory_used),
        node.label_detail,
    ])

    # Only the map lookup is guarded: a KeyError means idx is not an exchange
    # node, a TypeError means exch_to_sender_map itself is not set. Errors
    # raised while building the sender's rows must not be swallowed here.
    try:
        sender_idx = summary.exch_to_sender_map[idx]
    except (KeyError, TypeError):
        pass
    else:
        # This is an exchange node, so the sender is a fragment root, and
        # should be printed next.
        build_summary_table(summary, sender_idx, True, indent_level, output)

    idx += 1
    if node.num_children > 0:
        # The first child keeps the parent's indent level but is emitted
        # after all other children, so collect its rows separately.
        first_child_output = []
        idx = build_summary_table(summary, idx, False, indent_level,
                                  first_child_output)
        # The loop variable is only a counter: each call returns the index
        # at which the next sibling's subtree starts.
        for _ in range(1, node.num_children):
            # All other children are indented (we only have 0, 1 or 2
            # children for every exec node at the moment)
            idx = build_summary_table(summary, idx, False, indent_level + 1,
                                      output)
        output += first_child_output
    return idx


def _prettyprint(val, units, divisor):
    """Scale val by divisor until it fits a unit and format it with that unit.

    Values in the first unit are printed as integers, scaled values with two
    decimals. Values too large for the last unit are reported in the last
    unit rather than yielding None.
    """
    last = len(units) - 1
    for pos, unit in enumerate(units):
        if val < divisor or pos == last:
            fmt = "%d%s" if pos == 0 else "%3.2f%s"
            return fmt % (val, unit)
        val /= divisor


def _prettyprint_bytes(byte_val):
    """Format a byte count using B/KB/MB/GB/TB (powers of 1024)."""
    return _prettyprint(byte_val, [' B', ' KB', ' MB', ' GB', ' TB'], 1024.0)


def _prettyprint_units(unit_val):
    """Format a plain count using no suffix/K/M/B (powers of 1000)."""
    return _prettyprint(unit_val, ["", "K", "M", "B"], 1000.0)


def _prettyprint_time(time_val):
    """Format a duration given in nanoseconds using ns/us/ms/s."""
    return _prettyprint(time_val, ["ns", "us", "ms", "s"], 1000.0)
0 commit comments