Skip to content

Commit

Permalink
Improve verbosity of AMP Logging
Browse files Browse the repository at this point in the history
- Adds a message at the end of the grappler pass listing:
    Number of whitelisted ops, number of blacklisted ops, and number
    of nodes which were not converted because of blacklisted ops*
- References the documentation where users can learn about modifying
    the casting lists

*=Only takes into account the nodes downstream of blacklisted ops,
    not those which were not casted because they were no longer between
    two whitelisted ops
  • Loading branch information
MattConley authored and 2sin18 committed Jun 24, 2021
1 parent 1b7aef4 commit e5f5044
Showing 1 changed file with 59 additions and 4 deletions.
63 changes: 59 additions & 4 deletions tensorflow/core/grappler/optimizers/auto_mixed_precision.cc
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,10 @@ class AutoMixedPrecisionImpl {
std::pair<NodeTypeIdSet, NodeTypeIdSet>>
DataStructureOpsMap;

Status PrintSummaryInfo(
int num_processable_nodes, int num_recognized_nodes,
int num_whitelist_nodes, int num_blacklist_nodes,
int num_blacklist_affected_nodes);
Status PrintDebugLogs(bool preop, size_t timestamp);
void LogSkippedNode(const NodeDef& node) const;
bool MustPreserve(const NodeDef& node) const;
Expand All @@ -955,9 +959,10 @@ class AutoMixedPrecisionImpl {
const absl::flat_hash_map<string, TypeAttrId>& write_ops,
const absl::flat_hash_map<string, TypeAttrId>& read_ops,
DataStructureOpsMap* object_clients_map) const;
void RecognizeNodes(int* processable_nodes, int* num_recognized_nodes) const;
void AddWhitelistOps(absl::flat_hash_set<int>* white_set) const;
void PropagateBlackFwdThroughClearAndGray(
absl::flat_hash_set<int>* black_set) const;
absl::flat_hash_set<int>* black_set, int* blacklist_nodes) const;
void ForceColorMatchBetweenDataStructureOps(
const DataStructureOpsMap& object_clients_map,
absl::flat_hash_set<int>* white_set,
Expand Down Expand Up @@ -1002,6 +1007,26 @@ bool AutoMixedPrecisionImpl::NodeHasFP16KernelForTypeAttr(
return IsKernelRegisteredForNode(node_copy).ok();
}

Status AutoMixedPrecisionImpl::PrintSummaryInfo(int num_processable_nodes,
int num_recognized_nodes, int num_whitelist_nodes, int num_blacklist_nodes,
int num_blacklist_affected_nodes) {
LOG(INFO) << "*******************************************************\n"
<< "Automatic Mixed Precision Grappler Pass Summary:\n\n"
<< "Total processable nodes: " << num_processable_nodes << "\n"
<< "Listed nodes available for conversion: " << num_recognized_nodes << " ("
<< (int)(100*(double)num_recognized_nodes/(double)num_processable_nodes)
<< " %)\nWhitelisted nodes converted: " << num_whitelist_nodes << "\n"
<< "Blacklisted nodes blocking conversion: " << num_blacklist_nodes << "\n"
<< "Nodes blocked from conversion by blacklisted nodes: "
<< num_blacklist_affected_nodes << "\n\n"
<< "For more information regarding automatic mixed precision, "
<< "including how to augment the op lists to improve casting performance, "
<< "please see the documentation available here:\n"
<< "https://docs.nvidia.com/deeplearning/frameworks/"
<< "tensorflow-user-guide/index.html#tfamp\n\n";
return Status::OK();
}

Status AutoMixedPrecisionImpl::PrintDebugLogs(bool preop, size_t timestamp) {
string prepend_path;
TF_RETURN_IF_ERROR(ReadStringFromEnvVar(
Expand Down Expand Up @@ -1331,6 +1356,11 @@ Status AutoMixedPrecisionImpl::Optimize() {
// This is done to increase the number of ops in the white_set without
// affecting numerical stability.

VLOG(2) << "Counting recognized nodes";
int num_processable_nodes = 0;
int num_recognized_nodes = 0;
RecognizeNodes(&num_processable_nodes, &num_recognized_nodes);

absl::flat_hash_set<int> white_set;
VLOG(2) << "Beginning pass 1 to add whitelist ops";
AddWhitelistOps(&white_set);
Expand All @@ -1340,16 +1370,19 @@ Status AutoMixedPrecisionImpl::Optimize() {
LOG(INFO) << "No whitelist ops found, nothing to do";
return Status::OK();
}
int num_whitelist_nodes = white_set.size();

absl::flat_hash_set<int> black_set;
VLOG(2) << "Beginning pass 2 to propagate black forwards from blacklist ops "
"through clear/graylist ops";
PropagateBlackFwdThroughClearAndGray(&black_set);
int num_blacklist_nodes = 0;
PropagateBlackFwdThroughClearAndGray(&black_set, &num_blacklist_nodes);
VLOG(2) << "Finished pass 2";

VLOG(2) << "Forcing color match between data structure ops";
ForceColorMatchBetweenDataStructureOps(object_clients_map, &white_set,
&black_set);
int num_blacklist_affected_nodes = black_set.size() - num_blacklist_nodes;

VLOG(2) << "Beginning pass 3 to set clear and gray nodes to white if they "
"are between white ops";
Expand All @@ -1376,6 +1409,9 @@ Status AutoMixedPrecisionImpl::Optimize() {
TF_RETURN_IF_ERROR(ChangeTypeAttrsAndAddCasts(white_set));
VLOG(2) << "Finished final pass";

TF_RETURN_IF_ERROR(PrintSummaryInfo(
num_processable_nodes, num_recognized_nodes, num_whitelist_nodes,
num_blacklist_nodes, num_blacklist_affected_nodes));
TF_RETURN_IF_ERROR(PrintDebugLogs(/* preop = */ false, timestamp));

return Status::OK();
Expand Down Expand Up @@ -1412,19 +1448,37 @@ Status AutoMixedPrecisionImpl::AddDataStructureOpsToMap(
return Status::OK();
}

void AutoMixedPrecisionImpl::RecognizeNodes(
int* processable_nodes, int* num_recognized_nodes) const {
// Add whitelisted ops to white_set.
for (int root_idx = 0; root_idx < graph_type_view_.num_nodes(); ++root_idx) {
const NodeTypeId& root = *graph_type_view_.GetNode(root_idx);
if (!ShouldProcess(*root.node)) continue;
++*processable_nodes;
if (fp16_whitelist_.count(root.node->op()) ||
fp16_blacklist_.count(root.node->op()) ||
fp16_graylist_.count(root.node->op()) ||
fp16_clearlist_.count(root.node->op())) {
++*num_recognized_nodes;
}
}
}

void AutoMixedPrecisionImpl::AddWhitelistOps(
absl::flat_hash_set<int>* white_set) const {
// Add whitelisted ops to white_set.
int num_whitelisted_nodes = 0;
for (int root_idx = 0; root_idx < graph_type_view_.num_nodes(); ++root_idx) {
const NodeTypeId& root = *graph_type_view_.GetNode(root_idx);
if (!ShouldProcess(*root.node)) continue;
bool force_white = force_all_fp16_ && CanForceFP16(*root.node);
if (fp16_whitelist_.count(root.node->op()) || force_white) {
bool inserted = white_set->insert(root_idx).second;
if (VLOG_IS_ON(2) && inserted) {
if (inserted) {
VLOG(2) << "Painting type " << root.type_attr.DebugString()
<< " of node " << root.node->name() << " WHITE because its op "
<< root.node->op() << " is on the whitelist";
++num_whitelisted_nodes;
}
}
}
Expand All @@ -1436,7 +1490,7 @@ void AutoMixedPrecisionImpl::AddWhitelistOps(
// E.g., black -> gray -> clear -> gray -> clear -> white -> gray
// becomes: black -> black -> black -> black -> clear -> white -> gray.
void AutoMixedPrecisionImpl::PropagateBlackFwdThroughClearAndGray(
absl::flat_hash_set<int>* black_set) const {
absl::flat_hash_set<int>* black_set, int* blacklist_nodes) const {
if (force_all_fp16_) return;

// Find clear nodes that are upstream of black or gray.
Expand All @@ -1447,6 +1501,7 @@ void AutoMixedPrecisionImpl::PropagateBlackFwdThroughClearAndGray(
fp16_graylist_.count(root.node->op()))) {
continue;
}
if (fp16_blacklist_.count(root.node->op())) { ++*blacklist_nodes; }
DfsTypeTraversal(graph_type_view_, {&root},
TypeTraversalDirection::kFollowInputs,
DfsTypePredicates::Enter([&](int idx) -> bool {
Expand Down

0 comments on commit e5f5044

Please sign in to comment.