@@ -2140,9 +2140,10 @@ bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum)
21402140// optExtractArrIndex: Try to extract the array index from "tree".
21412141//
21422142// Arguments:
2143- // tree the tree to be checked if it is the array [] operation.
2144- // result the extracted GT_INDEX information is updated in result.
2145- // lhsNum for the root level (function is recursive) callers should pass BAD_VAR_NUM.
2143+ // tree the tree to be checked if it is the array [] operation.
2144+ // result the extracted GT_INDEX information is updated in result.
2145+ // lhsNum for the root level (function is recursive) callers should pass BAD_VAR_NUM.
2146+ // topLevelIsFinal OUT: set to `true` if we see a non-TYP_REF element type array.
21462147//
21472148// Return Value:
21482149// Returns true if array index can be extracted, else, return false. See assumption about
@@ -2203,7 +2204,7 @@ bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum)
22032204// used as an index expression, or array base var is used as the array base. This saves us from parsing
22042205// all the forms that morph can create, especially for arrays of structs.
22052206//
2206- bool Compiler::optExtractArrIndex (GenTree* tree, ArrIndex* result, unsigned lhsNum)
2207+ bool Compiler::optExtractArrIndex (GenTree* tree, ArrIndex* result, unsigned lhsNum, bool * topLevelIsFinal )
22072208{
22082209 if (tree->gtOper != GT_COMMA)
22092210 {
@@ -2247,37 +2248,31 @@ bool Compiler::optExtractArrIndex(GenTree* tree, ArrIndex* result, unsigned lhsN
22472248 result->useBlock = compCurBB;
22482249 result->rank ++;
22492250
2251+ // If the array element type (saved from the GT_INDEX node during morphing) is anything but
2252+ // TYP_REF, then it must be the final level of a jagged array.
2253+ assert (arrBndsChk->gtInxType != TYP_VOID);
2254+ *topLevelIsFinal = (arrBndsChk->gtInxType != TYP_REF);
2255+
22502256 return true ;
22512257}
22522258
22532259// ---------------------------------------------------------------------------------------------------------------
2254- // optReconstructArrIndex: Reconstruct array index .
2260+ // optReconstructArrIndexHelp: Helper function for optReconstructArrIndex. See that function for more details .
22552261//
22562262// Arguments:
2257- // tree the tree to be checked if it is an array [][][] operation.
2258- // result OUT: the extracted GT_INDEX information.
2259- // lhsNum for the root level (function is recursive) callers should pass BAD_VAR_NUM.
2263+ // tree the tree to be checked if it is an array [][][] operation.
2264+ // result OUT: the extracted GT_INDEX information.
2265+ // lhsNum var number of array object we're looking for.
2266+ // topLevelIsFinal OUT: set to `true` if we reached a non-TYP_REF element type array.
22602267//
22612268// Return Value:
22622269// Returns true if array index can be extracted, else, return false. "rank" field in
2263- // "result" contains the array access depth. The "indLcls" fields contain the indices.
2264- //
2265- // Operation:
2266- // Recursively look for a list of array indices. For example, if the tree is
2267- // V03 = (V05 = V00[V01]), V05[V02]
2268- // that corresponds to access of V00[V01][V02]. The return value would then be:
2269- // ArrIndex result { arrLcl: V00, indLcls: [V01, V02], rank: 2 }
2270- //
2271- // Note that the array expression is implied by the array bounds check under the COMMA, and the array bounds
2272- // checks is what is parsed from the morphed tree; the array addressing expression is not parsed.
2273- //
2274- // Assumption:
2275- // The method extracts only if the array base and indices are GT_LCL_VAR.
2270+ // "result" contains the array access depth. The "indLcls" field contains the indices.
22762271//
2277- bool Compiler::optReconstructArrIndex (GenTree* tree, ArrIndex* result, unsigned lhsNum)
2272+ bool Compiler::optReconstructArrIndexHelp (GenTree* tree, ArrIndex* result, unsigned lhsNum, bool * topLevelIsFinal )
22782273{
22792274 // If we can extract "tree" (which is a top level comma) return.
2280- if (optExtractArrIndex (tree, result, lhsNum))
2275+ if (optExtractArrIndex (tree, result, lhsNum, topLevelIsFinal ))
22812276 {
22822277 return true ;
22832278 }
@@ -2294,18 +2289,152 @@ bool Compiler::optReconstructArrIndex(GenTree* tree, ArrIndex* result, unsigned
22942289 GenTree* rhs = before->gtGetOp2 ();
22952290
22962291 // "rhs" should contain a GT_INDEX
2297- if (!lhs->IsLocal () || !optReconstructArrIndex (rhs, result, lhsNum))
2292+ if (!lhs->IsLocal () || !optReconstructArrIndexHelp (rhs, result, lhsNum, topLevelIsFinal ))
22982293 {
22992294 return false ;
23002295 }
2296+
2297+ // If rhs represents an array of elements other than arrays (e.g., an array of structs),
2298+ // then we can't go any farther.
2299+ if (*topLevelIsFinal)
2300+ {
2301+ return false ;
2302+ }
2303+
23012304 unsigned lhsNum = lhs->AsLclVarCommon ()->GetLclNum ();
23022305 GenTree* after = tree->gtGetOp2 ();
23032306 // Pass the "lhsNum", so we can verify if indeed it is used as the array base.
2304- return optExtractArrIndex (after, result, lhsNum);
2307+ return optExtractArrIndex (after, result, lhsNum, topLevelIsFinal );
23052308 }
23062309 return false ;
23072310}
23082311
2312+ // ---------------------------------------------------------------------------------------------------------------
2313+ // optReconstructArrIndex: Reconstruct array index from a post-morph tree.
2314+ //
2315+ // Arguments:
2316+ // tree the tree to be checked if it is an array [][][] operation.
2317+ // result OUT: the extracted GT_INDEX information.
2318+ //
2319+ // Return Value:
2320+ // Returns true if array index can be extracted, else, return false. "rank" field in
2321+ // "result" contains the array access depth. The "indLcls" field contains the indices.
2322+ //
2323+ // Operation:
2324+ // Recursively look for a list of array indices. For example, if the tree is
2325+ // V03 = (V05 = V00[V01]), V05[V02]
2326+ // that corresponds to access of V00[V01][V02]. The return value would then be:
2327+ // ArrIndex result { arrLcl: V00, indLcls: [V01, V02], rank: 2 }
2328+ //
2329+ // Note that the array expression is implied by the array bounds check under the COMMA, and the array bounds
2330+ // checks are what is parsed from the morphed tree; the array addressing expression is not parsed.
2331+ // However, the array bounds checks are not quite sufficient because of the way "morph" alters the trees.
2332+ // Specifically, we normally see a COMMA node with a LHS of the bounds check and RHS of the morphed
2333+ // array INDEX expression. E.g., for int[][], a[i][j] we have a pre-morph tree:
2334+ //
2335+ // \--* INDEX int
2336+ // +--* INDEX ref
2337+ // | +--* LCL_VAR ref V00 loc0
2338+ // | \--* LCL_VAR int V02 loc2
2339+ // \--* LCL_VAR int V03 loc3
2340+ //
2341+ // and post-morph tree:
2342+ //
2343+ // \--* COMMA int
2344+ // +--* ASG ref
2345+ // | +--* LCL_VAR ref V19 tmp12
2346+ // | \--* COMMA ref
2347+ // | +--* BOUNDS_CHECK_Rng void
2348+ // | | +--* LCL_VAR int V02 loc2
2349+ // | | \--* ARR_LENGTH int
2350+ // | | \--* LCL_VAR ref V00 loc0
2351+ // | \--* IND ref
2352+ // | \--* ADD byref
2353+ // | +--* LCL_VAR ref V00 loc0
2354+ // | \--* ADD long
2355+ // | +--* LSH long
2356+ // | | +--* CAST long <- uint
2357+ // | | | \--* LCL_VAR int V02 loc2
2358+ // | | \--* CNS_INT long 3
2359+ // | \--* CNS_INT long 16 Fseq[#FirstElem]
2360+ // \--* COMMA int
2361+ // +--* BOUNDS_CHECK_Rng void
2362+ // | +--* LCL_VAR int V03 loc3
2363+ // | \--* ARR_LENGTH int
2364+ // | \--* LCL_VAR ref V19 tmp12
2365+ // \--* IND int
2366+ // \--* ADD byref
2367+ // +--* LCL_VAR ref V19 tmp12
2368+ // \--* ADD long
2369+ // +--* LSH long
2370+ // | +--* CAST long <- uint
2371+ // | | \--* LCL_VAR int V03 loc3
2372+ // | \--* CNS_INT long 2
2373+ // \--* CNS_INT long 16 Fseq[#FirstElem]
2374+ //
2375+ // However, for an array of structs that contains an array field, e.g. ValueTuple<int[], int>[], the
2376+ // expression a[i].Item1[j] has the pre-morph tree:
2377+ //
2378+ // \--* INDEX int
2379+ // +--* FIELD ref Item1
2380+ // | \--* ADDR byref
2381+ // | \--* INDEX struct<System.ValueTuple`2[System.Int32[],System.Int32], 16>
2382+ // | +--* LCL_VAR ref V01 loc1
2383+ // | \--* LCL_VAR int V04 loc4
2384+ // \--* LCL_VAR int V06 loc6
2385+ //
2386+ // Morph "hoists" the bounds check above the struct field access:
2387+ //
2388+ // \--* COMMA int
2389+ // +--* ASG ref
2390+ // | +--* LCL_VAR ref V23 tmp16
2391+ // | \--* COMMA ref
2392+ // | +--* BOUNDS_CHECK_Rng void
2393+ // | | +--* LCL_VAR int V04 loc4
2394+ // | | \--* ARR_LENGTH int
2395+ // | | \--* LCL_VAR ref V01 loc1
2396+ // | \--* IND ref
2397+ // | \--* ADDR byref Zero Fseq[Item1]
2398+ // | \--* IND struct<System.ValueTuple`2[System.Int32[],System.Int32], 16>
2399+ // | \--* ADD byref
2400+ // | +--* LCL_VAR ref V01 loc1
2401+ // | \--* ADD long
2402+ // | +--* LSH long
2403+ // | | +--* CAST long <- uint
2404+ // | | | \--* LCL_VAR int V04 loc4
2405+ // | | \--* CNS_INT long 4
2406+ // | \--* CNS_INT long 16 Fseq[#FirstElem]
2407+ // \--* COMMA int
2408+ // +--* BOUNDS_CHECK_Rng void
2409+ // | +--* LCL_VAR int V06 loc6
2410+ // | \--* ARR_LENGTH int
2411+ // | \--* LCL_VAR ref V23 tmp16
2412+ // \--* IND int
2413+ // \--* ADD byref
2414+ // +--* LCL_VAR ref V23 tmp16
2415+ // \--* ADD long
2416+ // +--* LSH long
2417+ // | +--* CAST long <- uint
2418+ // | | \--* LCL_VAR int V06 loc6
2419+ // | \--* CNS_INT long 2
2420+ // \--* CNS_INT long 16 Fseq[#FirstElem]
2421+ //
2422+ // This should not be parsed as a jagged array (e.g., a[i][j]). To ensure that it is not, the type of the
2423+ // GT_INDEX node is stashed in the GT_BOUNDS_CHECK node during morph. If we see a bounds check node where
2424+ // the GT_INDEX was not TYP_REF, then it must be the outermost jagged array level. E.g., if it is
2425+ // TYP_STRUCT, then we have an array of structs, and any further bounds checks must be of one of its fields.
2426+ //
2427+ // It would be much better if we didn't need to parse these trees at all, and did all this work pre-morph.
2428+ //
2429+ // Assumption:
2430+ // The method extracts only if the array base and indices are GT_LCL_VAR.
2431+ //
2432+ bool Compiler::optReconstructArrIndex (GenTree* tree, ArrIndex* result)
2433+ {
2434+ bool topLevelIsFinal = false ; // OUT flag required by the helper; its value is not read after the call.
2435+ return optReconstructArrIndexHelp (tree, result, BAD_VAR_NUM, &topLevelIsFinal); // Root-level call, so BAD_VAR_NUM is passed as lhsNum.
2436+ }
2437+
23092438// ----------------------------------------------------------------------------------------------
23102439// optCanOptimizeByLoopCloning: Check if the tree can be optimized by loop cloning and if so,
23112440// identify as potential candidate and update the loop context.
@@ -2329,7 +2458,7 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop
23292458 ArrIndex arrIndex (getAllocator (CMK_LoopClone));
23302459
23312460 // Check if array index can be optimized.
2332- if (optReconstructArrIndex (tree, &arrIndex, BAD_VAR_NUM ))
2461+ if (optReconstructArrIndex (tree, &arrIndex))
23332462 {
23342463 assert (tree->gtOper == GT_COMMA);
23352464
0 commit comments