|
| 1 | +// Licensed to the .NET Foundation under one or more agreements. |
| 2 | +// The .NET Foundation licenses this file to you under the MIT license. |
| 3 | +// See the LICENSE file in the project root for more information. |
| 4 | + |
| 5 | +using System; |
| 6 | +using System.Collections; |
| 7 | +using Microsoft.ML.Runtime.Data; |
| 8 | +using Microsoft.ML.Runtime.Internal.Utilities; |
| 9 | + |
namespace Microsoft.ML.Runtime.Ensemble
{
    /// <summary>
    /// Helpers for masking out feature slots that were not selected for an ensemble member.
    /// </summary>
    internal static class EnsembleUtils
    {
        /// <summary>
        /// Produces a dataset in which every feature slot not flagged in <paramref name="features"/>
        /// is replaced by the default value (zero).
        /// </summary>
        /// <param name="host">Host passed through to the column mapper that performs the masking.</param>
        /// <param name="data">Input data; its schema must expose a feature column.</param>
        /// <param name="features">One bit per feature slot; a set bit keeps the slot.</param>
        /// <returns>
        /// <paramref name="data"/> itself when every slot is selected; otherwise a new
        /// <see cref="RoleMappedData"/> over a view whose feature column has been masked,
        /// created with the same column-role names as the input.
        /// </returns>
        public static RoleMappedData SelectFeatures(IHost host, RoleMappedData data, BitArray features)
        {
            Contracts.AssertValue(host);
            Contracts.AssertValue(data);
            var featureColumn = data.Schema.Feature;
            Contracts.Assert(featureColumn != null);
            Contracts.AssertValue(features);

            var featureType = featureColumn.Type;
            Contracts.Assert(features.Length == featureType.VectorSize);

            // When every slot is selected there is nothing to mask, so hand back the input untouched.
            int selectedCount = Utils.GetCardinality(features);
            if (selectedCount == featureType.VectorSize)
                return data;

            // REVIEW: This doesn't preserve metadata on the features column. Should it?
            // The mapper reads and writes the same column name, so the masked column takes the
            // place of the original feature column in the resulting view.
            var columnName = featureColumn.Name;
            var maskedView = LambdaColumnMapper.Create(
                host, "FeatureSelector", data.Data, columnName, columnName, featureType, featureType,
                (ref VBuffer<float> src, ref VBuffer<float> dst) => SelectFeatures(ref src, features, selectedCount, ref dst));

            return RoleMappedData.Create(maskedView, data.Schema.GetColumnRoleNames());
        }

        /// <summary>
        /// Copies into <paramref name="dst"/> the entries of <paramref name="src"/> whose index is set in
        /// <paramref name="includedIndices"/>; every other slot takes the value default(T).
        /// The logical length of <paramref name="dst"/> equals src.Length.
        /// </summary>
        /// <typeparam name="T">Element type of the vectors.</typeparam>
        /// <param name="src">Source vector, dense or sparse.</param>
        /// <param name="includedIndices">One bit per slot of <paramref name="src"/>; a set bit keeps the slot.</param>
        /// <param name="cardinality">
        /// Number of set bits in <paramref name="includedIndices"/>; asserted to be strictly less than src.Length.
        /// </param>
        /// <param name="dst">
        /// Receives the result. Its existing value and index arrays are reused when they are large enough.
        /// </param>
        public static void SelectFeatures<T>(ref VBuffer<T> src, BitArray includedIndices, int cardinality, ref VBuffer<T> dst)
        {
            Contracts.Assert(Utils.Size(includedIndices) == src.Length);
            Contracts.Assert(cardinality == Utils.GetCardinality(includedIndices));
            Contracts.Assert(cardinality < src.Length);

            int length = src.Length;
            T[] outValues = dst.Values;
            int[] outIndices = dst.Indices;

            if (!src.IsDense)
            {
                // Sparse input: the result stays sparse and holds only the explicit entries that are selected.
                int valueCapacity = Utils.Size(outValues);
                int indexCapacity = Utils.Size(outIndices);

                // The number of kept entries cannot exceed src.Count or cardinality. Buffers already
                // able to hold src.Count entries are reused; otherwise any buffer smaller than
                // cardinality is replaced by one of exactly that size.
                bool needsResize = valueCapacity < src.Count || indexCapacity < src.Count;
                if (needsResize && valueCapacity < cardinality)
                    outValues = new T[cardinality];
                if (needsResize && indexCapacity < cardinality)
                    outIndices = new int[cardinality];

                int kept = 0;
                for (int slot = 0; slot < src.Count; slot++)
                {
                    int featureIndex = src.Indices[slot];
                    if (!includedIndices[featureIndex])
                        continue;

                    outValues[kept] = src.Values[slot];
                    outIndices[kept] = featureIndex;
                    kept++;
                }

                dst = new VBuffer<T>(length, kept, outValues, outIndices);
                return;
            }

            if (cardinality >= length / 2)
            {
                // Dense input with at least half of the slots kept: emit a dense result and
                // overwrite the excluded slots with default(T).
                if (Utils.Size(outValues) < length)
                    outValues = new T[length];

                for (int i = 0; i < length; i++)
                    outValues[i] = includedIndices[i] ? src.Values[i] : default(T);

                dst = new VBuffer<T>(length, outValues, outIndices);
                return;
            }

            // Dense input with fewer than half of the slots kept: emit a sparse result that
            // stores only the selected slots.
            if (Utils.Size(outValues) < cardinality)
                outValues = new T[cardinality];
            if (Utils.Size(outIndices) < cardinality)
                outIndices = new int[cardinality];

            int written = 0;
            for (int i = 0; i < length; i++)
            {
                if (!includedIndices[i])
                    continue;

                Contracts.Assert(written < cardinality);
                outValues[written] = src.Values[i];
                outIndices[written] = i;
                written++;
            }

            Contracts.Assert(written == cardinality);
            dst = new VBuffer<T>(length, written, outValues, outIndices);
        }
    }
}
0 commit comments