Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
using System;
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Featurizers;

namespace Samples.Dynamic
{
/// <summary>
/// Sample showing how the FeaturizeDateTime transform expands a single
/// timestamp column into a set of individual date/time feature columns.
/// </summary>
public static class DateTimeTransformer
{
    /// <summary>
    /// Runs the sample: featurizes one timestamp and prints every produced column.
    /// </summary>
    public static void Example()
    {
        // The MLContext is the entry point for all ML.NET operations; it also
        // provides exception tracking, logging and the source of randomness.
        var context = new MLContext();

        // A one-row in-memory dataset. The value is the timestamp for a
        // future date: 2025 June 30.
        var inputRows = new[]
        {
            new DateTimeInput() { Date = 1751241600 }
        };

        // Wrap the in-memory rows in an IDataView.
        var inputView = context.Data.LoadFromEnumerable(inputRows);

        // Estimator that splits the "Date" column into separate time-feature
        // columns, each prefixed with "DTC".
        var estimator = context.Transforms.FeaturizeDateTime("Date", "DTC");

        // Fit on the data, then apply the fitted transformer to the same data.
        var fitted = estimator.Fit(inputView);
        var outputView = fitted.Transform(inputView);

        // Read the result back as strongly typed rows. TransformedData declares
        // the original column plus the 21 "DTC"-prefixed columns.
        var outputRows = context.Data.CreateEnumerable<TransformedData>(
            outputView, reuseRowObject: false);

        // Print a header followed by one comma-separated line per row.
        Console.WriteLine("Features column obtained post-transformation.");
        foreach (var row in outputRows)
        {
            var cells = new object[]
            {
                row.Date, row.DTCYear, row.DTCMonth, row.DTCDay,
                row.DTCHour, row.DTCMinute, row.DTCSecond,
                row.DTCAmPm, row.DTCHour12, row.DTCDayOfWeek,
                row.DTCDayOfQuarter, row.DTCDayOfYear, row.DTCWeekOfMonth,
                row.DTCQuarterOfYear, row.DTCHalfOfYear, row.DTCWeekIso,
                row.DTCYearIso, row.DTCMonthLabel, row.DTCAmPmLabel,
                row.DTCDayOfWeekLabel, row.DTCHolidayName, row.DTCIsPaidTimeOff
            };
            Console.WriteLine(string.Join(", ", cells));
        }

        // Expected output:
        // Features column obtained post-transformation.
        // 1751241600, 2025, 6, 30, 0, 0, 0, 0, 0, 1, 91, 180, 4, 2, 1, 27, 2025, June, am, Monday, , 0
    }

    // Input row: a single timestamp stored as a long.
    private class DateTimeInput
    {
        public long Date;
    }

    // Output row. Every generated column name starts with "DTC" because that
    // is the prefix passed to FeaturizeDateTime in Example().
    private sealed class TransformedData
    {
        public long Date { get; set; }
        public int DTCYear { get; set; }
        public byte DTCMonth { get; set; }
        public byte DTCDay { get; set; }
        public byte DTCHour { get; set; }
        public byte DTCMinute { get; set; }
        public byte DTCSecond { get; set; }
        public byte DTCAmPm { get; set; }
        public byte DTCHour12 { get; set; }
        public byte DTCDayOfWeek { get; set; }
        public byte DTCDayOfQuarter { get; set; }
        public ushort DTCDayOfYear { get; set; }
        public ushort DTCWeekOfMonth { get; set; }
        public byte DTCQuarterOfYear { get; set; }
        public byte DTCHalfOfYear { get; set; }
        public byte DTCWeekIso { get; set; }
        public int DTCYearIso { get; set; }
        public string DTCMonthLabel { get; set; }
        public string DTCAmPmLabel { get; set; }
        public string DTCDayOfWeekLabel { get; set; }
        public string DTCHolidayName { get; set; }
        public byte DTCIsPaidTimeOff { get; set; }
    }
}
}
27 changes: 27 additions & 0 deletions src/Microsoft.ML.Featurizers/Common.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Runtime.InteropServices;
using System.Security;
using System.Text;
Expand Down Expand Up @@ -219,5 +220,31 @@ internal static TypeId GetNativeTypeIdFromType(this Type type)

throw new InvalidOperationException($"Unsupported type {type}");
}

// The native featurizers do not currently support CentOS 7. This method checks the OS and returns true if it is CentOS 7.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about RedHat 7?

/// <summary>
/// Reports whether the current operating system identifies itself as CentOS 7.
/// </summary>
/// <remarks>
/// Looks for the marker text "CentOS Linux 7" in the distribution release files
/// (<c>/etc/*-release</c>). The files are read directly rather than by launching
/// <c>/bin/bash -c "cat /etc/*-release"</c>: starting a process is heavy-weight for
/// this check, and it throws when bash is not installed.
/// The check is best-effort: a missing or unreadable release file is skipped
/// instead of raising an exception.
/// </remarks>
/// <returns>
/// <c>true</c> when running on Linux and a release file contains "CentOS Linux 7";
/// otherwise <c>false</c>.
/// </returns>
internal static bool OsIsCentOS7()
{
    // Only Linux has the release files; every other platform is "not CentOS 7".
    if (!RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
    {
        return false;
    }

    string[] releaseFiles;
    try
    {
        // Same file set the shell glob /etc/*-release would expand to.
        releaseFiles = Directory.GetFiles("/etc", "*-release");
    }
    catch (Exception e) when (e is IOException || e is UnauthorizedAccessException)
    {
        // /etc could not be listed, so there is nothing to inspect.
        return false;
    }

    foreach (string releaseFile in releaseFiles)
    {
        try
        {
            if (File.ReadAllText(releaseFile).Contains("CentOS Linux 7"))
            {
                return true;
            }
        }
        catch (Exception e) when (e is IOException || e is UnauthorizedAccessException)
        {
            // Unreadable entry (e.g. dangling symlink or no permission): keep
            // checking the remaining files, as `cat` would have done.
        }
    }

    return false;
}
}
}
Loading