Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -57,18 +57,23 @@ public static void Example()

IDataView trainDataset = trainTestData.TrainSet;
IDataView testDataset = trainTestData.TestSet;

var pipeline = mlContext.Model.ImageClassification(
"ImagePath", "Label",
// Just by changing/selecting InceptionV3 here instead of
// ResnetV2101 you can try a different architecture/pre-trained
// model.
arch: ImageClassificationEstimator.Architecture.ResnetV2101,
batchSize: 10,
learningRate: 0.01f,
earlyStopping: new ImageClassificationEstimator.EarlyStopping(minDelta: 0.001f, patience:20, metric:ImageClassificationEstimator.EarlyStoppingMetric.Loss),
metricsCallback: (metrics) => Console.WriteLine(metrics),
validationSet: testDataset);

var validationSet = mlContext.Transforms.LoadImages("Image", fullImagesetFolderPath, false, "ImagePath") // false indicates we want the image as a VBuffer<byte>
.Fit(testDataset)
.Transform(testDataset);

var pipeline = mlContext.Transforms.LoadImages("Image", fullImagesetFolderPath, false, "ImagePath") // false indicates we want the image as a VBuffer<byte>
.Append(mlContext.Model.ImageClassification(
"Image", "Label",
// Just by changing/selecting InceptionV3 here instead of
// ResnetV2101 you can try a different architecture/pre-trained
// model.
arch: ImageClassificationEstimator.Architecture.ResnetV2101,
batchSize: 10,
learningRate: 0.01f,
earlyStopping: new ImageClassificationEstimator.EarlyStopping(minDelta: 0.001f, patience: 20, metric: ImageClassificationEstimator.EarlyStoppingMetric.Loss),
metricsCallback: (metrics) => Console.WriteLine(metrics),
validationSet: validationSet));


Console.WriteLine("*** Training the image classification model with " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ public static void Example()
//Download the image set and unzip
string finalImagesFolderName = DownloadImageSet(
imagesDownloadFolderPath);

string fullImagesetFolderPath = Path.Combine(
imagesDownloadFolderPath, finalImagesFolderName);

Expand Down Expand Up @@ -58,19 +57,25 @@ public static void Example()
IDataView trainDataset = trainTestData.TrainSet;
IDataView testDataset = trainTestData.TestSet;

var pipeline = mlContext.Model.ImageClassification(
"ImagePath", "Label",
// Just by changing/selecting InceptionV3 here instead of
// ResnetV2101 you can try a different architecture/pre-trained
// model.
arch: ImageClassificationEstimator.Architecture.ResnetV2101,
epoch: 50,
batchSize: 10,
learningRate: 0.01f,
metricsCallback: (metrics) => Console.WriteLine(metrics),
validationSet: testDataset,
disableEarlyStopping: true)
.Append(mlContext.Transforms.Conversion.MapKeyToValue(outputColumnName: "PredictedLabel", inputColumnName: "PredictedLabel"));
var validationSet = mlContext.Transforms.LoadImages("Image", fullImagesetFolderPath, false, "ImagePath") // false indicates we want the image as a VBuffer<byte>
.Fit(testDataset)
.Transform(testDataset);

var pipeline = mlContext.Transforms.LoadImages("Image", fullImagesetFolderPath, false, "ImagePath") // false indicates we want the image as a VBuffer<byte>
.Append(mlContext.Model.ImageClassification(
"Image", "Label",
// Just by changing/selecting InceptionV3 here instead of
// ResnetV2101 you can try a different architecture/pre-trained
// model.
arch: ImageClassificationEstimator.Architecture.ResnetV2101,
epoch: 50,
batchSize: 10,
learningRate: 0.01f,
metricsCallback: (metrics) => Console.WriteLine(metrics),
validationSet: validationSet,
disableEarlyStopping: true)
.Append(mlContext.Transforms.Conversion.MapKeyToValue(outputColumnName: "PredictedLabel", inputColumnName: "PredictedLabel")));


Console.WriteLine("*** Training the image classification model with " +
"DNN Transfer Learning on top of the selected pre-trained " +
Expand Down Expand Up @@ -98,6 +103,7 @@ public static void Example()
EvaluateModel(mlContext, testDataset, loadedModel);

watch = System.Diagnostics.Stopwatch.StartNew();

TrySinglePrediction(fullImagesetFolderPath, mlContext, loadedModel);

watch.Stop();
Expand Down Expand Up @@ -125,6 +131,9 @@ private static void TrySinglePrediction(string imagesForPredictions,
IEnumerable<ImageData> testImages = LoadImagesFromDirectory(
imagesForPredictions, false);

byte[] imgBytes = File.ReadAllBytes(testImages.First().ImagePath);
VBuffer<Byte> imgData = new VBuffer<byte>(imgBytes.Length, imgBytes);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't look like this in-memory image is being used for the prediction when using the PredictionEngine, right?
In fact, it looks like the 'imgData' object is not used later on.

We need to support, and test, predictions that are given the in-memory image instead of a file path.
I cannot find any such code.


ImageData imageToPredict = new ImageData
{
ImagePath = testImages.First().ImagePath
Expand Down Expand Up @@ -160,13 +169,12 @@ private static void EvaluateModel(MLContext mlContext,
Console.WriteLine("Predicting and Evaluation took: " +
(elapsed2Ms / 1000).ToString() + " seconds");
}

public static IEnumerable<ImageData> LoadImagesFromDirectory(string folder,
bool useFolderNameAsLabel = true)
{
var files = Directory.GetFiles(folder, "*",
searchOption: SearchOption.AllDirectories);

foreach (var file in files)
{
if (Path.GetExtension(file) != ".jpg")
Expand All @@ -186,7 +194,7 @@ public static IEnumerable<ImageData> LoadImagesFromDirectory(string folder,
}
}
}

yield return new ImageData()
{
ImagePath = file,
Expand Down Expand Up @@ -299,4 +307,3 @@ public class ImagePrediction
}
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,5 @@
<ItemGroup>
<ProjectReference Include="../Microsoft.ML/Microsoft.ML.nupkgproj" />
<PackageReference Include="System.Drawing.Common" Version="$(SystemDrawingCommonPackageVersion)" />
</ItemGroup>

</ItemGroup>
</Project>
61 changes: 40 additions & 21 deletions src/Microsoft.ML.Dnn/ImageClassificationTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,27 @@ private void CheckTrainingParameters(ImageClassificationEstimator.Options option
return (jpegData, resizedImage);
}

/// <summary>
/// Copies the bytes held by <paramref name="buffer"/> into a newly allocated
/// TF_STRING tensor using the TensorFlow string encoding and returns it.
/// </summary>
/// <param name="buffer">The raw bytes to encode (for example the contents of an image file).</param>
/// <returns>A <see cref="Tensor"/> wrapping the newly allocated native tensor handle.</returns>
private static Tensor EncodeByteAsString(VBuffer<byte> buffer)
{
    int length = buffer.Length;
    var size = c_api.TF_StringEncodedSize((UIntPtr)length);

    // Room for the encoded payload plus the 8-byte Int64 header written below.
    var handle = c_api.TF_AllocateTensor(TF_DataType.TF_STRING, IntPtr.Zero, 0, (UIntPtr)((ulong)size + 8));

    IntPtr tensor = c_api.TF_TensorData(handle);

    // Presumably the offset entry for the single string element — TODO confirm against the TF_STRING layout.
    Marshal.WriteInt64(tensor, 0);

    var status = new Status();
    try
    {
        unsafe
        {
            // NOTE(review): 'length' is buffer.Length while the pinned span is buffer.GetValues();
            // this assumes the buffer is dense so both have the same number of elements — confirm with callers.
            fixed (byte* src = buffer.GetValues())
                c_api.TF_StringEncode(src, (UIntPtr)length, (sbyte*)(tensor + sizeof(Int64)), size, status);
        }

        status.Check(true);
    }
    finally
    {
        // Release the native status on every path, including when encoding or the check above throws.
        status.Dispose();
    }

    return new Tensor(handle);
}

private sealed class ImageProcessor
{
private Runner _imagePreprocessingRunner;
Expand All @@ -214,16 +235,16 @@ public ImageProcessor(ImageClassificationTransformer transformer)
_imagePreprocessingRunner.AddOutputs(transformer._resizedImageTensorName);
}

public Tensor ProcessImage(string path)
public Tensor ProcessImage(in VBuffer<byte> imageBuffer)
{
var imageTensor = new Tensor(File.ReadAllBytes(path), TF_DataType.TF_STRING);
var imageTensor = EncodeByteAsString(imageBuffer);
var processedTensor = _imagePreprocessingRunner.AddInput(imageTensor, 0).Run()[0];
imageTensor.Dispose();
return processedTensor;
}
}

private void CacheFeaturizedImagesToDisk(IDataView input, string labelColumnName, string imagepathColumnName,
private void CacheFeaturizedImagesToDisk(IDataView input, string labelColumnName, string imageColumnName,
ImageProcessor imageProcessor, string inputTensorName, string outputTensorName, string cacheFilePath,
ImageClassificationMetrics.Dataset dataset, ImageClassificationMetricsCallback metricsCallback)
{
Expand All @@ -234,17 +255,17 @@ private void CacheFeaturizedImagesToDisk(IDataView input, string labelColumnName
labelColumnName, typeof(uint).ToString(),
labelColumn.Type.RawType.ToString());

var imagePathColumn = input.Schema[imagepathColumnName];
var imageColumn = input.Schema[imageColumnName];
Runner runner = new Runner(_session);
runner.AddOutputs(outputTensorName);

using (TextWriter writer = File.CreateText(cacheFilePath))
using (var cursor = input.GetRowCursor(input.Schema.Where(c => c.Index == labelColumn.Index || c.Index == imagePathColumn.Index)))
using (var cursor = input.GetRowCursor(input.Schema.Where(c => c.Index == labelColumn.Index || c.Index == imageColumn.Index)))
{
var labelGetter = cursor.GetGetter<uint>(labelColumn);
var imagePathGetter = cursor.GetGetter<ReadOnlyMemory<char>>(imagePathColumn);
var imageGetter = cursor.GetGetter<VBuffer<byte>>(imageColumn);
UInt32 label = UInt32.MaxValue;
ReadOnlyMemory<char> imagePath = default;
VBuffer<byte> image = default;
runner.AddInput(inputTensorName);
ImageClassificationMetrics metrics = new ImageClassificationMetrics();
metrics.Bottleneck = new BottleneckMetrics();
Expand All @@ -253,9 +274,8 @@ private void CacheFeaturizedImagesToDisk(IDataView input, string labelColumnName
while (cursor.MoveNext())
{
labelGetter(ref label);
imagePathGetter(ref imagePath);
var imagePathStr = imagePath.ToString();
var imageTensor = imageProcessor.ProcessImage(imagePathStr);
imageGetter(ref image);
var imageTensor = imageProcessor.ProcessImage(image);
runner.AddInput(imageTensor, 0);
var featurizedImage = runner.Run()[0]; // Reuse memory
featurizedImage.ToArray<float>(ref imageArray);
Expand All @@ -264,7 +284,6 @@ private void CacheFeaturizedImagesToDisk(IDataView input, string labelColumnName
featurizedImage.Dispose();
imageTensor.Dispose();
metrics.Bottleneck.Index++;
metrics.Bottleneck.Name = imagePathStr;
metricsCallback?.Invoke(metrics);
}
}
Expand Down Expand Up @@ -878,8 +897,8 @@ public Mapper(ImageClassificationTransformer parent, DataViewSchema inputSchema)
private class OutputCache
{
public long Position;
private ValueGetter<ReadOnlyMemory<char>> _imagePathGetter;
private ReadOnlyMemory<char> _imagePath;
private ValueGetter<VBuffer<byte>> _imageGetter;
private VBuffer<byte> _image;
private Runner _runner;
private ImageProcessor _imageProcessor;
private long _predictedLabel;
Expand All @@ -890,8 +909,8 @@ private class OutputCache

public OutputCache(DataViewRow input, ImageClassificationTransformer transformer)
{
_imagePath = default;
_imagePathGetter = input.GetGetter<ReadOnlyMemory<char>>(input.Schema[transformer._inputs[0]]);
_image = default;
_imageGetter = input.GetGetter<VBuffer<byte>>(input.Schema[transformer._inputs[0]]);
_runner = new Runner(transformer._session);
_runner.AddInput(transformer._inputTensorName);
_runner.AddOutputs(transformer._softmaxTensorName);
Expand All @@ -908,8 +927,8 @@ public void UpdateCacheIfNeeded()
if (_inputRow.Position != Position)
{
Position = _inputRow.Position;
_imagePathGetter(ref _imagePath);
var processedTensor = _imageProcessor.ProcessImage(_imagePath.ToString());
_imageGetter(ref _image);
var processedTensor = _imageProcessor.ProcessImage(_image);
var outputTensor = _runner.AddInput(processedTensor, 0).Run();
outputTensor[0].ToArray<float>(ref _classProbability);
outputTensor[1].ToScalar<long>(ref _predictedLabel);
Expand Down Expand Up @@ -1365,15 +1384,15 @@ internal sealed class Options : TransformInputBase
private readonly IHost _host;
private readonly Options _options;
private readonly DnnModel _dnnModel;
private readonly TF_DataType[] _tfInputTypes;
private readonly DataViewType[] _inputTypes;
private ImageClassificationTransformer _transformer;

internal ImageClassificationEstimator(IHostEnvironment env, Options options, DnnModel dnnModel)
{
_host = Contracts.CheckRef(env, nameof(env)).Register(nameof(ImageClassificationEstimator));
_options = options;
_dnnModel = dnnModel;
_tfInputTypes = new[] { TF_DataType.TF_STRING };
_inputTypes = new[] { new VectorDataViewType(NumberDataViewType.Byte) };
}

private static Options CreateArguments(DnnModel tensorFlowModel, string[] outputColumnNames, string[] inputColumnName, bool addBatchDimensionInput)
Expand All @@ -1399,8 +1418,8 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema)
var input = _options.InputColumns[i];
if (!inputSchema.TryFindColumn(input, out var col))
throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", input);
var expectedType = DnnUtils.Tf2MlNetType(_tfInputTypes[i]);
if (col.ItemType != expectedType)
var expectedType = _inputTypes[i];
if (!col.ItemType.Equals(expectedType.GetItemType()))
throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", input, expectedType.ToString(), col.ItemType.ToString());
}

Expand Down
27 changes: 10 additions & 17 deletions src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -69,34 +69,27 @@ internal static ImageGrayscalingEstimator ConvertToGrayscale(this TransformsCata
/// ]]></format>
/// </example>
public static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string outputColumnName, string imageFolder, string inputColumnName = null)
=> new ImageLoadingEstimator(CatalogUtils.GetEnvironment(catalog), imageFolder, new[] { (outputColumnName, inputColumnName ?? outputColumnName) });
=> new ImageLoadingEstimator(CatalogUtils.GetEnvironment(catalog), imageFolder, true, new[] { (outputColumnName, inputColumnName ?? outputColumnName) });

/// <summary>
/// Loads the images from the <see cref="ImageLoadingTransformer.ImageFolder" /> into memory.
/// Create a <see cref="ImageLoadingEstimator"/>, which loads the data from the column specified in <paramref name="inputColumnName"/>
/// as an image to a new column: <paramref name="outputColumnName"/>.
/// </summary>
/// <remarks>
/// The image gets loaded in memory as a <see cref="System.Drawing.Bitmap" /> type.
/// Loading is the first step of almost every pipeline that does image processing, and further analysis on images.
/// The images to load need to be in the formats supported by <see cref = "System.Drawing.Bitmap" />.
/// For end-to-end image processing pipelines, and scenarios in your applications, see the
/// <a href="https://github.com/dotnet/machinelearning-samples/tree/master/samples/csharp/getting-started"> examples in the machinelearning-samples github repository.</a>
/// </remarks>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.
/// This column's data type will be <see cref="VectorDataViewType"/>.</param>
/// <param name="inputColumnName">Name of the column with paths to the images to load.
/// This estimator operates over text data.</param>
/// <param name="imageFolder">Folder where to look for images.</param>
/// <param name="columns">Specifies the names of the input columns for the transformation, and their respective output column names.</param>
/// <param name="useImageType">Image type flag. If true, loads the image as an ImageDataViewType; otherwise loads the image as a VectorDataViewType. Defaults to ImageDataViewType if not specified or is true.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[LoadImages](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/LoadImages.cs)]
/// ]]></format>
/// </example>
[BestFriend]
internal static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string imageFolder, params InputOutputColumnPair[] columns)
{
var env = CatalogUtils.GetEnvironment(catalog);
env.CheckValue(columns, nameof(columns));
return new ImageLoadingEstimator(env, imageFolder, InputOutputColumnPair.ConvertToValueTuples(columns));
}
public static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string outputColumnName, string imageFolder, bool useImageType, string inputColumnName = null)
{
    // Resolve the host environment first, then build the single (output, input) column pair;
    // when no input column name is given, the output column name is used as the input as well.
    var env = CatalogUtils.GetEnvironment(catalog);
    var columnPair = (outputColumnName, inputColumnName ?? outputColumnName);

    return new ImageLoadingEstimator(env, imageFolder, useImageType, new[] { columnPair });
}

/// <summary>
/// Create a <see cref="ImagePixelExtractingEstimator"/>, which extracts pixels values from the data specified in column: <paramref name="inputColumnName"/>
Expand Down
Loading