Skip to content

Commit

Permalink
Refactoring Reading Text Files Part I
Browse files Browse the repository at this point in the history
  • Loading branch information
Dirkster99 committed May 23, 2020
1 parent ce35974 commit cdc2d5d
Show file tree
Hide file tree
Showing 6 changed files with 292 additions and 18 deletions.
2 changes: 2 additions & 0 deletions source/AehnlichLib/AehnlichLib.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@
<Compile Include="Enums\EditType.cs" />
<Compile Include="Enums\HashType.cs" />
<Compile Include="Enums\DiffDirFileMode.cs" />
<Compile Include="Files\AsyncPump.cs" />
<Compile Include="Files\FileEx.cs" />
<Compile Include="Interfaces\Dir\IDataSource.cs" />
<Compile Include="Interfaces\Dir\IDataSourceFactory.cs" />
<Compile Include="Interfaces\Dir\IDirectoryInfo.cs" />
Expand Down
7 changes: 6 additions & 1 deletion source/AehnlichLib/Binaries/BinaryDiffLines.cs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,12 @@ public sealed class BinaryDiffLines
#endregion

#region Constructors

/// <summary>
/// Class constructor
/// </summary>
/// <param name="baseFile"></param>
/// <param name="list"></param>
/// <param name="bytesPerLine">Number of bytes to display per line.</param>
public BinaryDiffLines(Stream baseFile, AddCopyCollection list, int bytesPerLine)
{
this.bytesPerLine = bytesPerLine;
Expand Down
149 changes: 149 additions & 0 deletions source/AehnlichLib/Files/AsyncPump.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;

namespace AehnlichLib.Files
{
/// <summary>
/// Pump that lets synchronous code execute an asynchronous method while its
/// continuations are processed on the calling thread.
/// https://stackoverflow.com/questions/9343594/how-to-call-asynchronous-method-from-synchronous-method-in-c
/// </summary>
public static class AsyncPump
{
    /// <summary>
    /// Invokes <paramref name="asyncMethod"/> under a temporary single-threaded
    /// synchronization context and processes posted work items on the current thread
    /// until the context's operation count drops back to zero.
    /// </summary>
    /// <param name="asyncMethod">The asynchronous method to execute.</param>
    /// <exception cref="ArgumentNullException"><paramref name="asyncMethod"/> is null.</exception>
    public static void Run(Action asyncMethod)
    {
        if (asyncMethod == null)
        {
            throw new ArgumentNullException("asyncMethod");
        }

        SynchronizationContext callerContext = SynchronizationContext.Current;
        try
        {
            // Operation tracking is switched on: the pump finishes once every
            // started operation has reported completion.
            var pump = new SingleThreadSynchronizationContext(true);
            SynchronizationContext.SetSynchronizationContext(pump);

            // Bracket the invocation with one started/completed pair so the
            // counter cannot reach zero before the method has been called.
            pump.OperationStarted();
            asyncMethod();
            pump.OperationCompleted();

            // Process queued continuations; exceptions thrown by them surface here.
            pump.RunOnCurrentThread();
        }
        finally
        {
            // Always hand the caller its original context back.
            SynchronizationContext.SetSynchronizationContext(callerContext);
        }
    }

    /// <summary>
    /// Invokes <paramref name="asyncMethod"/> under a temporary single-threaded
    /// synchronization context and processes posted work items on the current thread
    /// until the returned task has finished.
    /// </summary>
    /// <param name="asyncMethod">The asynchronous method to execute.</param>
    /// <exception cref="ArgumentNullException"><paramref name="asyncMethod"/> is null.</exception>
    /// <exception cref="InvalidOperationException"><paramref name="asyncMethod"/> returned null.</exception>
    public static void Run(Func<Task> asyncMethod)
    {
        if (asyncMethod == null)
        {
            throw new ArgumentNullException("asyncMethod");
        }

        SynchronizationContext callerContext = SynchronizationContext.Current;
        try
        {
            // No operation tracking here: completion is signalled by the task itself.
            var pump = new SingleThreadSynchronizationContext(false);
            SynchronizationContext.SetSynchronizationContext(pump);

            Task pending = asyncMethod();
            if (pending == null)
            {
                throw new InvalidOperationException("No task provided.");
            }

            // Close the work queue as soon as the task finishes (in any state).
            pending.ContinueWith(finished => pump.Complete(), TaskScheduler.Default);

            // Process queued continuations, then rethrow the task's exception (if any).
            pump.RunOnCurrentThread();
            pending.GetAwaiter().GetResult();
        }
        finally
        {
            SynchronizationContext.SetSynchronizationContext(callerContext);
        }
    }

    /// <summary>
    /// Invokes <paramref name="asyncMethod"/> under a temporary single-threaded
    /// synchronization context, processes posted work items on the current thread
    /// until the returned task has finished and hands back the task's result.
    /// </summary>
    /// <typeparam name="T">Type of the result produced by the task.</typeparam>
    /// <param name="asyncMethod">The asynchronous method to execute.</param>
    /// <returns>The result of the task returned by <paramref name="asyncMethod"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="asyncMethod"/> is null.</exception>
    /// <exception cref="InvalidOperationException"><paramref name="asyncMethod"/> returned null.</exception>
    public static T Run<T>(Func<Task<T>> asyncMethod)
    {
        if (asyncMethod == null)
        {
            throw new ArgumentNullException("asyncMethod");
        }

        SynchronizationContext callerContext = SynchronizationContext.Current;
        try
        {
            // No operation tracking here: completion is signalled by the task itself.
            var pump = new SingleThreadSynchronizationContext(false);
            SynchronizationContext.SetSynchronizationContext(pump);

            Task<T> pending = asyncMethod();
            if (pending == null)
            {
                throw new InvalidOperationException("No task provided.");
            }

            // Close the work queue as soon as the task finishes (in any state).
            pending.ContinueWith(finished => pump.Complete(), TaskScheduler.Default);

            // Process queued continuations, then unwrap the result or the exception.
            pump.RunOnCurrentThread();
            return pending.GetAwaiter().GetResult();
        }
        finally
        {
            SynchronizationContext.SetSynchronizationContext(callerContext);
        }
    }

    /// <summary>
    /// Synchronization context that queues posted callbacks so that they can be
    /// executed by whoever calls <see cref="RunOnCurrentThread"/>.
    /// </summary>
    private sealed class SingleThreadSynchronizationContext : SynchronizationContext
    {
        /// <summary>Queued callbacks together with their state argument.</summary>
        private readonly BlockingCollection<KeyValuePair<SendOrPostCallback, object>> m_queue =
            new BlockingCollection<KeyValuePair<SendOrPostCallback, object>>();

        /// <summary>Thread on which this context was created.</summary>
        private readonly Thread m_thread = Thread.CurrentThread;

        /// <summary>Number of operations started but not yet completed.</summary>
        private int m_operationCount = 0;

        /// <summary>Whether <see cref="m_operationCount"/> is maintained at all.</summary>
        private readonly bool m_trackOperations;

        /// <summary>Creates the context.</summary>
        /// <param name="trackOperations">Whether to track the operation count.</param>
        internal SingleThreadSynchronizationContext(bool trackOperations)
        {
            m_trackOperations = trackOperations;
        }

        /// <summary>Queues a callback for later execution by the pump loop.</summary>
        /// <param name="d">The <see cref="SendOrPostCallback"/> delegate to call.</param>
        /// <param name="state">The object passed to the delegate.</param>
        /// <exception cref="ArgumentNullException"><paramref name="d"/> is null.</exception>
        public override void Post(SendOrPostCallback d, object state)
        {
            if (d == null)
            {
                throw new ArgumentNullException("d");
            }

            var workItem = new KeyValuePair<SendOrPostCallback, object>(d, state);
            m_queue.Add(workItem);
        }

        /// <summary>Not supported; always throws.</summary>
        /// <param name="d">Ignored.</param>
        /// <param name="state">Ignored.</param>
        /// <exception cref="NotSupportedException">Always thrown.</exception>
        public override void Send(SendOrPostCallback d, object state)
        {
            throw new NotSupportedException("Synchronously sending is not supported.");
        }

        /// <summary>
        /// Runs a loop that executes queued work items on the calling thread until
        /// the queue has been marked complete and is empty.
        /// </summary>
        public void RunOnCurrentThread()
        {
            foreach (KeyValuePair<SendOrPostCallback, object> workItem in m_queue.GetConsumingEnumerable())
            {
                SendOrPostCallback callback = workItem.Key;
                object state = workItem.Value;
                callback(state);
            }
        }

        /// <summary>Marks the queue as complete: no more work items will be accepted.</summary>
        public void Complete()
        {
            m_queue.CompleteAdding();
        }

        /// <summary>Counts a newly started operation (only when tracking is enabled).</summary>
        public override void OperationStarted()
        {
            if (!m_trackOperations)
            {
                return;
            }

            Interlocked.Increment(ref m_operationCount);
        }

        /// <summary>
        /// Counts a finished operation (only when tracking is enabled) and completes
        /// the queue once no operation is outstanding any more.
        /// </summary>
        public override void OperationCompleted()
        {
            if (!m_trackOperations)
            {
                return;
            }

            if (Interlocked.Decrement(ref m_operationCount) == 0)
            {
                Complete();
            }
        }
    }
}
}
80 changes: 80 additions & 0 deletions source/AehnlichLib/Files/FileEx.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
namespace AehnlichLib.Files
{
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Threading.Tasks;

/// <summary>
/// File utility class for reading text files asynchronously.
/// https://stackoverflow.com/questions/13167934/how-to-async-files-readalllines-and-await-for-results
/// </summary>
public static class FileEx
{
    /// <summary>
    /// Buffer size (in bytes) handed to the <see cref="FileStream"/>.
    /// </summary>
    private const int DefaultBufferSize = 4096;

    /// <summary>
    /// Longest byte order mark (in bytes) that <see cref="GetEncoding"/> inspects.
    /// </summary>
    private const int MaxBomLength = 4;

    /// <summary>
    /// Indicates that
    /// 1. The file is to be used for asynchronous reading.
    /// 2. The file is to be accessed sequentially from beginning to end.
    /// </summary>
    private const FileOptions DefaultOptions = FileOptions.Asynchronous | FileOptions.SequentialScan;

    /// <summary>
    /// Reads all text lines of a file asynchronously. The encoding is chosen from the
    /// byte order mark at the start of the file (see <see cref="GetEncoding"/>).
    /// </summary>
    /// <param name="path">Path of the file to read.</param>
    /// <returns>The lines of the file in file order (empty list for an empty file).</returns>
    public static async Task<List<string>> GetFileTextLinesAsync(string path)
    {
        var lines = new List<string>();

        // Open the FileStream with the same FileMode, FileAccess
        // and FileShare as a call to File.OpenText would've done.
        using (var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, DefaultBufferSize, DefaultOptions))
        {
            // Read up to 4 leading bytes. A single ReadAsync call may deliver fewer
            // bytes than requested, so keep reading until the buffer is full or the
            // end of the file is reached.
            var bom = new byte[MaxBomLength];
            int bomLength = 0;
            while (bomLength < bom.Length)
            {
                int bytesRead = await stream.ReadAsync(bom, bomLength, bom.Length - bomLength);
                if (bytesRead == 0)
                    break;

                bomLength += bytesRead;
            }

            // Rewind so the reader sees the complete content again.
            stream.Seek(0, SeekOrigin.Begin);

            // Only the bytes actually read take part in the detection; the zero
            // padding of a short file must not be mistaken for file content
            // (FF FE + padding would otherwise look like a UTF-32 LE BOM).
            Encoding encoding = DetectEncoding(bom, bomLength);

            using (var reader = new StreamReader(stream, encoding))
            {
                string line;
                while ((line = await reader.ReadLineAsync()) != null)
                {
                    lines.Add(line);
                }
            }
        }

        return lines;
    }

    /// <summary>
    /// Gets the encoding of a file from its first (up to 4) bytes.
    /// </summary>
    /// <param name="bom">BOM to be translated into an <see cref="Encoding"/>.
    /// This should be at least 4 bytes long; shorter arrays are accepted and are
    /// only matched against byte order marks that fit into them.</param>
    /// <returns>Recommended <see cref="Encoding"/> to be used to read text from this file;
    /// <see cref="Encoding.Default"/> if no known byte order mark is found.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="bom"/> is null.</exception>
    public static Encoding GetEncoding(byte[] bom)
    {
        if (bom == null)
            throw new System.ArgumentNullException("bom");

        return DetectEncoding(bom, bom.Length);
    }

    /// <summary>
    /// Matches the first <paramref name="length"/> bytes of <paramref name="bom"/>
    /// against the known byte order marks.
    /// </summary>
    /// <param name="bom">Buffer holding the leading bytes of the file.</param>
    /// <param name="length">Number of valid bytes in <paramref name="bom"/>.</param>
    /// <returns>The detected encoding or <see cref="Encoding.Default"/>.</returns>
    private static Encoding DetectEncoding(byte[] bom, int length)
    {
        // The 4-byte UTF-32 marks are tested first: the UTF-32 LE mark (FF FE 00 00)
        // starts with the UTF-16 LE mark (FF FE) and would otherwise never be reached.
        if (length >= 4)
        {
            if (bom[0] == 0xff && bom[1] == 0xfe && bom[2] == 0 && bom[3] == 0)
                return Encoding.UTF32; //UTF-32LE

            if (bom[0] == 0 && bom[1] == 0 && bom[2] == 0xfe && bom[3] == 0xff)
                return new UTF32Encoding(true, true); //UTF-32BE
        }

        if (length >= 3)
        {
            if (bom[0] == 0x2b && bom[1] == 0x2f && bom[2] == 0x76)
                return Encoding.UTF7;

            if (bom[0] == 0xef && bom[1] == 0xbb && bom[2] == 0xbf)
                return Encoding.UTF8;
        }

        if (length >= 2)
        {
            if (bom[0] == 0xff && bom[1] == 0xfe)
                return Encoding.Unicode; //UTF-16LE

            if (bom[0] == 0xfe && bom[1] == 0xff)
                return Encoding.BigEndianUnicode; //UTF-16BE
        }

        // No (known) byte order mark
        return Encoding.Default;
    }
}
}
53 changes: 37 additions & 16 deletions source/AehnlichLib/Text/ProcessTextDiff.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using AehnlichLib.Binaries;
using AehnlichLib.Dir;
using AehnlichLib.Enums;
using AehnlichLib.Files;
using AehnlichLib.Interfaces;
using AehnlichLib.Models;
using System;
Expand Down Expand Up @@ -58,24 +59,25 @@ public IDiffProgress ProcessDiff(IDiffProgress progress)
try
{
IList<string> a, b;
int leadingCharactersToIgnore = 0;
var result = new DiffBinaryTextResults();

if (_Args.DiffType == DiffType.File)
{
var fileA = new FileCompInfo(_Args.A);
var fileB = new FileCompInfo(_Args.B);

GetFileLines(fileA, fileB, out a, out b, out leadingCharactersToIgnore, _Args, progress);
GetFileLines(fileA, fileB, out a, out b, _Args, result, progress);
}
else
{
GetTextLines(_Args.A, _Args.B, _Args, out a, out b, progress);
}

IsBinaryCompare = leadingCharactersToIgnore > 0;
IgnoreCase = IsBinaryCompare ? false : _Args.IgnoreCase;
IsBinaryCompare = result.LeadingCharactersToIgnore > 0;
IgnoreCase = result.IsBinaryCompare ? false : _Args.IgnoreCase;
IgnoreTextWhitespace = IsBinaryCompare ? false : _Args.IgnoreTextWhitespace;
TextDiff diff = new TextDiff(_Args.HashType, IgnoreCase, IgnoreTextWhitespace, leadingCharactersToIgnore, !_Args.ShowChangeAsDeleteInsert);
TextDiff diff = new TextDiff(_Args.HashType, IgnoreCase, IgnoreTextWhitespace,
result.LeadingCharactersToIgnore, !_Args.ShowChangeAsDeleteInsert);

ListA = a;
ListB = b;
Expand All @@ -92,16 +94,24 @@ public IDiffProgress ProcessDiff(IDiffProgress progress)
}

#region TextLineConverter
/// <summary>
///
/// </summary>
/// <param name="fileA"></param>
/// <param name="fileB"></param>
/// <param name="a"></param>
/// <param name="b"></param>
/// <param name="args"></param>
/// <param name="progress"></param>
private static void GetFileLines(FileCompInfo fileA,
FileCompInfo fileB,
out IList<string> a, out IList<string> b,
out int leadingCharactersToIgnore,
TextBinaryDiffArgs args,
DiffBinaryTextResults result,
IDiffProgress progress)
{
a = null;
b = null;
leadingCharactersToIgnore = 0;
CompareType compareType = args.CompareType;

// Nothing to compare if both files do not exist
Expand All @@ -115,7 +125,7 @@ private static void GetFileLines(FileCompInfo fileA,
if (compareType == CompareType.Binary ||
(args.IsAuto && fileA.Is == FileType.Binary || fileB.Is == FileType.Binary))
{
GetBinaryFileLines(fileA, fileB, args, progress, out a, out b, out leadingCharactersToIgnore);
GetBinaryFileLines(fileA, fileB, args, result, progress, out a, out b);
return;
}

Expand All @@ -137,26 +147,37 @@ private static void GetFileLines(FileCompInfo fileA,
if (fileA.Is != FileType.Xml || fileB.Is != FileType.Xml)
{
if (fileA.FileExists)
a = DiffUtility.GetFileTextLines(fileA.FileNamePath, progress);
a = AsyncPump.Run(() => FileEx.GetFileTextLinesAsync(fileA.FileNamePath));
else
a = new List<string>();

if (fileB.FileExists)
b = DiffUtility.GetFileTextLines(fileB.FileNamePath, progress);
b = AsyncPump.Run(() => FileEx.GetFileTextLinesAsync(fileB.FileNamePath));
else
b = new List<string>();
}
}

/// <summary>
/// Get Binary file contents rendered as text lines with line number marker at beginning of each line.
/// </summary>
/// <param name="fileA"></param>
/// <param name="fileB"></param>
/// <param name="args"></param>
/// <param name="progress"></param>
/// <param name="a"></param>
/// <param name="b"></param>
/// <param name="leadingCharactersToIgnore">Leading number of characters to ignore for diff in each line.
/// This space is used in binary diff to display 8 digit line number and 4 digit space.</param>
private static void GetBinaryFileLines(FileCompInfo fileA, FileCompInfo fileB,
TextBinaryDiffArgs args,
IDiffProgress progress,
out IList<string> a, out IList<string> b,
out int leadingCharactersToIgnore)
TextBinaryDiffArgs args,
DiffBinaryTextResults result,
IDiffProgress progress,
out IList<string> a, out IList<string> b)
{
a = new List<string>();
b = new List<string>();
leadingCharactersToIgnore = BinaryDiffLines.PrefixLength;
result.LeadingCharactersToIgnore = BinaryDiffLines.PrefixLength;

// Neither left nor right file exist or cannot be accessed
if (fileA.FileExists == false && fileB.FileExists == false)
Expand Down Expand Up @@ -188,7 +209,7 @@ private static void GetBinaryFileLines(FileCompInfo fileA, FileCompInfo fileB,
BinaryDiffLines lines = new BinaryDiffLines(fileStreamA, addCopy, args.BinaryFootprintLength);
a = lines.BaseLines;
b = lines.VersionLines;
leadingCharactersToIgnore = BinaryDiffLines.PrefixLength;
result.LeadingCharactersToIgnore = BinaryDiffLines.PrefixLength;
}
finally
{
Expand Down
Loading

0 comments on commit cdc2d5d

Please sign in to comment.