Skip to content

Commit

Permalink
Add BBC Sounds scraping
Browse files Browse the repository at this point in the history
  • Loading branch information
cultpodcasts committed Jan 11, 2025
1 parent e2b4f40 commit 2637d8c
Show file tree
Hide file tree
Showing 22 changed files with 376 additions and 159 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
using System.Net;
using Microsoft.Extensions.Logging;
using RedditPodcastPoster.PodcastServices.Abstractions;

namespace RedditPodcastPoster.BBC;

/// <summary>
/// Downloads a BBC page and delegates meta-data extraction to the extractor
/// that matches the BBC service (Sounds or iPlayer) identified by the url.
/// </summary>
// ReSharper disable once InconsistentNaming
public class BBCPageMetaDataExtractor(
    IBBCHttpClient httpClient,
    IiPlayerPageMetaDataExtractor iPlayerPageMetaDataExtractor,
    ISoundsPageMetaDataExtractor soundsPageMetaDataExtractor,
    ILogger<BBCPageMetaDataExtractor> logger
) : IBBCPageMetaDataExtractor
{
    /// <summary>
    /// Requests <paramref name="url"/> and extracts the item meta-data from the returned page.
    /// </summary>
    /// <param name="url">Address of the BBC page to fetch.</param>
    /// <returns>The meta-data produced by the service-specific extractor.</returns>
    /// <exception cref="NonPodcastServiceMetaDataExtractionException">
    /// The page request did not answer with <see cref="HttpStatusCode.OK"/>.
    /// </exception>
    /// <exception cref="InvalidOperationException">
    /// The url is neither a Sounds nor an iPlayer url according to <c>ServiceMatcher</c>.
    /// </exception>
    public async Task<NonPodcastServiceItemMetaData> GetMetaData(Uri url)
    {
        // The page is always requested first; the service is only resolved for successful responses.
        var response = await httpClient.GetAsync(url);
        if (response.StatusCode != HttpStatusCode.OK)
        {
            throw new NonPodcastServiceMetaDataExtractionException(url, response.StatusCode);
        }

        // Sounds is tested before iPlayer, mirroring the order callers have always observed.
        if (ServiceMatcher.IsSounds(url))
        {
            logger.LogInformation("For url '{url}' using extractor of type '{extractorType}'.", url, nameof(ISoundsPageMetaDataExtractor));
            return await soundsPageMetaDataExtractor.Extract(url, response);
        }

        if (ServiceMatcher.IsIplayer(url))
        {
            logger.LogInformation("For url '{url}' using extractor of type '{extractorType}'.", url, nameof(IiPlayerPageMetaDataExtractor));
            return await iPlayerPageMetaDataExtractor.Extract(url, response);
        }

        throw new InvalidOperationException($"Unknown bbc-service for url '{url}'.");
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
using System.Text.Json.Serialization;

namespace RedditPodcastPoster.BBC.DTOs;

/// <summary>
/// Root DTO exposing the object found under the JSON property "programmes".
/// NOTE(review): presumably deserialised from the JSON state embedded in a BBC Sounds page - confirm against the Sounds extractor.
/// </summary>
public class BBCSoundsMetaData
{
/// <summary>
/// Object bound to the "programmes" JSON property. Declared <c>required</c>, so it must be supplied when the instance is initialised.
/// </summary>
[JsonPropertyName("programmes")]
public required Programmes Programmes { get; set; }
}
12 changes: 12 additions & 0 deletions Class-Libraries/RedditPodcastPoster.BBC/DTOs/Duration.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
using System.Text.Json.Serialization;

namespace RedditPodcastPoster.BBC.DTOs
{
/// <summary>
/// Duration DTO: carries the raw number of seconds read from the JSON property "value"
/// and exposes it as a <see cref="TimeSpan"/>.
/// </summary>
public class Duration
{
    /// <summary>Raw value of the "value" JSON property; <c>null</c> when absent.</summary>
    [JsonPropertyName("value")]
    public int? Seconds { get; set; }

    /// <summary>
    /// <see cref="Seconds"/> converted to a <see cref="TimeSpan"/>, or <c>null</c> when no seconds value is set.
    /// </summary>
    public TimeSpan? Length => Seconds is { } seconds ? TimeSpan.FromSeconds(seconds) : null;
}
}
12 changes: 12 additions & 0 deletions Class-Libraries/RedditPodcastPoster.BBC/DTOs/Guidance.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
using System.Text.Json.Serialization;

namespace RedditPodcastPoster.BBC.DTOs
{
/// <summary>
/// Guidance DTO: holds the dictionary read from the JSON property "warnings".
/// </summary>
public class Guidance
{
    /// <summary>Entries of the "warnings" JSON property; <c>null</c> when absent.</summary>
    [JsonPropertyName("warnings")]
    public Dictionary<string, string>? Warnings { get; set; }

    /// <summary>
    /// <c>true</c> only when <see cref="Warnings"/> is present and contains at least one entry.
    /// </summary>
    public bool HasWarnings => Warnings is { Count: > 0 };
}
}
21 changes: 21 additions & 0 deletions Class-Libraries/RedditPodcastPoster.BBC/DTOs/Programme.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
using System.Text.Json.Serialization;

namespace RedditPodcastPoster.BBC.DTOs;

/// <summary>
/// DTO for a single programme object. Every member is declared <c>required</c>,
/// so each must be supplied when the instance is initialised.
/// </summary>
public class Programme
{
/// <summary>Object bound to the "release" JSON property (carries the release date).</summary>
[JsonPropertyName("release")]
public required Release Release { get; set; }

/// <summary>Object bound to the "titles" JSON property.</summary>
[JsonPropertyName("titles")]
public required Titles Titles { get; set; }

/// <summary>Object bound to the "synopses" JSON property.</summary>
[JsonPropertyName("synopses")]
public required Synopses Synopses { get; set; }

/// <summary>Object bound to the "duration" JSON property.</summary>
[JsonPropertyName("duration")]
public required Duration Duration { get; set; }

/// <summary>
/// Object bound to the "guidance" JSON property.
/// NOTE(review): being <c>required</c>, a payload without a "guidance" property would presumably fail to deserialise - confirm this is intended.
/// </summary>
[JsonPropertyName("guidance")]
public required Guidance Guidance { get; set; }
}
9 changes: 9 additions & 0 deletions Class-Libraries/RedditPodcastPoster.BBC/DTOs/Programmes.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
using System.Text.Json.Serialization;

namespace RedditPodcastPoster.BBC.DTOs;

/// <summary>
/// DTO wrapping the programme found under the JSON property "current".
/// </summary>
public class Programmes
{
/// <summary>
/// Programme bound to the "current" JSON property. Declared <c>required</c>, so it must be supplied when the instance is initialised.
/// </summary>
[JsonPropertyName("current")]
public required Programme CurrentProgramme { get; set; }
}
9 changes: 9 additions & 0 deletions Class-Libraries/RedditPodcastPoster.BBC/DTOs/Release.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
using System.Text.Json.Serialization;

namespace RedditPodcastPoster.BBC.DTOs;

/// <summary>
/// DTO carrying the value of the JSON property "date".
/// </summary>
public class Release
{
/// <summary>
/// Value bound to the "date" JSON property. Declared <c>required</c>, so it must be supplied when the instance is initialised.
/// NOTE(review): time-zone / <c>DateTimeKind</c> of the parsed value is not established here - confirm before comparing with UTC values.
/// </summary>
[JsonPropertyName("date")]
public required DateTime Date { get; set; }
}
18 changes: 18 additions & 0 deletions Class-Libraries/RedditPodcastPoster.BBC/DTOs/Synopses.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
using System.Text.Json.Serialization;

namespace RedditPodcastPoster.BBC.DTOs
{
/// <summary>
/// Synopses DTO: up to three alternative synopsis texts read from the JSON
/// properties "short", "medium" and "long".
/// </summary>
public class Synopses
{
    /// <summary>Value of the "short" JSON property; <c>null</c> when absent.</summary>
    [JsonPropertyName("short")]
    public string? Short { get; set; }

    /// <summary>Value of the "medium" JSON property; <c>null</c> when absent.</summary>
    [JsonPropertyName("medium")]
    public string? Medium { get; set; }

    /// <summary>Value of the "long" JSON property; <c>null</c> when absent.</summary>
    [JsonPropertyName("long")]
    public string? Long { get; set; }

    /// <summary>
    /// The first non-null synopsis in the order long, medium, short; the empty string when all are null.
    /// Only <c>null</c> is skipped - an empty string is returned as-is.
    /// </summary>
    public string Description
    {
        get
        {
            if (Long is not null)
            {
                return Long;
            }

            if (Medium is not null)
            {
                return Medium;
            }

            return Short ?? string.Empty;
        }
    }
}
}
20 changes: 20 additions & 0 deletions Class-Libraries/RedditPodcastPoster.BBC/DTOs/Titles.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
using System.Text.Json.Serialization;

namespace RedditPodcastPoster.BBC.DTOs;

/// <summary>
/// Titles DTO: the alternative titles read from the JSON properties
/// "primary", "secondary", "tertiary" and "entity_title".
/// </summary>
public class Titles
{
    /// <summary>Value of the "primary" JSON property; <c>null</c> when absent.</summary>
    [JsonPropertyName("primary")]
    public string? Primary { get; set; }

    /// <summary>Value of the "secondary" JSON property; <c>null</c> when absent.</summary>
    [JsonPropertyName("secondary")]
    public string? Secondary { get; set; }

    /// <summary>Value of the "tertiary" JSON property; <c>null</c> when absent.</summary>
    [JsonPropertyName("tertiary")]
    public string? Tertiary { get; set; }

    /// <summary>Value of the "entity_title" JSON property; <c>null</c> when absent.</summary>
    [JsonPropertyName("entity_title")]
    public string? EntityTitle { get; set; }

    /// <summary>
    /// The first non-null title in the order entity title, tertiary, secondary, primary;
    /// the empty string when all are null. Only <c>null</c> is skipped.
    /// </summary>
    public string Title => this switch
    {
        { EntityTitle: { } entityTitle } => entityTitle,
        { Tertiary: { } tertiary } => tertiary,
        { Secondary: { } secondary } => secondary,
        { Primary: { } primary } => primary,
        _ => string.Empty
    };
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ public static IServiceCollection AddBBCServices(this IServiceCollection services
return services
.AddSingleton<IBBCHttpClientFactory, BBCHttpClientFactory>()
.AddScoped(s => s.GetService<IBBCHttpClientFactory>()!.Create())
.AddScoped<IBBCPageMetaDataExtractor, BBCPageMetaDataExtractor>()
.AddScoped<IiPlayerPageMetaDataExtractor, iPlayerPageMetaDataExtractor>()
.AddScoped<IMetaDataExtractor, MetaDataExtractor>();
.AddScoped< ISoundsPageMetaDataExtractor, SoundsPageMetaDataExtractor>();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
using RedditPodcastPoster.PodcastServices.Abstractions;

namespace RedditPodcastPoster.BBC;

/// <summary>
/// Contract for obtaining the meta-data of an item from a BBC page url.
/// </summary>
// ReSharper disable once InconsistentNaming
public interface IBBCPageMetaDataExtractor
{
/// <summary>
/// Produces the meta-data for the item at <paramref name="url"/>.
/// </summary>
/// <param name="url">Address of the BBC page.</param>
/// <returns>A task yielding the extracted meta-data.</returns>
Task<NonPodcastServiceItemMetaData> GetMetaData(Uri url);
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ namespace RedditPodcastPoster.BBC;
/// <summary>
/// Contract for extracting item meta-data from a page response that has already been fetched.
/// </summary>
public interface IMetaDataExtractor
{
/// <summary>
/// Extracts the meta-data from <paramref name="pageResponse"/>.
/// </summary>
/// <param name="url">Address the page was requested from.</param>
/// <param name="pageResponse">HTTP response of the page request.</param>
/// <returns>A task yielding the extracted meta-data.</returns>
Task<NonPodcastServiceItemMetaData> Extract(Uri url, HttpResponseMessage pageResponse);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
namespace RedditPodcastPoster.BBC;

/// <summary>
/// Marker interface: adds no members to <see cref="IMetaDataExtractor"/>; it gives the
/// BBC Sounds extractor its own service type so it can be injected separately.
/// </summary>
public interface ISoundsPageMetaDataExtractor : IMetaDataExtractor { }
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
using RedditPodcastPoster.PodcastServices.Abstractions;
namespace RedditPodcastPoster.BBC;

namespace RedditPodcastPoster.BBC;

// ReSharper disable once InconsistentNaming
public interface IiPlayerPageMetaDataExtractor
{
Task<NonPodcastServiceItemMetaData> GetMetaData(Uri url);
}
/// <summary>
/// Marker interface: adds no members to <see cref="IMetaDataExtractor"/>; it gives the
/// iPlayer extractor its own service type so it can be injected separately.
/// </summary>
public interface IiPlayerPageMetaDataExtractor : IMetaDataExtractor { }
108 changes: 0 additions & 108 deletions Class-Libraries/RedditPodcastPoster.BBC/MetaDataExtractor.cs

This file was deleted.

18 changes: 18 additions & 0 deletions Class-Libraries/RedditPodcastPoster.BBC/ServiceMatcher.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
namespace RedditPodcastPoster.BBC;

/// <summary>
/// Decides which BBC service (iPlayer or Sounds) a url belongs to, based on its host and path.
/// </summary>
public static class ServiceMatcher
{
    private const string BbcDomain = "bbc.co.uk";

    /// <summary>
    /// Tells whether the url is hosted on bbc.co.uk or one of its sub-domains.
    /// </summary>
    private static bool IsBBC(Uri url)
    {
        // Uri.Host throws for relative urls; a relative url cannot be attributed to the BBC.
        if (!url.IsAbsoluteUri)
        {
            return false;
        }

        // Accept the fully-qualified form with a trailing dot ("www.bbc.co.uk.").
        var host = url.Host.TrimEnd('.');

        // Match the domain itself or a true sub-domain. A substring test would also accept
        // look-alike hosts such as "bbc.co.uk.example.com" or "notbbc.co.uk".
        return host.Equals(BbcDomain, StringComparison.OrdinalIgnoreCase) ||
               host.EndsWith("." + BbcDomain, StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Tells whether the url is a BBC url whose path starts with "/iplayer/episode".
    /// </summary>
    /// <param name="url">The url to classify.</param>
    /// <returns><c>true</c> for iPlayer episode urls on a BBC host; otherwise <c>false</c>.</returns>
    public static bool IsIplayer(Uri url)
    {
        // Ordinal comparison: url paths are identifiers, not linguistic text.
        return IsBBC(url) && url.AbsolutePath.StartsWith("/iplayer/episode", StringComparison.Ordinal);
    }

    /// <summary>
    /// Tells whether the url is a BBC url whose path starts with "/sounds/play/".
    /// </summary>
    /// <param name="url">The url to classify.</param>
    /// <returns><c>true</c> for Sounds play urls on a BBC host; otherwise <c>false</c>.</returns>
    public static bool IsSounds(Uri url)
    {
        // Ordinal comparison: url paths are identifiers, not linguistic text.
        return IsBBC(url) && url.AbsolutePath.StartsWith("/sounds/play/", StringComparison.Ordinal);
    }
}
Loading

0 comments on commit 2637d8c

Please sign in to comment.