Skip to content

Commit

Permalink
refactor(backend): move FranceTvUpdater to non Reactive API
Browse files Browse the repository at this point in the history
Related to #231
  • Loading branch information
davinkevin committed Aug 3, 2024
1 parent 1353981 commit bcceeee
Show file tree
Hide file tree
Showing 7 changed files with 1,937 additions and 145 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import com.github.davinkevin.podcastserver.service.image.CoverInformation
import org.slf4j.LoggerFactory
import reactor.core.publisher.Flux
import reactor.core.publisher.Mono
import reactor.kotlin.core.publisher.toFlux
import java.net.URI
import java.time.ZonedDateTime
import java.util.*
Expand Down Expand Up @@ -42,7 +41,7 @@ interface Updater {
return UpdatePodcastInformation(podcast, items, signature)
}

// Reactive entry point of the interface, shown here as two one-line variants of the same member
// (this text is a rendered diff, so both the earlier and the later form are present).
// Variant using toFlux(): findItemsBlocking(podcast) is evaluated as soon as findItems is called,
// and the resulting list is then converted to a Flux.
fun findItems(podcast: PodcastToUpdate): Flux<ItemFromUpdate> = findItemsBlocking(podcast).toFlux()
// Variant using Mono.fromCallable: the blocking call is wrapped in a callable and the returned
// list is flattened into individual items with flatMapIterable.
fun findItems(podcast: PodcastToUpdate): Flux<ItemFromUpdate> = Mono.fromCallable { findItemsBlocking(podcast) }.flatMapIterable { it }
/**
 * Reactive wrapper around [signatureOfBlocking].
 *
 * @param url location handed unchanged to [signatureOfBlocking]
 * @return a [Mono] built from a callable that invokes [signatureOfBlocking] with [url]
 */
fun signatureOf(url: URI): Mono<String> {
    return Mono.fromCallable { signatureOfBlocking(url) }
}

/**
 * Blocking lookup of the items of [podcast].
 *
 * This default implementation finds nothing.
 *
 * @param podcast the podcast to inspect (unused by the default implementation)
 * @return an empty list
 */
fun findItemsBlocking(podcast: PodcastToUpdate): List<ItemFromUpdate> {
    return emptyList()
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,120 +1,129 @@
package com.github.davinkevin.podcastserver.update.updaters.francetv

import com.fasterxml.jackson.databind.JsonNode
import com.fasterxml.jackson.databind.ObjectMapper
import com.github.davinkevin.podcastserver.extension.java.util.orNull
import com.github.davinkevin.podcastserver.service.image.ImageService
import com.github.davinkevin.podcastserver.update.fetchCoverUpdateInformation
import com.github.davinkevin.podcastserver.update.updaters.ItemFromUpdate
import com.github.davinkevin.podcastserver.update.updaters.PodcastToUpdate
import com.github.davinkevin.podcastserver.update.updaters.Type
import com.github.davinkevin.podcastserver.update.updaters.Updater
import org.jsoup.Jsoup
import org.slf4j.LoggerFactory
import org.springframework.util.DigestUtils
import org.springframework.web.reactive.function.client.WebClient
import org.springframework.web.reactive.function.client.bodyToMono
import org.springframework.web.client.RestClient
import org.springframework.web.client.body
import reactor.core.publisher.Flux
import reactor.core.publisher.Mono
import reactor.kotlin.core.publisher.switchIfEmpty
import reactor.kotlin.core.publisher.toFlux
import reactor.kotlin.core.publisher.toMono
import reactor.kotlin.core.util.function.component1
import reactor.kotlin.core.util.function.component2
import java.net.URI
import java.time.Clock
import java.time.ZonedDateTime
import java.time.format.DateTimeFormatter
import java.util.*
import kotlin.time.Duration
import kotlin.time.DurationUnit

class FranceTvUpdater(
private val franceTvClient: WebClient,
private val franceTvClient: RestClient,
private val image: ImageService,
private val mapper: ObjectMapper,
private val clock: Clock
): Updater {

private val log = LoggerFactory.getLogger(FranceTvUpdater::class.java)

/**
 * Lists the items published on the replay page of [podcast].
 *
 * The replay page is fetched through franceTvClient, parsed with Jsoup, and every element
 * matching `.c-wall__item > [data-video-id]` whose HTML does not contain "indisponible" is
 * reduced to the href of its anchor; each href is then turned into an item by urlToItem.
 *
 * NOTE(review): this span is a rendered diff and interleaves two variants of the method: one
 * returning a Flux (bodyToMono / flatMapIterable / flatMap) and one returning a List
 * (body<String>() / mapNotNull). Given the commit title, the Flux lines are presumably the
 * removed ones. Confirm against the real diff before reusing this text as source.
 */
override fun findItems(podcast: PodcastToUpdate): Flux<ItemFromUpdate> {
override fun findItemsBlocking(podcast: PodcastToUpdate): List<ItemFromUpdate> {

val url = podcast.url.toASCIIString()

log.debug("Fetch $url")

val replay = replayUrl(podcast.url)

return franceTvClient
val page = franceTvClient
.get()
.uri(replay)
.retrieve()
.bodyToMono<String>()
.map { Jsoup.parse(it, url) }
.flatMapIterable { it.select(".c-wall__item > [data-video-id]") }
.body<String>()
// A null body is treated as "no items" in the List variant.
?: return emptyList()

val html = Jsoup.parse(page, url)

val urls = html
.select(".c-wall__item > [data-video-id]")
.toList()
.filter { !it.html().contains("indisponible") }
.map { it.select("a[href]").attr("href") }
.flatMap { urlToItem(it) }

// mapNotNull drops the hrefs for which urlToItem returned null.
return urls
.mapNotNull(::urlToItem)
}

/**
 * Builds an item from the page located at [pathUrl] (resolved by franceTvClient).
 *
 * The page is parsed with Jsoup, the first `script[type=application/ld+json]` tag is read as
 * JSON, and the first node whose "@type" is "VideoObject" supplies name, description, duration,
 * uploadDate and thumbnailUrl. The item URL is "https://www.france.tv" followed by [pathUrl] and
 * the mime type is always "video/mp4".
 *
 * NOTE(review): this span is a rendered diff and interleaves two variants: one returning
 * Mono<ItemFromUpdate> and one returning ItemFromUpdate?. Given the commit title, the Mono lines
 * are presumably the removed ones. Confirm against the real diff.
 */
private fun urlToItem(pathUrl: String): Mono<ItemFromUpdate> {
return franceTvClient
private fun urlToItem(pathUrl: String): ItemFromUpdate? {

val page = franceTvClient
.get()
.uri(pathUrl)
.retrieve()
.bodyToMono<String>()
.map { Jsoup.parse(it, "https://www.france.tv/") }
.map { document ->
val jsonldTag = document.select("script[type=application/ld+json]").firstOrNull()
?: error("""No <script type="application/ld+json"></script> found""")

val jsonLd = mapper.readTree(jsonldTag.html())
val videoObject = jsonLd.firstOrNull { it.get("@type").asText() == "VideoObject" }
?: error("""No element of type VideoObject""")

val pubDate = videoObject.get("uploadDate").asText()?.let { ZonedDateTime.parse(it, DateTimeFormatter.ISO_DATE_TIME) }
?: ZonedDateTime.now(clock)

val item = ItemFromUpdate(
title = videoObject.get("name").asText().replaceFirst("Secrets d'Histoire ", ""),
description = videoObject.get("description").asText(),
length = Duration.parse(videoObject.get("duration").asText()).toLong(DurationUnit.SECONDS),
pubDate = pubDate,
url = URI("https://www.france.tv$pathUrl"),
cover = null,
mimeType = "video/mp4"
)

val cover = videoObject.get("thumbnailUrl").firstOrNull()?.asText()
?.let(URI::create)

item to cover
}
.flatMap { (item, cover) -> item.toMono().zipWith(image.fetchCoverInformationOrOption(cover)) }
.map { (item, cover) -> item.copy(cover = cover.orNull()) }
// In the Mono variant any failure of the chain is logged and converted into an empty Mono,
// so a single broken page does not fail the whole listing.
.onErrorResume {
val message = "Error during fetch of $pathUrl"
log.error(message)
log.debug(message, it)
Mono.empty()
}
.body<String>()
// A null body yields no item in the nullable variant.
?: return null

// NOTE(review): nothing below is wrapped in a try/catch, so the error(...) calls and any
// parsing or HTTP exception propagate to the caller instead of being logged and skipped as
// onErrorResume did in the Mono variant. Confirm this change of behaviour is intended.
val html = Jsoup.parse(page, "https://www.france.tv/")

val jsonldTag = html.select("script[type=application/ld+json]").firstOrNull()
?: error("""No <script type="application/ld+json"></script> found""")

val jsonLd = mapper.readTree(jsonldTag.html())
val videoObject = jsonLd.firstOrNull { it.get("@type").asText() == "VideoObject" }
?: error("""No element of type VideoObject""")

// Falls back to the current time of the injected clock when "uploadDate" is absent.
val pubDate = when(val uploadDate = videoObject["uploadDate"]?.asText()) {
null -> ZonedDateTime.now(clock)
else -> ZonedDateTime.parse(uploadDate, DateTimeFormatter.ISO_DATE_TIME)
}

// The sequence is lazy, so only the first thumbnail URL is passed to fetchCoverUpdateInformation.
// NOTE(review): "thumbnailUrl" is dereferenced without a safe call, unlike "uploadDate" above;
// presumably it is always present in the JSON-LD. Verify.
val cover = videoObject.get("thumbnailUrl")
.asSequence()
.map(JsonNode::asText)
.map(URI::create)
.map(image::fetchCoverUpdateInformation)
.firstOrNull()

return ItemFromUpdate(
title = videoObject.get("name").asText().replaceFirst("Secrets d'Histoire ", ""),
description = videoObject.get("description").asText(),
length = Duration.parse(videoObject.get("duration").asText()).toLong(DurationUnit.SECONDS),
pubDate = pubDate,
url = URI("https://www.france.tv$pathUrl"),
cover = cover,
mimeType = "video/mp4"
)
}

/**
 * Computes a signature of the replay page of [url].
 *
 * The hrefs of the available entries (elements matching `.c-wall__item > [data-video-id]` whose
 * HTML does not contain "indisponible") are sorted, joined with "-" and hashed with MD5 into a
 * hexadecimal string. An empty string is produced when there is no body or no available entry.
 *
 * NOTE(review): this span is a rendered diff and interleaves two variants: one returning
 * Mono<String> (bodyToMono / sort / switchIfEmpty) and one returning String
 * (body<String>() / sorted / let). Given the commit title, the Mono lines are presumably the
 * removed ones. Confirm against the real diff.
 */
override fun signatureOf(url: URI): Mono<String> {

override fun signatureOfBlocking(url: URI): String {
val replay = replayUrl(url)

return franceTvClient
val page = franceTvClient
.get()
.uri(replay)
.retrieve()
.bodyToMono<String>()
.map { Jsoup.parse(it, url.toASCIIString()) }
.flatMapIterable { it.select(".c-wall__item > [data-video-id]") }
.body<String>()
?: return ""

val html = Jsoup.parse(page, url.toASCIIString())
val ids = html.select(".c-wall__item > [data-video-id]")
.toList()
.filter { !it.html().contains("indisponible") }

// Guard needed by the String variant: reduce below requires at least one element.
if (ids.isEmpty()) return ""

return ids
.asSequence()
.map { it.select("a[href]").attr("href") }
.sort()
.sorted()
.reduce { t, u -> """$t-$u""" }
.map { DigestUtils.md5DigestAsHex(it.toByteArray()) }
.switchIfEmpty("".toMono())
.let { DigestUtils.md5DigestAsHex(it.toByteArray()) }
}

/**
 * Identifies this updater.
 *
 * @return a [Type] built from the values "FranceTv" and "France•tv"
 */
override fun type(): Type {
    return Type("FranceTv", "France•tv")
}
Expand All @@ -129,12 +138,4 @@ class FranceTvUpdater(
.substringAfter("https://www.france.tv/")
}

}

/**
 * Fetches the cover information of [url] and exposes it as an [Optional].
 *
 * The returned [Mono] always emits exactly one value on success: an [Optional] holding the
 * [ItemFromUpdate.Cover] built from the fetched width, height and url, or an empty [Optional]
 * when [url] is null or when fetchCoverInformation completes without a value.
 *
 * @param url location of the cover image, or null when the item has none
 * @return a [Mono] emitting the optional cover
 */
private fun ImageService.fetchCoverInformationOrOption(url: URI?): Mono<Optional<ItemFromUpdate.Cover>> {
    // Explicit null guard instead of `url!!`: the compiler now proves non-nullness of the
    // value handed to fetchCoverInformation.
    val target = url ?: return Optional.empty<ItemFromUpdate.Cover>().toMono()

    // Mono.defer keeps the call to fetchCoverInformation at subscription time, as it was when
    // it lived inside flatMap.
    return Mono.defer { fetchCoverInformation(target) }
        .map { ItemFromUpdate.Cover(it.width, it.height, it.url) }
        .map { Optional.of(it) }
        .switchIfEmpty { Optional.empty<ItemFromUpdate.Cover>().toMono() }
}
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
package com.github.davinkevin.podcastserver.update.updaters.francetv

import com.fasterxml.jackson.databind.ObjectMapper
import com.github.davinkevin.podcastserver.service.image.ImageService
import com.github.davinkevin.podcastserver.service.image.ImageServiceConfig
import org.springframework.context.annotation.Bean
import org.springframework.context.annotation.Configuration
import org.springframework.context.annotation.Import
import org.springframework.web.reactive.function.client.WebClient
import com.github.davinkevin.podcastserver.service.image.ImageService
import org.springframework.web.client.RestClient
import java.time.Clock

/**
Expand All @@ -18,12 +18,12 @@ class FranceTvUpdaterConfig {

/**
 * Declares the [FranceTvUpdater] bean.
 *
 * The injected client builder is cloned before the base URL "https://www.france.tv/" is set on
 * it, and the resulting client is passed to the updater together with the image service, the
 * JSON mapper and the clock.
 *
 * NOTE(review): this span is a rendered diff and interleaves two parameter lists and two client
 * constructions: one based on WebClient.Builder (wcb) and one on RestClient.Builder (rcb). Given
 * the commit title, the WebClient lines are presumably the removed ones. Confirm against the
 * real diff.
 */
@Bean
fun franceTvUpdater(
wcb: WebClient.Builder,
image: ImageService,
mapper: ObjectMapper,
clock: Clock
rcb: RestClient.Builder,
image: ImageService,
mapper: ObjectMapper,
clock: Clock
): FranceTvUpdater {
val franceTvClient = wcb.clone().baseUrl("https://www.france.tv/").build()
val franceTvClient = rcb.clone().baseUrl("https://www.france.tv/").build()

return FranceTvUpdater(franceTvClient, image, mapper, clock)
}
Expand Down
Loading

0 comments on commit bcceeee

Please sign in to comment.