Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions packages/metascraper-x/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

const {
getUrls,
$jsonld,
author,
image,
memoizeOne,
Expand All @@ -15,7 +16,9 @@ const {
const toAuthor = toRule(author)
const toImage = toRule(image)
const toTitle = toRule(title)

const toDescription = toRule(description)

const toUrl = toRule(url)

const test = memoizeOne(url =>
Expand All @@ -42,7 +45,10 @@ module.exports = ({ resolveUrls = false, resolveUrl = url => url } = {}) => {
],
description: [
toDescription(async $ => {
let description = $('meta[property="og:description"]').attr('content')
let description =
$jsonld('mainEntity.description')($) ||
$('meta[property="og:description"]').attr('content')

if (!resolveUrls) return description
const urls = getUrls(description)
const resolvedUrls = await Promise.all(urls.map(resolveUrl))
Expand All @@ -57,7 +63,10 @@ module.exports = ({ resolveUrls = false, resolveUrl = url => url } = {}) => {
],
image: [
toImage($ => {
let imageUrl = $('meta[property="og:image"]').attr('content')
let imageUrl =
$jsonld('mainEntity.image.contentUrl')($) ||
$('meta[property="og:image"]').attr('content')

if (imageUrl?.endsWith('_200x200.jpg')) {
imageUrl = imageUrl.replace('_200x200.jpg', '_400x400.jpg')
}
Expand Down
2,714 changes: 2,712 additions & 2 deletions packages/metascraper-x/test/fixtures/post-gif.html

Large diffs are not rendered by default.

2,714 changes: 2,712 additions & 2 deletions packages/metascraper-x/test/fixtures/post-image.html

Large diffs are not rendered by default.

2,714 changes: 2,712 additions & 2 deletions packages/metascraper-x/test/fixtures/post.html

Large diffs are not rendered by default.

2,734 changes: 2,726 additions & 8 deletions packages/metascraper-x/test/fixtures/profile-video.html

Large diffs are not rendered by default.

3,734 changes: 3,718 additions & 16 deletions packages/metascraper-x/test/fixtures/profile.html

Large diffs are not rendered by default.

62 changes: 31 additions & 31 deletions packages/metascraper-x/test/snapshots/index.js.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ Generated by [AVA](https://avajs.dev).
> Snapshot 1

{
author: '#!/kiko/beats (Kikobeats)',
date: null,
description: 'engineering ▲ @vercel; founder of https://t.co/4PQvCsVNsA https://t.co/fpiHwbEPBv https://t.co/IG8Qq0IDKi https://t.co/gblDRx1P9D https://t.co/SmoZi3hAhb https://t.co/Y0Uk1XU3Eu https://t.co/PAq3eTEhmI',
image: 'https://pbs.twimg.com/profile_images/1846292082501054464/oKUC44PF_400x400.jpg',
author: 'Kiko Beats (Kikobeats) / X',
date: '2010-01-02T11:14:31.000Z',
description: 'Building web infra & open source Running https://t.co/4PQvCsVNsA · https://t.co/IG8Qq0IDKi Formerly @vercel ▲',
image: 'https://pbs.twimg.com/profile_images/2000931726244106244/Ty7-KT2j_400x400.jpg',
lang: 'en',
publisher: 'X',
title: '@kikobeats on X',
url: 'https://x.com/Kikobeats',
url: 'https://x.com/kikobeats',
video: null,
}

Expand All @@ -25,14 +25,14 @@ Generated by [AVA](https://avajs.dev).
> Snapshot 1

{
author: '#!/kiko/beats (Kikobeats)',
date: null,
description: 'engineering ▲ @vercel; founder of microlink.io teslahunt.io unavatar.io keyv.js.org osom.js.org browserless.js.org metascraper.js.org',
image: 'https://pbs.twimg.com/profile_images/1846292082501054464/oKUC44PF_400x400.jpg',
author: 'Kiko Beats (Kikobeats) / X',
date: '2010-01-02T11:14:31.000Z',
description: 'Building web infra & open source Running microlink.io · unavatar.io Formerly @vercel ▲',
image: 'https://pbs.twimg.com/profile_images/2000931726244106244/Ty7-KT2j_400x400.jpg',
lang: 'en',
publisher: 'X',
title: '@Kikobeats on X',
url: 'https://x.com/Kikobeats',
url: 'https://x.com/kikobeats',
video: null,
}

Expand All @@ -41,30 +41,30 @@ Generated by [AVA](https://avajs.dev).
> Snapshot 1

{
author: 'Javi López ⛩️ (javilop)',
date: null,
description: 'Comparto tutoriales, herramientas y noticias de IA. Fundador @Magnific_AI 🔥 Guía IAs: https://t.co/JApwm5Tmfo 🗞️ Newsletter: https://t.co/tMELO1P8Wk',
author: 'Javi López ⛩️ (javilop) / X',
date: '2008-08-29T18:52:07.000Z',
description: 'Comparto tutoriales, herramientas y noticias de IA. Y también chorradas. Fundador @Magnific_AI (acq. por @freepik) 🗞️ Newsletter: https://t.co/tMELO1P8Wk',
image: 'https://pbs.twimg.com/profile_images/1581679886267301888/BHGZpOc6_400x400.jpg',
lang: 'en',
publisher: 'X',
title: '@javilop on X',
url: 'https://twitter.com/javilop',
video: null,
url: 'https://x.com/javilop',
video: 'blob:https://x.com/748f188e-eea8-4c74-87c9-00235f0067a8',
}

## from a post

> Snapshot 1

{
author: 'Donald J. Trump (realDonaldTrump)',
date: null,
description: '“Schiff blasted for not focusing on California homeless.” @foxandfriends His District is in terrible shape. He is a corrupt pol who only dreams of the Impeachment Hoax. In my opinion he is mentally deranged!',
image: 'https://pbs.twimg.com/profile_images/874276197357596672/kUuht00m_400x400.jpg',
author: 'Donald J. Trump',
date: '2020-01-30T15:39:43.000Z',
description: null,
image: 'https://abs.twimg.com/rweb/ssr/default/v2/og/image.png',
lang: 'en',
publisher: 'X',
title: '@realDonaldTrump on X',
url: 'https://twitter.com/realDonaldTrump/status/1222907250383245320',
url: 'https://x.com/realDonaldTrump/status/1222907250383245320',
video: null,
}

Expand All @@ -73,29 +73,29 @@ Generated by [AVA](https://avajs.dev).
> Snapshot 1

{
author: '#!/kiko/beats (Kikobeats)',
date: null,
description: 'Experimenting with Clearbit API + Apple TV 3D Parallax https://t.co/Qsm163k4mJ',
image: 'https://pbs.twimg.com/tweet_video_thumb/DDbh3WCXYAAZfz9.jpg:large',
author: 'Kiko Beats',
date: '2017-06-28T19:01:34.000Z',
description: null,
image: 'https://abs.twimg.com/rweb/ssr/default/v2/og/image.png',
lang: 'en',
publisher: 'X',
title: '@Kikobeats on X',
url: 'https://twitter.com/Kikobeats/status/880139124791029763',
video: null,
url: 'https://x.com/Kikobeats/status/880139124791029763',
video: 'https://video.twimg.com/tweet_video/DDbh3WCXYAAZfz9.mp4',
}

## from a post with an image

> Snapshot 1

{
author: 'SmartUA (UaSmart)',
date: null,
description: 'Y terminamos el dia con Cultura de empresa con @patoroco, @flopezluis, Katia, Angélica en @codemotion_es #codemotion2017',
image: 'https://pbs.twimg.com/media/DPadOKpXcAIL-NW.jpg:large',
author: 'SmartUA',
date: '2017-11-24T17:10:07.000Z',
description: null,
image: 'https://abs.twimg.com/rweb/ssr/default/v2/og/image.png',
lang: 'en',
publisher: 'X',
title: '@UaSmart on X',
url: 'https://twitter.com/UaSmart/status/934106870834454529',
url: 'https://x.com/UaSmart/status/934106870834454529',
video: null,
}
Binary file modified packages/metascraper-x/test/snapshots/index.js.snap
Binary file not shown.
Loading