Refactor download logic (mcdallas#1)
* temp

* convert downloaded gifs to mp4

* implement gfycat gifs

* add support for reddit videos

* add support for Imgur albums

* cleanup

* rustfmt

* clippy

* clippy

* more cleanup

* add debugging

* add support for streamable

* update readme

* filter out self posts

* typo
mcdallas authored Nov 7, 2022
1 parent 8a786a5 commit fe7067d
Showing 8 changed files with 929 additions and 504 deletions.
292 changes: 292 additions & 0 deletions Cargo.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
@@ -29,3 +29,4 @@ which = "4.2.2"
mime = "0.3.16"
regex = "1.6.0"
thiserror = "1.0"
zip = "0.6.3"
11 changes: 7 additions & 4 deletions README.md
@@ -5,17 +5,20 @@ A command line tool to download media from Reddit
* Supports:
- Reddit: PNG/JPG images, GIFs, Image galleries, videos
- Giphy: GIFs
- Imgur: Direct images and GIFVs
- Imgur: Direct images, GIFVs and albums
- Gfycat/Redgifs: GIFs
- Streamable: videos
* GIF/GIFV from Imgur/Gfycat/Redgifs are downloaded as mp4
* Does *not* support downloading images from Imgur post links

## Installation

### Prerequisites

To download videos hosted by Reddit, you need to have ffmpeg installed.
Follow this [link](https://www.ffmpeg.org/download.html) for installation instructions.
There is a soft dependency on ffmpeg; for installation instructions, follow this [link](https://www.ffmpeg.org/download.html).

You can skip it, but without it:
* Videos hosted on Reddit itself (v.reddit.com) won't have sound
* GIFs won't be automatically converted to .mp4
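A minimal sketch (an assumption about the approach, not necessarily how gert itself does it) of detecting this soft dependency at runtime with the `which` crate, which the project already lists in Cargo.toml; the function name is illustrative only:

```rust
// Sketch only: probe PATH for ffmpeg and degrade gracefully when it is missing.
// `ffmpeg_available` is an illustrative name, not a function from this crate.
fn ffmpeg_available() -> bool {
    which::which("ffmpeg").is_ok()
}

fn main() {
    if !ffmpeg_available() {
        eprintln!("ffmpeg not found: Reddit videos will have no sound and GIFs won't be converted to mp4");
    }
}
```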

#### Using cargo

1,007 changes: 526 additions & 481 deletions src/download.rs

Large diffs are not rendered by default.

6 changes: 5 additions & 1 deletion src/errors.rs
@@ -32,5 +32,9 @@ pub enum GertError {
#[error("Could not convert from string")]
FromStringConversionError(#[from] FromStrError),
#[error("Error parsing JSON from {0}")]
JsonParseError(String)
JsonParseError(String),
#[error("Ffmpeg error {0}")]
FfmpegError(String),
#[error("Error unzipping file")]
ZipError(#[from] zip::result::ZipError),
}
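The two new variants cover the external tools this commit starts relying on: `FfmpegError` carries a message from the ffmpeg step, and `ZipError` converts automatically from `zip::result::ZipError` via `#[from]`. A minimal sketch of that automatic conversion, using a hypothetical helper rather than code from this commit:

```rust
// Hypothetical helper: the `?` operator turns a zip::result::ZipError into
// GertError::ZipError thanks to the #[from] attribute on the new variant.
fn open_archive(file: std::fs::File) -> Result<zip::ZipArchive<std::fs::File>, GertError> {
    Ok(zip::ZipArchive::new(file)?)
}
```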
14 changes: 7 additions & 7 deletions src/main.rs
@@ -55,7 +55,7 @@ async fn main() -> Result<(), GertError> {
.long("match")
.value_name("MATCH")
.help("Pass a regex expresion to filter the title of the post")
.takes_value(true)
.takes_value(true),
)
.arg(
Arg::with_name("output_directory")
@@ -93,7 +93,7 @@ async fn main() -> Result<(), GertError> {
.value_name("LIMIT")
.help("Limit the number of posts to download")
.takes_value(true)
.default_value("25")
.default_value("25"),
)
.arg(
Arg::with_name("subreddits")
@@ -115,7 +115,7 @@ async fn main() -> Result<(), GertError> {
.help("Time period to download from")
.takes_value(true)
.possible_values(&["now", "hour", "day", "week", "month", "year", "all"])
.default_value("day")
.default_value("day"),
)
.arg(
Arg::with_name("feed")
@@ -125,7 +125,7 @@ async fn main() -> Result<(), GertError> {
.help("Feed to download from")
.takes_value(true)
.possible_values(&["hot", "new", "top", "rising"])
.default_value("hot")
.default_value("hot"),
)
.get_matches();

@@ -268,9 +268,9 @@ async fn main() -> Result<(), GertError> {
let url = format!("{}.json", url);
let single_listing: SingleListing = match session.get(&url).send().await {
Ok(response) => response.json().await.map_err(|_| GertError::JsonParseError(url))?,
Err(_) => exit(&format!("Error fetching data from {}", &url))
Err(_) => exit(&format!("Error fetching data from {}", &url)),
};

let post = single_listing.0.data.children.into_iter().next().unwrap();
if post.data.url.is_none() {
exit("Post contains no media")
@@ -280,7 +280,7 @@ async fn main() -> Result<(), GertError> {
for subreddit in &subreddits {
let listing = Subreddit::new(subreddit).get_feed(feed, limit, period).await?;
posts.extend(
listing.data.children.into_iter().filter(|post| post.data.url.is_some()).filter(
listing.data.children.into_iter().filter(|post| post.data.url.is_some() && !post.data.is_self).filter(
|post| pattern.is_match(post.data.title.as_ref().unwrap_or(&"".to_string())),
),
);
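The added `!post.data.is_self` condition implements the "filter out self posts" item from the commit message: self (text-only) posts carry no downloadable media. A simplified, self-contained sketch of the same predicate, with the structs reduced to just the fields used here (the real `Post`/`PostData` live in src/structures.rs):

```rust
// Reduced stand-ins for the real Post/PostData structs, for illustration only.
struct PostData {
    url: Option<String>,
    title: Option<String>,
    is_self: bool,
}
struct Post {
    data: PostData,
}

// Keep a post only if it links to media, is not a self post, and its title
// matches the user-supplied pattern (an absent title is treated as empty).
fn keep(post: &Post, pattern: &regex::Regex) -> bool {
    post.data.url.is_some()
        && !post.data.is_self
        && pattern.is_match(post.data.title.as_deref().unwrap_or(""))
}
```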
80 changes: 80 additions & 0 deletions src/structures.rs
@@ -137,6 +137,8 @@ pub struct PostData {
pub is_video: Option<bool>,
/// Reddit Media info
pub media: Option<PostMedia>,

pub is_self: bool,
}

#[derive(Deserialize, Debug, Clone)]
@@ -185,6 +187,15 @@ pub struct GfyItem {
#[serde(rename = "mp4Url")]
pub mp4_url: String,
}
#[derive(Deserialize, Debug, Clone)]
pub struct StreamableApiResponse {
pub files: HashMap<String, StreamableFile>,
}

#[derive(Deserialize, Debug, Clone)]
pub struct StreamableFile {
pub url: Option<String>,
}
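These two structs mirror the part of the Streamable API payload gert cares about; the response shape assumed here is roughly `{"files": {"mp4": {"url": "..."}}}`. A sketch of pulling the mp4 URL out of such a body, assuming `serde_json` is available and that the files map uses an "mp4" key:

```rust
// Hypothetical helper, not part of this commit: extract the mp4 URL from a
// Streamable API response using the structs defined above.
fn extract_mp4_url(body: &str) -> Option<String> {
    let parsed: StreamableApiResponse = serde_json::from_str(body).ok()?;
    parsed.files.get("mp4").and_then(|file| file.url.clone())
}
```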

#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Summary {
@@ -207,3 +218,72 @@ impl Add for Summary {
}
}
}

use crate::download::*;
use log::warn;
use url::{Position, Url};
impl Post {
pub fn get_url(&self) -> Option<String> {
let original = self.data.url.as_ref().unwrap();
if let Ok(mut parsed) = Url::parse(original) {
match parsed.path_segments_mut() {
Ok(mut p) => p.pop_if_empty(),
Err(_) => return None,
};
return Some(parsed[..Position::AfterPath].to_owned());
}
None
}

pub fn get_type(&self) -> MediaType {
if self.data.gallery_data.is_some() && self.data.media_metadata.is_some() {
return MediaType::Gallery;
}
let url = match self.get_url() {
Some(u) => u,
None => return MediaType::Unsupported,
};

if url.contains(REDDIT_IMAGE_SUBDOMAIN) {
// if the URL uses the reddit image subdomain and if the extension is
// jpg, png or gif, then we can use the URL as is.
if url.ends_with(JPG_EXTENSION) || url.ends_with(PNG_EXTENSION) {
return MediaType::RedditImage;
} else if url.ends_with(GIF_EXTENSION) {
return MediaType::RedditGif;
} else {
warn!("Unsupported reddit URL: {}", url);
}
}
if url.contains(REDDIT_VIDEO_SUBDOMAIN) {
return MediaType::RedditVideo;
}

if url.contains(GFYCAT_DOMAIN) || url.contains(REDGIFS_DOMAIN) {
return MediaType::GfycatGif;
}
if url.contains(GIPHY_DOMAIN) {
return MediaType::GiphyGif;
}
if url.contains(IMGUR_DOMAIN) {
if url.contains(format!("{}/a/", IMGUR_DOMAIN).as_str()) {
return MediaType::ImgurAlbum;
}
if url.contains(IMGUR_SUBDOMAIN) {
if url.ends_with(GIFV_EXTENSION) || url.ends_with(GIF_EXTENSION) {
return MediaType::ImgurGif;
} else if url.ends_with(PNG_EXTENSION) || url.ends_with(JPG_EXTENSION) {
return MediaType::ImgurImage;
} else {
warn!("Unsupported imgur URL: {}", url);
};
} else {
return MediaType::ImgurUnknown;
}
}
if url.contains(STREAMABLE_DOMAIN) {
return MediaType::StreamableVideo;
}
MediaType::Unsupported
}
}
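`get_url` normalizes the post URL before `get_type` inspects it: a trailing empty path segment is dropped and everything after the path (query string, fragment) is discarded. A standalone sketch of that normalization using the same `url` crate calls; `normalize` is an illustrative name, not a method from this commit:

```rust
use url::{Position, Url};

// Standalone version of the normalization performed in Post::get_url.
fn normalize(original: &str) -> Option<String> {
    let mut parsed = Url::parse(original).ok()?;
    // Drop a trailing empty segment, e.g. the one left by "https://example.com/a/b/".
    parsed.path_segments_mut().ok()?.pop_if_empty();
    // Keep scheme, host and path; discard the query string and fragment.
    Some(parsed[..Position::AfterPath].to_owned())
}

fn main() {
    assert_eq!(
        normalize("https://i.imgur.com/abc123.gifv?tags=funny").as_deref(),
        Some("https://i.imgur.com/abc123.gifv")
    );
}
```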
22 changes: 11 additions & 11 deletions src/utils.rs
@@ -1,4 +1,5 @@
use crate::errors::GertError;
use log::debug;
use mime::Mime;
use reqwest::header::CONTENT_TYPE;
use std::env;
@@ -43,25 +44,24 @@ pub fn application_present(name: String) -> bool {
which(name).is_ok()
}

/// Check if the given URL contains an MP4 track using the content type
pub async fn check_url_is_mp4(url: &str) -> Result<Option<bool>, GertError> {
let response = reqwest::get(url).await?;
pub async fn check_url_has_mime_type(
url: &str,
mime_type: mime::Name<'_>,
) -> Result<bool, GertError> {
let client = reqwest::Client::new();
let response = client.head(url).send().await?;
let headers = response.headers();

match headers.get(CONTENT_TYPE) {
None => Ok(None),
None => Ok(false),
Some(content_type) => {
let content_type = Mime::from_str(content_type.to_str()?)?;
let is_video = match (content_type.type_(), content_type.subtype()) {
(mime::VIDEO, mime::MP4) => true,
(mime::APPLICATION, mime::XML) => false,
_ => false,
};
Ok(Some(is_video))
let success = content_type.subtype() == mime_type;
debug!("Checking if URL has mime type {}, success: {}", mime_type, success);
Ok(success)
}
}
}
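The rewritten helper issues a HEAD request and inspects only the Content-Type header, so a caller can, for example, check whether a link really serves MP4 before downloading it. A hedged usage sketch, assuming it is called from elsewhere in this crate; the wrapper name is illustrative:

```rust
// Hypothetical caller: only the response headers are fetched (HEAD request),
// the body itself is never downloaded.
async fn is_really_mp4(url: &str) -> Result<bool, GertError> {
    check_url_has_mime_type(url, mime::MP4).await
}
```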

pub struct UserEnv {
pub username: String,
pub password: String,
