Skip to content

Commit

Permalink
Extract visitor data from sw.js_data API instead of randomly generating
Browse files Browse the repository at this point in the history
Co-Authored-By: sheepmax <32977795+sheepmax@users.noreply.github.com>
  • Loading branch information
ShareASmile and sheepmax committed Feb 2, 2025
1 parent 9c340b8 commit 66fd475
Showing 1 changed file with 50 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ public final class YoutubeParsingHelper {
private YoutubeParsingHelper() {
    // Private and intentionally empty: prevents instantiation from outside this class.
}

/**
 * The base URL of the plain YouTube website, including the trailing slash.
 */
public static final String YOUTUBE_BASE = "https://www.youtube.com/";

/**
* The base URL of requests of the {@code WEB} clients to the InnerTube internal API.
*/
Expand Down Expand Up @@ -227,6 +232,11 @@ private YoutubeParsingHelper() {
private static final String CONTENT_PLAYBACK_NONCE_ALPHABET =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";

/**
 * Regex for extracting any JSON array.
 * <p>
 * The quantifier is greedy: the match starts at an opening {@code [} and extends to the
 * last closing {@code ]} that {@code .*} can reach.
 * </p>
 */
private static final String JSON_ARRAY = "\\[.*\\]";

/**
* The device machine id for the iPhone 15 Pro Max,
* used to get 60fps with the {@code iOS} client.
Expand Down Expand Up @@ -333,6 +343,35 @@ public static String randomVisitorData(final ContentCountry country) {
return pb.toUrlencodedBase64();
}

/**
 * Requests the {@code sw.js_data} YouTube endpoint and extracts the visitor data from it.
 *
 * <p>
 * The visitor data is not decoded into a programmatic form; the encoded string is returned
 * as found in the response. This is useful for API requests which require
 * {@code visitorData} to work.
 * </p>
 *
 * <p>
 * The extraction relies on fixed positions inside the nested JSON array of the response
 * and is therefore brittle: it is likely to fail with future changes of the endpoint. In
 * that case a {@link ParsingException} is thrown, so callers can fall back to another
 * source of visitor data instead of silently receiving {@code null}.
 * </p>
 *
 * @return the extracted encoded visitor data string, never {@code null} nor empty
 * @throws ParsingException   if no JSON array could be extracted from the response, or if
 *                            no visitor data string is present at the expected position
 * @throws IOException        when it cannot fetch the API data
 * @throws ReCaptchaException when it cannot fetch the API data
 */
public static String extractVisitorData()
        throws ParsingException, IOException, ReCaptchaException {
    final String url = YOUTUBE_BASE + "sw.js_data";
    final var headers = getOriginReferrerHeaders(YOUTUBE_BASE);
    final String response = getDownloader().get(url, headers).responseBody();
    final JsonArray jsonArray = JsonUtils.toJsonArray(
            Parser.matchGroup(JSON_ARRAY, response, 0));

    // Got this particular extraction logic by finding where the visitor data
    // lives through comparison. If the structure changes this is likely to fail.
    final String visitorData = jsonArray
            .getArray(0)
            .getArray(2)
            .getArray(0)
            .getArray(0)
            .getString(13);

    // The JSON getters used above do not throw on a missing or mistyped element (they
    // yield an empty array or null instead), so a changed response layout has to be
    // detected explicitly here; otherwise callers relying on the ParsingException to
    // trigger a fallback would end up sending a null visitor data.
    if (visitorData == null || visitorData.isEmpty()) {
        throw new ParsingException(
                "Could not extract visitor data from the sw.js_data response");
    }
    return visitorData;
}

/**
* Parses the duration string of the video expecting ":" or "." as separators
*
Expand Down Expand Up @@ -1257,6 +1296,16 @@ public static JsonBuilder<JsonObject> prepareAndroidMobileJsonBuilder(
public static JsonBuilder<JsonObject> prepareIosMobileJsonBuilder(
@Nonnull final Localization localization,
@Nonnull final ContentCountry contentCountry) {

// Try to extract the visitor data from the sw.js_data API, but otherwise
// fall back to randomly generating the visitor data.
String visitorData = null;
try {
visitorData = extractVisitorData();
} catch (ParsingException | IOException | ReCaptchaException e) {
visitorData = randomVisitorData(contentCountry);
}

// @formatter:off
return JsonObject.builder()
.object("context")
Expand All @@ -1273,7 +1322,7 @@ public static JsonBuilder<JsonObject> prepareIosMobileJsonBuilder(
// The build version corresponding to the iOS version used can be found on
// https://theapplewiki.com/wiki/Firmware/iPhone/18.x#iPhone_15_Pro_Max
.value("osVersion", "18.2.1.22C161")
.value("visitorData", randomVisitorData(contentCountry))
.value("visitorData", visitorData)
.value("hl", localization.getLocalizationCode())
.value("gl", contentCountry.getCountryCode())
.end()
Expand Down

0 comments on commit 66fd475

Please sign in to comment.