Skip to content

Commit

Permalink
Use Description object for comments text.
Browse files (browse the repository at this point in the history)
  • Loading branch information
FireMasterK committed Nov 28, 2022
1 parent 60fb30f commit 39bb66b
Show file tree
Hide file tree
Showing 9 changed files with 41 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.stream.Description;

import javax.annotation.Nullable;

public class CommentsInfoItem extends InfoItem {

private String commentId;
private String commentText;
private Description commentText;
private String uploaderName;
private String uploaderAvatarUrl;
private String uploaderUrl;
Expand Down Expand Up @@ -43,11 +44,11 @@ public void setCommentId(final String commentId) {
this.commentId = commentId;
}

public String getCommentText() {
public Description getCommentText() {
return commentText;
}

public void setCommentText(final String commentText) {
public void setCommentText(final Description commentText) {
this.commentText = commentText;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.stream.Description;
import org.schabi.newpipe.extractor.stream.StreamExtractor;

import javax.annotation.Nullable;
Expand Down Expand Up @@ -41,8 +42,8 @@ default String getTextualLikeCount() throws ParsingException {
/**
* The text of the comment
*/
default String getCommentText() throws ParsingException {
return "";
default Description getCommentText() throws ParsingException {
return null;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import org.jsoup.nodes.Element;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.stream.Description;

import java.util.Objects;

Expand All @@ -18,7 +19,7 @@ public BandcampCommentsInfoItemExtractor(final Element writing, final String url

@Override
public String getName() throws ParsingException {
return getCommentText();
return getCommentText().getContent();
}

@Override
Expand All @@ -32,12 +33,14 @@ public String getThumbnailUrl() throws ParsingException {
}

@Override
public String getCommentText() throws ParsingException {
return writing.getElementsByClass("text").stream()
public Description getCommentText() throws ParsingException {
final var text = writing.getElementsByClass("text").stream()
.filter(Objects::nonNull)
.map(Element::ownText)
.findFirst()
.orElseThrow(() -> new ParsingException("Could not get comment text"));

return new Description(text, Description.PLAIN_TEXT);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper;
import org.schabi.newpipe.extractor.stream.Description;
import org.schabi.newpipe.extractor.utils.JsonUtils;

import java.util.Objects;
Expand Down Expand Up @@ -59,13 +60,15 @@ public DateWrapper getUploadDate() throws ParsingException {
}

@Override
public String getCommentText() throws ParsingException {
public Description getCommentText() throws ParsingException {
final String htmlText = JsonUtils.getString(item, "text");
try {
final Document doc = Jsoup.parse(htmlText);
return doc.body().text();
final var text = doc.body().text();
return new Description(text, Description.PLAIN_TEXT);
} catch (final Exception e) {
return htmlText.replaceAll("(?s)<[^>]*>(\\s*<[^>]*>)*", "");
final var text = htmlText.replaceAll("(?s)<[^>]*>(\\s*<[^>]*>)*", "");
return new Description(text, Description.PLAIN_TEXT);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper;
import org.schabi.newpipe.extractor.stream.Description;

import javax.annotation.Nullable;
import java.util.Objects;
Expand All @@ -24,8 +25,8 @@ public String getCommentId() {
}

@Override
public String getCommentText() {
return json.getString("body");
public Description getCommentText() {
return new Description(json.getString("body"), Description.PLAIN_TEXT);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
package org.schabi.newpipe.extractor.services.youtube.extractors;

import static org.schabi.newpipe.extractor.comments.CommentsInfoItem.UNKNOWN_REPLY_COUNT;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;

import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;

import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.stream.Description;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.Utils;

import javax.annotation.Nullable;

import static org.schabi.newpipe.extractor.comments.CommentsInfoItem.UNKNOWN_REPLY_COUNT;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;

public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor {

private final JsonObject json;
Expand Down Expand Up @@ -176,18 +176,20 @@ public String getTextualLikeCount() throws ParsingException {
}

@Override
public String getCommentText() throws ParsingException {
public Description getCommentText() throws ParsingException {
try {
final JsonObject contentText = JsonUtils.getObject(getCommentRenderer(), "contentText");
if (contentText.isEmpty()) {
// completely empty comments as described in
// https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584
return "";
return new Description("", Description.PLAIN_TEXT);
}
final String commentText = getTextFromObject(contentText, true);
// YouTube adds U+FEFF in some comments.
// eg. https://www.youtube.com/watch?v=Nj4F63E59io<feff>
return Utils.removeUTF8BOM(commentText);
final String commentTextBomRemoved = Utils.removeUTF8BOM(commentText);

return new Description(commentTextBomRemoved, Description.HTML);
} catch (final Exception e) {
throw new ParsingException("Could not get comment text", e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public void testGetCommentsAllData() throws IOException, ExtractionException {
for (CommentsInfoItem c : comments.getItems()) {
assertFalse(Utils.isBlank(c.getUploaderName()));
assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
assertFalse(Utils.isBlank(c.getCommentText()));
assertFalse(Utils.isBlank(c.getCommentText().getContent()));
assertFalse(Utils.isBlank(c.getName()));
assertFalse(Utils.isBlank(c.getThumbnailUrl()));
assertFalse(Utils.isBlank(c.getUrl()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ void testGetCommentsAllData() throws IOException, ExtractionException {
assertFalse(Utils.isBlank(c.getUploaderName()));
assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
assertFalse(Utils.isBlank(c.getCommentId()));
assertFalse(Utils.isBlank(c.getCommentText()));
assertFalse(Utils.isBlank(c.getCommentText().getContent()));
assertFalse(Utils.isBlank(c.getName()));
assertFalse(Utils.isBlank(c.getTextualUploadDate()));
assertFalse(Utils.isBlank(c.getThumbnailUrl()));
Expand All @@ -91,7 +91,7 @@ private boolean findInComments(InfoItemsPage<CommentsInfoItem> comments, String

private boolean findInComments(List<CommentsInfoItem> comments, String comment) {
for (CommentsInfoItem c : comments) {
if (c.getCommentText().contains(comment)) {
if (c.getCommentText().getContent().contains(comment)) {
return true;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ public void testGetCommentsAllData() throws IOException, ExtractionException {
assertFalse(Utils.isBlank(c.getUploaderName()));
assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
assertFalse(Utils.isBlank(c.getCommentId()));
assertFalse(Utils.isBlank(c.getCommentText()));
assertFalse(Utils.isBlank(c.getCommentText().getContent()));
assertFalse(Utils.isBlank(c.getName()));
assertFalse(Utils.isBlank(c.getTextualUploadDate()));
assertNotNull(c.getUploadDate());
Expand All @@ -111,7 +111,7 @@ private boolean findInComments(InfoItemsPage<CommentsInfoItem> comments, String

private boolean findInComments(List<CommentsInfoItem> comments, String comment) {
for (CommentsInfoItem c : comments) {
if (c.getCommentText().contains(comment)) {
if (c.getCommentText().getContent().contains(comment)) {
return true;
}
}
Expand Down Expand Up @@ -152,9 +152,9 @@ public void testGetCommentsAllData() throws IOException, ExtractionException {
assertFalse(Utils.isBlank(c.getUrl()));
assertTrue(c.getLikeCount() >= 0);
if (c.getCommentId().equals("Ugga_h1-EXdHB3gCoAEC")) { // comment without text
assertTrue(Utils.isBlank(c.getCommentText()));
assertTrue(Utils.isBlank(c.getCommentText().getContent()));
} else {
assertFalse(Utils.isBlank(c.getCommentText()));
assertFalse(Utils.isBlank(c.getCommentText().getContent()));
}
}
}
Expand Down Expand Up @@ -193,7 +193,7 @@ public void testGetCommentsAllData() throws IOException, ExtractionException {
assertFalse(Utils.isBlank(c.getThumbnailUrl()));
assertFalse(Utils.isBlank(c.getUrl()));
assertTrue(c.getLikeCount() >= 0);
assertFalse(Utils.isBlank(c.getCommentText()));
assertFalse(Utils.isBlank(c.getCommentText().getContent()));
if (c.isHeartedByUploader()) {
heartedByUploader = true;
}
Expand Down Expand Up @@ -233,7 +233,7 @@ public void testGetCommentsAllData() throws IOException, ExtractionException {
assertFalse(Utils.isBlank(c.getThumbnailUrl()));
assertFalse(Utils.isBlank(c.getUrl()));
assertTrue(c.getLikeCount() >= 0);
assertFalse(Utils.isBlank(c.getCommentText()));
assertFalse(Utils.isBlank(c.getCommentText().getContent()));
}

assertTrue(comments.getItems().get(0).isPinned(), "First comment isn't pinned");
Expand Down Expand Up @@ -328,7 +328,7 @@ public void testGetCommentsFirstReplies() throws IOException, ExtractionExceptio

InfoItemsPage<CommentsInfoItem> replies = extractor.getPage(firstComment.getReplies());

assertEquals("First", replies.getItems().get(0).getCommentText(),
assertEquals("First", replies.getItems().get(0).getCommentText().getContent(),
"First reply comment did not match");
}

Expand Down

0 comments on commit 39bb66b

Please sign in to comment.