Skip to content

Commit 247202d

Browse files
author
Misha
authored
Merge pull request #128 from apify/fix/relative-url-regex
2 parents 5450dcb + 969cfd5 commit 247202d

File tree

5 files changed

+51
-30
lines changed

5 files changed

+51
-30
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
0.7.6 / 2021/04/14
2+
==================
3+
4+
- Fix relative link rendering
5+
16
0.7.5 / 2021/04/9
27
==================
38

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "apify-shared",
3-
"version": "0.7.5",
3+
"version": "0.7.6",
44
"description": "Tools and constants shared across Apify projects.",
55
"main": "build/index.js",
66
"types": "types/index.d.ts",

src/consts.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -603,3 +603,5 @@ export const MARKETPLACE_USER_ROLES = {
603603
DATA_EXPERT: 'DATA_EXPERT',
604604
CUSTOMER: 'CUSTOMER',
605605
};
606+
607+
// Branch name used as the fallback when a git branch name is missing
export const GIT_MAIN_BRANCH = 'main';

src/markdown_renderers.js

Lines changed: 40 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
import gitUrlParse from 'git-url-parse';
22
import * as utils from './utilities.client';
3+
import { GIT_MAIN_BRANCH } from './consts';
4+
5+
const regex = require('./regexs');
36

47
/**
58
* @param {string} headingId
@@ -65,10 +68,10 @@ export const customHeadingRenderer = (text, level, raw) => {
6568
};
6669

6770
/**
68-
* @param {string} repoUrl
71+
* @param {string} gitRepoUrl
6972
*/
70-
export const parseRepoName = (repoUrl) => {
71-
const parsedRepoUrl = gitUrlParse(repoUrl);
73+
export const parseRepoName = (gitRepoUrl) => {
74+
const parsedRepoUrl = gitUrlParse(gitRepoUrl);
7275
// Can't use parsedRepoUrl.full_name on its own as Bitbucket adds irrelevant path suffix to the end of it
7376
const repoName = parsedRepoUrl.full_name.split('/').slice(0, 2).join('/');
7477
return repoName;
@@ -77,18 +80,21 @@ export const parseRepoName = (repoUrl) => {
7780
/**
7881
* Generates URLs for RAW content such as images
7982
*
80-
* @param {string} repoUrl
81-
* @param {string} branchName
83+
* @param {string} gitRepoUrl
84+
* @param {string} gitBranchName
8285
*/
83-
export const generateRawGitRepoUrlPrefix = (repoUrl, branchName) => {
86+
export const generateRawGitRepoUrlPrefix = (gitRepoUrl, gitBranchName) => {
8487
let urlPrefix;
85-
const repoFullName = parseRepoName(repoUrl);
88+
const repoFullName = parseRepoName(gitRepoUrl);
8689

87-
if (repoUrl.includes('github.com')) {
90+
// Avoid errors created by missing branch name / badly formed URLs
91+
const branchName = gitBranchName || GIT_MAIN_BRANCH;
92+
93+
if (gitRepoUrl.includes('github.com')) {
8894
urlPrefix = `https://raw.githubusercontent.com/${repoFullName}/${branchName}`;
89-
} else if (repoUrl.includes('gitlab.com')) {
95+
} else if (gitRepoUrl.includes('gitlab.com')) {
9096
urlPrefix = `https://gitlab.com/${repoFullName}/-/raw/${branchName}`;
91-
} else if (repoUrl.includes('bitbucket.org')) {
97+
} else if (gitRepoUrl.includes('bitbucket.org')) {
9298
// Note: bytebucket is a raw content serving service by Bitbucket
9399
urlPrefix = `https://bytebucket.org/${repoFullName}/raw/${branchName}`;
94100
}
@@ -98,13 +104,13 @@ export const generateRawGitRepoUrlPrefix = (repoUrl, branchName) => {
98104
/**
99105
* Generates URLs for files and folders
100106
*
101-
* @param {string} repoUrl
102-
* @param {string} branchName
107+
* @param {string} gitRepoUrl
108+
* @param {string} gitBranchName
103109
* @param {string} href
104110
*/
105-
export const generateGitRepoUrlPrefix = (repoUrl, branchName, href) => {
111+
export const generateGitRepoUrlPrefix = (gitRepoUrl, gitBranchName, href) => {
106112
let urlPrefix;
107-
const repoFullName = parseRepoName(repoUrl);
113+
const repoFullName = parseRepoName(gitRepoUrl);
108114

109115
const hrefParts = href.split('/');
110116
const lastHrefPart = hrefParts[hrefParts.length - 1];
@@ -113,11 +119,14 @@ export const generateGitRepoUrlPrefix = (repoUrl, branchName, href) => {
113119
// otherwise we assume the link is for a directory (tree)
114120
const isTreeOrBlob = lastHrefPart.includes('.') ? 'blob' : 'tree';
115121

116-
if (repoUrl.includes('github.com')) {
122+
// Avoid errors created by missing branch name / badly formed URLs
123+
const branchName = gitBranchName || GIT_MAIN_BRANCH;
124+
125+
if (gitRepoUrl.includes('github.com')) {
117126
urlPrefix = `https://github.com/${repoFullName}/${isTreeOrBlob}/${branchName}`;
118-
} else if (repoUrl.includes('gitlab.com')) {
127+
} else if (gitRepoUrl.includes('gitlab.com')) {
119128
urlPrefix = `https://gitlab.com/${repoFullName}/-/${isTreeOrBlob}/${branchName}`;
120-
} else if (repoUrl.includes('bitbucket.org')) {
129+
} else if (gitRepoUrl.includes('bitbucket.org')) {
121130
// Note: Bitbucket uses a single /src/ path for both files and folders, so isTreeOrBlob is not needed here
122131
urlPrefix = `https://bitbucket.org/${repoFullName}/src/${branchName}`;
123132
}
@@ -127,25 +136,27 @@ export const generateGitRepoUrlPrefix = (repoUrl, branchName, href) => {
127136
/**
128137
* Replaces relative links with absolute ones that point to the actor's git repo.
129138
* Mainly for use in actor READMEs
130-
* Parses the actor's repo URL to extract the repo name and owner name.
139+
* The flow:
140+
* 1) handle anchors, Apify links, and contact links (these don't point to a git repo and shouldn't have rel=nofollow).
141+
* 2) handle relative links for the Git repo and convert them to absolute
142+
* 3) handle absolute links
131143
* @param {string} href
132144
* @param {string} text
133-
* @param {string} repoUrl
134-
* @param {string} branchName
145+
* @param {string} gitRepoUrl
146+
* @param {string} gitBranchName
135147
* @return {string}
136148
*/
137-
export const customLinkRenderer = (href, text, repoUrl, branchName) => {
149+
export const customLinkRenderer = (href, text, gitRepoUrl, gitBranchName) => {
138150
// Handle anchor links, local Apify links, and mailto
139151
// Return Apify domain links without rel="nofollow" for SEO
140-
const contactLinkRegex = new RegExp('^(mailto|tel|sms):.*$', 'i');
141-
if (href.startsWith('#') || href.includes('apify.com') || contactLinkRegex.test(href)) {
152+
if (href.startsWith('#') || href.includes('apify.com') || regex.CONTACT_LINK_REGEX.test(href)) {
142153
// Ensure that anchors have lowercase href
143154
return `<a href="${href.toLowerCase()}">${text}</a>`;
144155
}
145156
// Only target relative URLs, which are used to refer to the git repo, and not anchors or absolute URLs
146157
const urlIsRelative = utils.isUrlRelative(href);
147-
if (urlIsRelative) {
148-
const urlPrefix = generateGitRepoUrlPrefix(repoUrl, branchName, href);
158+
if (urlIsRelative && gitRepoUrl) {
159+
const urlPrefix = generateGitRepoUrlPrefix(gitRepoUrl, gitBranchName, href);
149160
// Since the README will always be in the root, the hrefs will have the same prefix, which needs to be taken off for the URL
150161
const cleanedHref = href.startsWith('./') ? href.replace('./', '') : href;
151162
href = `${urlPrefix}/${cleanedHref}`;
@@ -160,15 +171,15 @@ export const customLinkRenderer = (href, text, repoUrl, branchName) => {
160171
* Parses the actor's repo URL to extract the repo name and owner name.
161172
* @param {string} href
162173
* @param {string} text
163-
* @param {string} repoUrl
174+
* @param {string} gitRepoUrl
164175
* @param {string} gitBranchName
165176
* @return {string}
166177
*/
167-
export const customImageRenderer = (href, text, repoUrl, gitBranchName) => {
178+
export const customImageRenderer = (href, text, gitRepoUrl, gitBranchName) => {
168179
// Only target relative URLs, which are used to refer to the git repo, and not anchors or absolute URLs
169180
const urlIsRelative = utils.isUrlRelative(href);
170-
if (urlIsRelative) {
171-
const urlPrefix = generateRawGitRepoUrlPrefix(repoUrl, gitBranchName);
181+
if (urlIsRelative && gitRepoUrl) {
182+
const urlPrefix = generateRawGitRepoUrlPrefix(gitRepoUrl, gitBranchName);
172183
// Since the README will always be in the root, the hrefs will have the same prefix, which needs to be taken off for the URL
173184
const cleanedHref = href.startsWith('./') ? href.replace('./', '') : href;
174185
href = `${urlPrefix}/${cleanedHref}`;

src/regexs.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,3 +110,6 @@ export const SPLIT_PATH_REGEX = /[^/]+/g;
110110

111111
// Check if a URL is relative, i.e. does not start with a protocol
112112
// Matches any URL that should be treated as relative. The negative lookahead rejects
// values starting with "www.", an http(s)/ftp(s) scheme, a Windows drive prefix such
// as "C:\", or a protocol-relative "//".
// A regex literal is used on purpose: in the former string-built pattern the sequence
// "\\|" collapsed into an escaped pipe, which fused the drive-letter and "//" alternatives
// into one literal sequence, so both "C:\..." and "//host/..." were reported as relative.
// The dot after "www" is escaped so that paths like "www-assets/logo.png" stay relative.
export const RELATIVE_URL_REGEX = /^(?!www\.|(?:http|ftp)s?:\/\/|[A-Za-z]:\\|\/\/).*/i;
113+
114+
// Check if a link is a mailto/tel/sms type
115+
// Case-insensitive match for hrefs that begin with the mailto:, tel: or sms: scheme;
// whatever follows the scheme on the same line is accepted without validation.
export const CONTACT_LINK_REGEX = /^(mailto|tel|sms):.*$/i;

0 commit comments

Comments
 (0)