11import gitUrlParse from 'git-url-parse' ;
22import * as utils from './utilities.client' ;
3+ import { GIT_MAIN_BRANCH } from './consts' ;
4+
5+ const regex = require ( './regexs' ) ;
36
47/**
58 * @param {string } headingId
@@ -65,10 +68,10 @@ export const customHeadingRenderer = (text, level, raw) => {
6568} ;
6669
6770/**
68- * @param {string } repoUrl
71+ * @param {string } gitRepoUrl
6972 */
70- export const parseRepoName = ( repoUrl ) => {
71- const parsedRepoUrl = gitUrlParse ( repoUrl ) ;
73+ export const parseRepoName = ( gitRepoUrl ) => {
74+ const parsedRepoUrl = gitUrlParse ( gitRepoUrl ) ;
7275 // Can't use parsedRepoUrl.full_name on its own, as Bitbucket appends an irrelevant path suffix to it
7376 const repoName = parsedRepoUrl . full_name . split ( '/' ) . slice ( 0 , 2 ) . join ( '/' ) ;
7477 return repoName ;
@@ -77,18 +80,21 @@ export const parseRepoName = (repoUrl) => {
7780/**
7881 * Generates URLs for RAW content such as images
7982 *
80- * @param {string } repoUrl
81- * @param {string } branchName
83+ * @param {string } gitRepoUrl
84+ * @param {string } gitBranchName
8285 */
83- export const generateRawGitRepoUrlPrefix = ( repoUrl , branchName ) => {
86+ export const generateRawGitRepoUrlPrefix = ( gitRepoUrl , gitBranchName ) => {
8487 let urlPrefix ;
85- const repoFullName = parseRepoName ( repoUrl ) ;
88+ const repoFullName = parseRepoName ( gitRepoUrl ) ;
8689
87- if ( repoUrl . includes ( 'github.com' ) ) {
90+ // Avoid errors created by missing branch name / badly formed URLs
91+ const branchName = gitBranchName || GIT_MAIN_BRANCH ;
92+
93+ if ( gitRepoUrl . includes ( 'github.com' ) ) {
8894 urlPrefix = `https://raw.githubusercontent.com/${ repoFullName } /${ branchName } ` ;
89- } else if ( repoUrl . includes ( 'gitlab.com' ) ) {
95+ } else if ( gitRepoUrl . includes ( 'gitlab.com' ) ) {
9096 urlPrefix = `https://gitlab.com/${ repoFullName } /-/raw/${ branchName } ` ;
91- } else if ( repoUrl . includes ( 'bitbucket.org' ) ) {
97+ } else if ( gitRepoUrl . includes ( 'bitbucket.org' ) ) {
9298 // Note: bytebucket is a raw content serving service by Bitbucket
9399 urlPrefix = `https://bytebucket.org/${ repoFullName } /raw/${ branchName } ` ;
94100 }
@@ -98,13 +104,13 @@ export const generateRawGitRepoUrlPrefix = (repoUrl, branchName) => {
98104/**
99105 * Generates URLs for files and folders
100106 *
101- * @param {string } repoUrl
102- * @param {string } branchName
107+ * @param {string } gitRepoUrl
108+ * @param {string } gitBranchName
103109 * @param {string } href
104110 */
105- export const generateGitRepoUrlPrefix = ( repoUrl , branchName , href ) => {
111+ export const generateGitRepoUrlPrefix = ( gitRepoUrl , gitBranchName , href ) => {
106112 let urlPrefix ;
107- const repoFullName = parseRepoName ( repoUrl ) ;
113+ const repoFullName = parseRepoName ( gitRepoUrl ) ;
108114
109115 const hrefParts = href . split ( '/' ) ;
110116 const lastHrefPart = hrefParts [ hrefParts . length - 1 ] ;
@@ -113,11 +119,14 @@ export const generateGitRepoUrlPrefix = (repoUrl, branchName, href) => {
113119 // otherwise we assume the link is for a directory (tree)
114120 const isTreeOrBlob = lastHrefPart . includes ( '.' ) ? 'blob' : 'tree' ;
115121
116- if ( repoUrl . includes ( 'github.com' ) ) {
122+ // Avoid errors created by missing branch name / badly formed URLs
123+ const branchName = gitBranchName || GIT_MAIN_BRANCH ;
124+
125+ if ( gitRepoUrl . includes ( 'github.com' ) ) {
117126 urlPrefix = `https://github.com/${ repoFullName } /${ isTreeOrBlob } /${ branchName } ` ;
118- } else if ( repoUrl . includes ( 'gitlab.com' ) ) {
127+ } else if ( gitRepoUrl . includes ( 'gitlab.com' ) ) {
119128 urlPrefix = `https://gitlab.com/${ repoFullName } /-/${ isTreeOrBlob } /${ branchName } ` ;
120- } else if ( repoUrl . includes ( 'bitbucket.org' ) ) {
129+ } else if ( gitRepoUrl . includes ( 'bitbucket.org' ) ) {
122131 // Note: unlike the GitHub and GitLab URLs above, the Bitbucket URL uses a single /src/ path segment and does not use isTreeOrBlob
122131 urlPrefix = `https://bitbucket.org/${ repoFullName } /src/${ branchName } ` ;
123132 }
@@ -127,25 +136,27 @@ export const generateGitRepoUrlPrefix = (repoUrl, branchName, href) => {
127136/**
128137 * Replaces relative links with absolute ones that point to the actor's git repo.
129138 * Mainly for use in actor READMEs
130- * Parses the actor's repo URL to extract the repo name and owner name.
139+ * The flow:
140+ * 1) handle anchors, Apify links, and contact links (these don't point to a git repo and shouldn't have rel=nofollow).
141+ * 2) handle relative links for the Git repo and convert them to absolute
142+ * 3) handle absolute links
131143 * @param {string } href
132144 * @param {string } text
133- * @param {string } repoUrl
134- * @param {string } branchName
145+ * @param {string } gitRepoUrl
146+ * @param {string } gitBranchName
135147 * @return {string }
136148*/
137- export const customLinkRenderer = ( href , text , repoUrl , branchName ) => {
149+ export const customLinkRenderer = ( href , text , gitRepoUrl , gitBranchName ) => {
138150 // Handle anchor links, local Apify links, and mailto
139151 // Return Apify domain links without rel="nofollow" for SEO
140- const contactLinkRegex = new RegExp ( '^(mailto|tel|sms):.*$' , 'i' ) ;
141- if ( href . startsWith ( '#' ) || href . includes ( 'apify.com' ) || contactLinkRegex . test ( href ) ) {
152+ if ( href . startsWith ( '#' ) || href . includes ( 'apify.com' ) || regex . CONTACT_LINK_REGEX . test ( href ) ) {
142153 // Ensure that anchors have lowercase href
143154 return `<a href="${ href . toLowerCase ( ) } ">${ text } </a>` ;
144155 }
145156 // Only target relative URLs, which are used to refer to the git repo, and not anchors or absolute URLs
146157 const urlIsRelative = utils . isUrlRelative ( href ) ;
147- if ( urlIsRelative ) {
148- const urlPrefix = generateGitRepoUrlPrefix ( repoUrl , branchName , href ) ;
158+ if ( urlIsRelative && gitRepoUrl ) {
159+ const urlPrefix = generateGitRepoUrlPrefix ( gitRepoUrl , gitBranchName , href ) ;
149160 // Since the README will always be in the root, the hrefs will have the same prefix, which needs to be taken off for the URL
150161 const cleanedHref = href . startsWith ( './' ) ? href . replace ( './' , '' ) : href ;
151162 href = `${ urlPrefix } /${ cleanedHref } ` ;
@@ -160,15 +171,15 @@ export const customLinkRenderer = (href, text, repoUrl, branchName) => {
160171 * Parses the actor's repo URL to extract the repo name and owner name.
161172 * @param {string } href
162173 * @param {string } text
163- * @param {string } repoUrl
174+ * @param {string } gitRepoUrl
164175 * @param {string } gitBranchName
165176 * @return {string }
166177*/
167- export const customImageRenderer = ( href , text , repoUrl , gitBranchName ) => {
178+ export const customImageRenderer = ( href , text , gitRepoUrl , gitBranchName ) => {
168179 // Only target relative URLs, which are used to refer to the git repo, and not anchors or absolute URLs
169180 const urlIsRelative = utils . isUrlRelative ( href ) ;
170- if ( urlIsRelative ) {
171- const urlPrefix = generateRawGitRepoUrlPrefix ( repoUrl , gitBranchName ) ;
181+ if ( urlIsRelative && gitRepoUrl ) {
182+ const urlPrefix = generateRawGitRepoUrlPrefix ( gitRepoUrl , gitBranchName ) ;
172183 // Since the README will always be in the root, the hrefs will have the same prefix, which needs to be taken off for the URL
173184 const cleanedHref = href . startsWith ( './' ) ? href . replace ( './' , '' ) : href ;
174185 href = `${ urlPrefix } /${ cleanedHref } ` ;
0 commit comments