Skip to content
This repository has been archived by the owner on Jan 16, 2023. It is now read-only.

Commit

Permalink
fix: deduplicated translate
Browse files Browse the repository at this point in the history
  • Loading branch information
theowenyoung committed Nov 7, 2022
1 parent 5e0b163 commit d4176fb
Show file tree
Hide file tree
Showing 6 changed files with 109 additions and 40 deletions.
8 changes: 8 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,14 @@ Firefox商店[沉浸式翻译](https://addons.mozilla.org/en-US/firefox/addon/im

---

## Todo


- [ ] - backup默认文件名还是TWP
- [ ] - option页面从extension中打开的显示问题


---

## Install

Expand Down
2 changes: 2 additions & 0 deletions src/background/background.js
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
return true
} else if (request.action === "getTabHostName") {
sendResponse(new URL(sender.tab.url).hostname)
}else if (request.action === "getTabUrl") {
sendResponse((sender.tab.url))
} else if (request.action === "thisFrameIsInFocus") {
chrome.tabs.sendMessage(sender.tab.id, {action: "anotherFrameIsInFocus"}, checkedLastError)
} else if (request.action === "getTabMimeType") {
Expand Down
2 changes: 1 addition & 1 deletion src/chrome_manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"default_locale": "en",
"name": "Immersive Translate - 沉浸式翻译",
"description": "支持整页翻译·双语显示·只翻译正文。布局全面优化,和浏览器阅读模式类似,对Twitter, Reddit等网站做了定制优化",
"version": "0.0.7",
"version": "0.0.9",
"homepage_url": "https://github.com/theowenyoung/Traduzir-paginas-web",

"commands": {
Expand Down
116 changes: 82 additions & 34 deletions src/contentScript/enhance.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ const enhanceOriginalDisplayValueAttributeName = "data-translationoriginaldispla
const enhanceHtmlTagsInlineIgnore = ['BR', 'CODE', 'KBD', 'WBR'] // and input if type is submit or button, and pre depending on settings
const enhanceHtmlTagsNoTranslate = ['TITLE', 'SCRIPT', 'STYLE', 'TEXTAREA', 'SVG', 'svg'] //TODO verificar porque 'svg' é com letras minúsculas
const blockElements = [
'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'OL', 'P', 'TABLE', 'UL'
'H1', 'H2', 'H3', 'H4', 'H5', 'H6','TABLE', 'OL', 'UL', 'P',
];
const headingElements = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'];

const pdfSelectorsConfig = {
regex:
Expand Down Expand Up @@ -98,7 +99,18 @@ const translateSelectors = [

]
},
pdfSelectorsConfig
pdfSelectorsConfig,
{
hostname:"www.cell.com",
selectors:[
"div.section-paragraph > div.section-paragraph > div.section-paragraph",
"section > div.section-paragraph",
"h4","h3","h2"
],
blockSelectors:[
"div"
]
}

]

Expand All @@ -109,8 +121,8 @@ const containerSelectorConfigs = [
}
]

function getAllBlocksSelectors(){
const currentUrl = window.location.href;
function getAllBlocksSelectors(ctx){
const currentUrl = ctx.tabUrl;
const currentUrlObj = new URL(currentUrl);
const currentHostname = currentUrlObj.hostname;
const currentUrlWithoutSearch = currentUrlObj.origin + currentUrlObj.pathname;
Expand Down Expand Up @@ -145,10 +157,10 @@ function getAllBlocksSelectors(){
}
return allNodesSelectors;
}
const allBlocksSelectors = getAllBlocksSelectors();

function getContainerSelector(){
const currentUrl = window.location.href;

function getContainerSelector(ctx){
const currentUrl = ctx.tabUrl;
const currentUrlObj = new URL(currentUrl);
const currentUrlWithoutSearch = currentUrlObj.origin + currentUrlObj.pathname;
const currentHostname = currentUrlObj.hostname;
Expand All @@ -164,7 +176,6 @@ function getContainerSelector(){
}
}
}
const containerSelector = getContainerSelector();

function isValidNode(node){
if(node.hasAttribute && node.hasAttribute(enhanceMarkAttributeName)){
Expand All @@ -177,6 +188,16 @@ function isValidNode(node){
node.isContentEditable) {
return false
}

// check if the parent has enhanceMarkAttributeName
if(node.parentNode && node.parentNode.hasAttribute && node.parentNode.hasAttribute(enhanceMarkAttributeName)){
return false;
}
// check ancestors
if(node.closest && node.closest(`[${enhanceMarkAttributeName}=copiedNode]`)){
return false;
}

// check if there is a notranslate class
return true;
}
Expand Down Expand Up @@ -234,23 +255,33 @@ function getTitleContainer(root,hostname){

}
}
function getNodesThatNeedToTranslate(root,hostname,options){
/**
 * Tell whether `child` is already covered by one of the collected nodes.
 *
 * A node counts as covered when any entry of `array` reports it through
 * `contains(child)`.
 *
 * @param {Array<{contains: function(*): boolean}>} array - Nodes collected so far.
 * @param {*} child - Candidate node to check against the collected ones.
 * @returns {boolean} `true` if some collected node contains `child`, else `false`.
 */
function isDuplicatedChild(array, child) {
  // `some` stops at the first match, just like an early return from a loop.
  return array.some((collected) => collected.contains(child));
}
function getNodesThatNeedToTranslate(root,ctx,options){
options = options || {};
const allBlocksSelectors = getAllBlocksSelectors(ctx);
// all block nodes, nodes should have an order from top to bottom
let allNodes = [];

const currentUrl = window.location.href;
const currentUrl = ctx.tabUrl;
const currentUrlObj = new URL(currentUrl);
const currentUrlWithoutSearch = currentUrlObj.origin + currentUrlObj.pathname;
const currentHostname = currentUrlObj.hostname;
let currentTargetLanguage = twpConfig.get("targetLanguage")

// check sites
// console.log("allBlocksSelectors",root, allBlocksSelectors)
if(allBlocksSelectors.length>0){
for(const selector of allBlocksSelectors){
const nodes = root.querySelectorAll(selector);
for(const node of nodes){
if(hostname==="twitter.com"){
if(currentHostname==="twitter.com"){
// check language
try{
const lang = node.getAttribute("lang");
Expand All @@ -263,39 +294,54 @@ function getNodesThatNeedToTranslate(root,hostname,options){
}
}

if(isValidNode(node)){
if(isValidNode(node) && !isDuplicatedChild(allNodes,node)){
allNodes.push(node);
}
}
}
}else{
const titleContainer = getTitleContainer(root,hostname);
if(titleContainer){
allNodes.push(titleContainer);
}
// const titleContainer = getTitleContainer(root,hostname);
// if(titleContainer){
// allNodes.push(titleContainer);
// }
const contentContainer = getContainer(root);
// }
const contentContainer = getContainer(root,ctx);
console.log("contentContainer", contentContainer)
if(contentContainer){
// get all paragraphs
for(const blockTag of blockElements){
const paragraphs = contentContainer.querySelectorAll(blockTag.toLowerCase());
for (const paragraph of paragraphs) {
if(isValidNode(paragraph)){
allNodes.push(paragraph);
}
}
}
}else{
for(const blockTag of blockElements){
const paragraphs = root.querySelectorAll(blockTag.toLowerCase());
for (const paragraph of paragraphs) {
if(isValidNode(paragraph)){
allNodes.push(paragraph);
}
root = contentContainer;

}

for(const blockTag of blockElements){
const paragraphs = root.querySelectorAll(blockTag.toLowerCase());
for (const paragraph of paragraphs) {
if(isValidNode(paragraph) && !isDuplicatedChild(allNodes,paragraph)){
allNodes.push(paragraph);
}
}
}
}
// add additional heading nodes
// for(const headingTag of headingElements){
// const headings = root.querySelectorAll(headingTag.toLowerCase());
// for (const heading of headings) {
// if(isValidNode(heading)){
// // check if there is already exist in allNodes
// let isExist = false;
// for(const node of allNodes){
// if(node === heading){
// isExist = true;
// break;
// }
// }
// if(!isExist){
// allNodes.push(heading);
// }
// }
// }
// }


// sort allNodes, from top to bottom
allNodes.sort(function(a, b) {
return a.compareDocumentPosition(b) & Node.DOCUMENT_POSITION_FOLLOWING ? -1 : 1;
Expand Down Expand Up @@ -348,7 +394,9 @@ function getNodesThatNeedToTranslate(root,hostname,options){

// get the main container, copy from: https://github.com/ZachSaucier/Just-Read/blob/master/content_script.js

function getContainer(root) {
function getContainer(root,ctx) {

const containerSelector = getContainerSelector(ctx);

if(containerSelector){
const container = root.querySelector(containerSelector);
Expand Down
19 changes: 15 additions & 4 deletions src/contentScript/pageTranslator.js
Original file line number Diff line number Diff line change
Expand Up @@ -207,9 +207,20 @@ function getTabHostName() {
return new Promise(resolve => chrome.runtime.sendMessage({action: "getTabHostName"}, result => resolve(result)))
}

Promise.all([twpConfig.onReady(), getTabHostName()])
/**
 * Ask the background script for the URL of the tab hosting this content script.
 *
 * Sends a `{action: "getTabUrl"}` runtime message and resolves with whatever
 * the response callback receives.
 *
 * @returns {Promise<*>} Resolves with the value passed to the response callback.
 */
function getTabUrl() {
  const request = { action: "getTabUrl" };
  return new Promise((resolve) => {
    chrome.runtime.sendMessage(request, (tabUrl) => {
      resolve(tabUrl);
    });
  });
}
Promise.all([twpConfig.onReady(), getTabUrl()])
.then(function (_) {
const tabHostName = _[1]
const tabUrl = _[1];
const tabUrlObj = new URL(tabUrl);
const tabHostName = tabUrlObj.hostname;
const tabUrlWithoutSearch = tabUrlObj.origin + tabUrlObj.pathname;
const ctx = {
tabUrl,
tabHostName,
tabUrlWithoutSearch,
}
const htmlTagsInlineText = ['#text', 'A', 'ABBR', 'ACRONYM', 'B', 'BDO', 'BIG', 'CITE', 'DFN', 'EM', 'I', 'LABEL', 'Q', 'S', 'SMALL', 'SPAN', 'STRONG', 'SUB', 'SUP', 'U', 'TT', 'VAR']
const htmlTagsInlineIgnore = ['BR', 'CODE', 'KBD', 'WBR'] // and input if type is submit or button, and pre depending on settings
const htmlTagsNoTranslate = ['TITLE', 'SCRIPT', 'STYLE', 'TEXTAREA', 'SVG', 'svg'] //TODO verificar porque 'svg' é com letras minúsculas
Expand Down Expand Up @@ -262,7 +273,7 @@ Promise.all([twpConfig.onReady(), getTabHostName()])
if (removedNodes.indexOf(nn) != -1) return;

// let newPiecesToTranslate = getPiecesToTranslate(nn)
let newPiecesToTranslate = getNodesThatNeedToTranslate(nn,tabHostName).reduce((acc, node) => {
let newPiecesToTranslate = getNodesThatNeedToTranslate(nn,ctx).reduce((acc, node) => {
return acc.concat(getPiecesToTranslate(node))
}, [])

Expand Down Expand Up @@ -808,7 +819,7 @@ Promise.all([twpConfig.onReady(), getTabHostName()])
}

// piecesToTranslate = getPiecesToTranslate()
piecesToTranslate = getNodesThatNeedToTranslate(document.body,tabHostName).reduce((acc, node) => {
piecesToTranslate = getNodesThatNeedToTranslate(document.body,ctx).reduce((acc, node) => {
return acc.concat(getPiecesToTranslate(node))
}, [])
attributesToTranslate = getAttributesToTranslate()
Expand Down
2 changes: 1 addition & 1 deletion src/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"default_locale": "en",
"name": "Immersive Translate - 沉浸式翻译",
"description": "支持整页翻译·双语显示·只翻译正文。布局全面优化,和浏览器阅读模式类似,对Twitter, Reddit等网站做了定制优化",
"version": "0.0.7",
"version": "0.0.9",
"homepage_url": "https://github.com/theowenyoung/Traduzir-paginas-web",

"browser_specific_settings": {
Expand Down

0 comments on commit d4176fb

Please sign in to comment.