Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ git clone git@github.com:jackwener/opencli.git && cd opencli && npm install && n
| **twitter** | `trending` `search` `timeline` `bookmarks` `post` `download` `profile` `article` `like` `likes` `notifications` `reply` `reply-dm` `thread` `follow` `unfollow` `followers` `following` `block` `unblock` `bookmark` `unbookmark` `delete` `hide-reply` `accept` |
| **reddit** | `hot` `frontpage` `popular` `search` `subreddit` `read` `user` `user-posts` `user-comments` `upvote` `upvoted` `save` `saved` `comment` `subscribe` |
| **amazon** | `bestsellers` `search` `product` `offer` `discussion` `movers-shakers` `new-releases` |
| **1688** | `search` `item` `store` |
| **1688** | `search` `item` `assets` `download` `store` |
| **gemini** | `new` `ask` `image` `deep-research` `deep-research-result` |
| **yuanbao** | `new` `ask` |
| **notebooklm** | `status` `list` `open` `current` `get` `history` `summary` `note-list` `notes-get` `source-list` `source-get` `source-fulltext` `source-guide` |
Expand Down Expand Up @@ -191,6 +191,7 @@ OpenCLI supports downloading images, videos, and articles from supported platfor
| **twitter** | Images, Videos | From user media tab or single tweet |
| **douban** | Images | Poster / still image lists |
| **pixiv** | Images | Original-quality illustrations, multi-page |
| **1688** | Images, Videos | Downloads page-visible product media from item pages |
| **zhihu** | Articles (Markdown) | Exports with optional image download |
| **weixin** | Articles (Markdown) | WeChat Official Account articles |

Expand All @@ -200,6 +201,7 @@ For video downloads, install `yt-dlp` first: `brew install yt-dlp`
opencli xiaohongshu download abc123 --output ./xhs
opencli bilibili download BV1xxx --output ./bilibili
opencli twitter download elonmusk --limit 20 --output ./twitter
opencli 1688 download 841141931191 --output ./1688-downloads
```

## Output Formats
Expand Down
6 changes: 5 additions & 1 deletion README.zh-CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ npx skills add jackwener/opencli --skill opencli-oneshot # 快速命令参
| **facebook** | `feed` `profile` `search` `friends` `groups` `events` `notifications` `memories` `add-friend` `join-group` | 浏览器 |
| **google** | `news` `search` `suggest` `trends` | 公开 |
| **amazon** | `bestsellers` `search` `product` `offer` `discussion` `movers-shakers` `new-releases` | 浏览器 |
| **1688** | `search` `item` `store` | 浏览器 |
| **1688** | `search` `item` `assets` `download` `store` | 浏览器 |
| **gemini** | `new` `ask` `image` `deep-research` `deep-research-result` | 浏览器 |
| **spotify** | `auth` `status` `play` `pause` `next` `prev` `volume` `search` `queue` `shuffle` `repeat` | OAuth API |
| **notebooklm** | `status` `list` `open` `current` `get` `history` `summary` `note-list` `notes-get` `source-list` `source-get` `source-fulltext` `source-guide` | 浏览器 |
Expand Down Expand Up @@ -258,6 +258,7 @@ OpenCLI 支持从各平台下载图片、视频和文章。
| **B站** | 视频 | 需要安装 `yt-dlp` |
| **Twitter/X** | 图片、视频 | 从用户媒体页或单条推文下载 |
| **Pixiv** | 图片 | 下载原始画质插画,支持多页作品 |
| **1688** | 图片、视频 | 下载商品页中可见的商品素材 |
| **知乎** | 文章(Markdown) | 导出文章,可选下载图片到本地 |
| **微信公众号** | 文章(Markdown) | 导出微信公众号文章为 Markdown |
| **豆瓣** | 图片 | 下载电影条目的海报 / 剧照图片 |
Expand Down Expand Up @@ -292,6 +293,9 @@ opencli twitter download --tweet-url "https://x.com/user/status/123" --output ./
# 下载豆瓣电影海报 / 剧照
opencli douban download 30382501 --output ./douban

# 下载 1688 商品页中的图片 / 视频素材
opencli 1688 download 841141931191 --output ./1688-downloads

# 导出知乎文章为 Markdown
opencli zhihu download "https://zhuanlan.zhihu.com/p/xxx" --output ./zhihu

Expand Down
42 changes: 42 additions & 0 deletions clis/1688/assets.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import { describe, expect, it } from 'vitest';
import { __test__ } from './assets.js';
import { __test__ as sharedTest } from './shared.js';

// Unit tests for the pure normalization helpers; no browser page is involved.
describe('1688 assets normalization', () => {
it('normalizes gallery and scanned assets into grouped media lists', () => {
// The payload mirrors what the in-page script returns: page-state gallery
// fields plus DOM/script-scanned assets. offerId is deliberately numeric.
const result = __test__.normalizeAssets({
href: 'https://detail.1688.com/offer/887904326744.html',
title: '测试商品 - 阿里巴巴',
offerTitle: '测试商品',
offerId: 887904326744,
gallery: {
// Protocol-relative URL: expected to come back with an https: scheme.
mainImage: ['//img.example.com/main-1.jpg'],
offerImgList: ['https://img.example.com/main-2.jpg'],
wlImageInfos: [{ fullPathImageURI: 'https://img.example.com/main-3.jpg' }],
},
scannedAssets: [
{ type: 'image', group: 'sku', url: 'https://img.example.com/sku-1.png', source: 'dom:.sku' },
{ type: 'image', group: 'detail', url: 'https://img.example.com/detail-1.jpg', source: 'dom:.detail' },
{ type: 'video', group: 'video', url: 'https://video.example.com/demo.mp4', source: 'script' },
// blob: URL: expected to be dropped (detail_images below has one entry only).
{ type: 'image', group: 'detail', url: 'blob:https://detail.1688.com/1', source: 'ignore' },
],
});

// The numeric offerId is reported as a string.
expect(result.offer_id).toBe('887904326744');
// All three gallery sources land in the main group, in seeding order.
expect(result.main_images).toEqual([
'https://img.example.com/main-1.jpg',
'https://img.example.com/main-2.jpg',
'https://img.example.com/main-3.jpg',
]);
expect(result.sku_images).toEqual(['https://img.example.com/sku-1.png']);
expect(result.detail_images).toEqual(['https://img.example.com/detail-1.jpg']);
expect(result.videos).toEqual(['https://video.example.com/demo.mp4']);
expect(result.main_count).toBe(3);
expect(result.video_count).toBe(1);
});

it('normalizes media urls from style syntax and protocol-relative URLs', () => {
// CSS url("...") wrapper and quotes are stripped; '//' gains an https: scheme.
expect(sharedTest.normalizeMediaUrl('url("//img.example.com/1.jpg")')).toBe('https://img.example.com/1.jpg');
// blob: URLs normalize to the empty string.
expect(sharedTest.normalizeMediaUrl('blob:https://detail.1688.com/1')).toBe('');
});
});
257 changes: 257 additions & 0 deletions clis/1688/assets.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
import { cli, Strategy } from '@jackwener/opencli/registry';
import type { IPage } from '@jackwener/opencli/types';
import {
assertAuthenticatedState,
buildDetailUrl,
buildProvenance,
cleanText,
extractOfferId,
gotoAndReadState,
type MediaSource,
uniqueMediaSources,
} from './shared.js';

/**
 * Raw result of the in-page extraction script, before normalization.
 * All fields are optional; normalizeAssets supplies fallbacks for missing values.
 */
interface AssetBrowserPayload {
// window.location.href at extraction time.
href?: string;
// document.title of the item page.
title?: string;
// Offer subject read from the page model; the script emits '' when the model is missing.
offerTitle?: string;
// Offer ID read from the page model; accepted as either a number or a string.
offerId?: string | number;
// JSON-cloned gallery fields from page state.
// NOTE(review): the script emits null (not undefined) when the gallery is absent.
gallery?: {
// Each of the three lists below is seeded into the "main" image group.
mainImage?: string[];
offerImgList?: string[];
wlImageInfos?: Array<{ fullPathImageURI?: string }>;
// Other gallery keys are passed through untyped and are not read by normalizeAssets.
[key: string]: unknown;
};
// Assets collected by the script from DOM selectors and inline script text.
scannedAssets?: MediaSource[];
}

/**
 * Result row produced by normalizeAssets: media URLs grouped by role,
 * per-group counts, and provenance fields.
 */
export interface Normalized1688Assets {
// Offer ID as a string, or null when neither the payload nor the page URL yields one.
offer_id: string | null;
// Offer title, falling back to the document title; null when both are empty.
title: string | null;
// Detail URL rebuilt from the offer ID, or the cleaned page href when no ID is known.
item_url: string;
// Image URLs whose group is 'main' (page-state gallery entries plus scanned assets).
main_images: string[];
// Image URLs whose group is 'sku'.
sku_images: string[];
// Image URLs whose group is 'detail'.
detail_images: string[];
// URLs of every asset whose type is 'video', regardless of group.
videos: string[];
// Image URLs whose group is none of 'main', 'sku', 'detail'.
other_images: string[];
// De-duplicated asset list the grouped arrays above are derived from.
raw_assets: MediaSource[];
// Distinct non-empty `source` labels found in raw_assets.
source: string[];
// Lengths of main_images, sku_images, detail_images and videos respectively
// (other_images has no count field).
main_count: number;
sku_count: number;
detail_count: number;
video_count: number;
// Provenance fields; presumably supplied by buildProvenance() — confirm in shared.ts.
source_url: string;
fetched_at: string;
strategy: string;
}

/**
 * Returns the source text of a self-invoking browser script that collects media
 * assets from a 1688 item page. The text is passed to page.evaluate() and its
 * result matches the AssetBrowserPayload shape.
 *
 * What the emitted script does:
 * - reads the page model and gallery fields from `window.context`;
 * - queries four selector groups (main, video, sku, detail) in the document and
 *   in any shadow roots reachable through `element.shadowRoot`;
 * - for the sku group reads only the computed `background-image`; the other
 *   groups read the lazy-load / src properties and attributes;
 * - regex-scans inline `<script>` text for http(s) URLs ending in .mp4 / .m3u8;
 * - normalizes every candidate URL (strips `url(...)` and quotes, unescapes
 *   `\u002F` and `&amp;`, drops `blob:` / `data:`, resolves against the page
 *   URL) and de-duplicates on type + URL, so a URL keeps the first group that
 *   reported it.
 *
 * @returns JavaScript source to be evaluated in the page context.
 */
function scriptToReadAssets(): string {
// Backslashes inside this template literal are doubled on purpose: `\\(` here
// becomes `\(` in the emitted script, and `\\\\u002F` becomes `\\u002F`.
// Do not reformat the literal; every byte is part of the evaluated script.
return `
(() => {
const root = window.context ?? {};
const model = root.result?.global?.globalData?.model ?? null;
const gallery = root.result?.data?.gallery?.fields ?? null;
const defaultSrcProps = ['data-lazyload-src', 'data-src', 'data-ks-lazyload', 'currentSrc', 'src'];
const groups = [
{ key: 'main', type: 'image', selectors: ['#dt-tab img', '.detail-gallery-turn img.detail-gallery-img', '.img-list-wrapper img.od-gallery-img', '.od-scroller-item span'] },
{ key: 'video', type: 'video', selectors: ['.lib-video video', 'video[src]', 'video source[src]'] },
{ key: 'sku', type: 'image', selectors: ['.pc-sku-wrapper .prop-item-inner-wrapper', '.sku-item-wrapper', '.specification-cell', '.sku-filter-button', '.expand-view-item', '.feature-item img'], srcProps: ['backgroundImage'] },
{ key: 'detail', type: 'image', selectors: ['.de-description-detail img', '#detailContentContainer img', '.html-description img', '.html-description source', '.desc-lazyload-container img'] },
];
const assets = [];
const seen = new Set();

const normalizeUrl = (value) => {
if (typeof value !== 'string') return '';
let next = value
.replace(/^url\\((.*)\\)$/i, '$1')
.replace(/^['"]|['"]$/g, '')
.replace(/\\\\u002F/g, '/')
.replace(/&amp;/g, '&')
.trim();
if (!next || next.startsWith('blob:') || next.startsWith('data:')) return '';
if (next.startsWith('//')) next = 'https:' + next;
try {
return new URL(next, location.href).toString();
} catch {
return '';
}
};

const push = (type, group, url, source) => {
const normalized = normalizeUrl(url);
if (!normalized) return;
const key = type + ':' + normalized;
if (seen.has(key)) return;
seen.add(key);
assets.push({ type, group, url: normalized, source });
};

const queryAllDeep = (selector) => {
const results = [];
const visitedRoots = new Set();
const walkRoots = (root, fn) => {
if (!root || visitedRoots.has(root)) return;
visitedRoots.add(root);
fn(root);
const childElements = root.querySelectorAll ? Array.from(root.querySelectorAll('*')) : [];
for (const child of childElements) {
if (child && child.shadowRoot) {
walkRoots(child.shadowRoot, fn);
}
}
};
walkRoots(document, (root) => {
if (root.querySelectorAll) {
results.push(...Array.from(root.querySelectorAll(selector)));
}
});
return results;
};

const valuesFromElement = (element, srcProps) => {
const values = [];
const props = srcProps && srcProps.length ? srcProps : defaultSrcProps;
for (const prop of props) {
try {
if (prop === 'backgroundImage') {
const bg = getComputedStyle(element).backgroundImage || '';
const matches = bg.match(/url\\(([^)]+)\\)/g) || [];
for (const match of matches) {
const clean = match.replace(/^url\\(/, '').replace(/\\)$/, '');
values.push(clean);
}
continue;
}

const direct = element[prop];
if (typeof direct === 'string' && direct) values.push(direct);
const attr = element.getAttribute ? element.getAttribute(prop) : '';
if (attr) values.push(attr);
} catch {}
}

if (element.tagName === 'SOURCE' && element.parentElement?.tagName === 'VIDEO') {
values.push(element.src || element.getAttribute('src') || '');
}

if (element.tagName === 'VIDEO') {
values.push(element.currentSrc || '');
values.push(element.src || '');
}

return values;
};

for (const group of groups) {
for (const selector of group.selectors) {
for (const element of queryAllDeep(selector)) {
for (const value of valuesFromElement(element, group.srcProps)) {
push(group.type, group.key, value, 'dom:' + selector);
}
}
}
}

const scriptTexts = Array.from(document.scripts).map((script) => script.textContent || '');
const videoRegex = /https?:\\/\\/[^"'\\s]+\\.(?:mp4|m3u8)(?:\\?[^"'\\s]*)?/gi;
for (const scriptText of scriptTexts) {
const matches = scriptText.match(videoRegex) || [];
for (const match of matches) {
push('video', 'video', match, 'script');
}
}

const toJson = (value) => JSON.parse(JSON.stringify(value ?? null));
return {
href: window.location.href,
title: document.title || '',
offerTitle: model?.offerTitleModel?.subject ?? '',
offerId: model?.tradeModel?.offerId ?? '',
gallery: toJson(gallery),
scannedAssets: assets,
};
})()
`;
}

/**
 * Turns the raw in-page payload into the grouped asset row returned by the CLI.
 *
 * Page-state gallery URLs are seeded into the "main" image group ahead of the
 * DOM/script-scanned assets, the combined list is passed through
 * `uniqueMediaSources`, and the result is split by type/group.
 *
 * @param payload - Raw result of the in-page extraction script.
 * @returns Grouped URL lists, their counts, the de-duplicated asset list, the
 *   distinct source labels, and the fields contributed by `buildProvenance`.
 */
function normalizeAssets(payload: AssetBrowserPayload): Normalized1688Assets {
  // The payload is JSON-cloned from third-party page state, so a list field can
  // be null or a non-array at runtime despite its declared type. Coerce such
  // values to [] instead of letting `.map` / spread throw a TypeError.
  const listOf = <T>(value: T[] | null | undefined): T[] => (Array.isArray(value) ? value : []);
  const mainImage = (url: string, source: string): MediaSource => ({
    type: 'image' as const,
    group: 'main' as const,
    url,
    source,
  });

  // Prefer the offer ID from the page model; fall back to parsing it out of the page URL.
  const offerId = cleanText(String(payload.offerId ?? '')) || extractOfferId(cleanText(payload.href)) || null;
  const itemUrl = offerId ? buildDetailUrl(offerId) : cleanText(payload.href);

  const gallery = payload.gallery;
  const seededAssets: MediaSource[] = [
    ...listOf(gallery?.mainImage).map((url) => mainImage(url, 'page_state:mainImage')),
    ...listOf(gallery?.offerImgList).map((url) => mainImage(url, 'page_state:offerImgList')),
    ...listOf(gallery?.wlImageInfos).map((item) => mainImage(item?.fullPathImageURI ?? '', 'page_state:wlImageInfos')),
  ];

  // Seeded assets go first so page-state entries are seen before scanned ones.
  const assets = uniqueMediaSources([...seededAssets, ...listOf(payload.scannedAssets)]);

  const imageUrlsIn = (group: string): string[] =>
    assets.filter((item) => item.type === 'image' && item.group === group).map((item) => item.url);

  const mainImages = imageUrlsIn('main');
  const skuImages = imageUrlsIn('sku');
  const detailImages = imageUrlsIn('detail');
  // Videos are selected by type only; their group is not consulted.
  const videos = assets.filter((item) => item.type === 'video').map((item) => item.url);
  const otherImages = assets
    .filter((item) => item.type === 'image' && !['main', 'sku', 'detail'].includes(item.group))
    .map((item) => item.url);

  return {
    offer_id: offerId,
    title: cleanText(payload.offerTitle) || cleanText(payload.title) || null,
    item_url: itemUrl,
    main_images: mainImages,
    sku_images: skuImages,
    detail_images: detailImages,
    videos,
    other_images: otherImages,
    raw_assets: assets,
    source: [...new Set(assets.map((item) => cleanText(item.source)).filter(Boolean))],
    main_count: mainImages.length,
    sku_count: skuImages.length,
    detail_count: detailImages.length,
    video_count: videos.length,
    ...buildProvenance(cleanText(payload.href) || itemUrl),
  };
}

/**
 * Navigates to the item page, checks the session state, scrolls the page and
 * then evaluates the asset-extraction script in the browser.
 *
 * @param page - Browser page handle supplied by the CLI runtime.
 * @param itemUrl - Detail-page URL to open.
 * @returns The raw payload produced by the in-page script.
 */
async function readAssetsPayload(page: IPage, itemUrl: string): Promise<AssetBrowserPayload> {
  const pageState = await gotoAndReadState(page, itemUrl, 2500, 'assets');
  assertAuthenticatedState(pageState, 'assets');

  // Scroll before scanning; presumably this lets lazy-loaded media populate — confirm against IPage.autoScroll.
  await page.autoScroll({ times: 3, delayMs: 400 });
  await page.wait(1);

  const rawPayload = await page.evaluate(scriptToReadAssets());
  return rawPayload as AssetBrowserPayload;
}

/**
 * Resolves a user-supplied item URL or offer ID to a detail URL, reads the raw
 * assets from that page and returns them normalized.
 *
 * @param page - Browser page handle supplied by the CLI runtime.
 * @param input - 1688 item URL or offer ID; null/undefined is treated as ''.
 * @returns The normalized asset row for the item.
 */
export async function extractAssetsForInput(page: IPage, input: string): Promise<Normalized1688Assets> {
  const detailUrl = buildDetailUrl(String(input ?? ''));
  return normalizeAssets(await readAssetsPayload(page, detailUrl));
}

// Registers the `1688 assets` command. Registration happens as a side effect of
// importing this module.
cli({
site: '1688',
name: 'assets',
// Description (zh): "List the image/video assets that can be extracted from a 1688 product page".
description: '列出 1688 商品页可提取的图片/视频素材',
domain: 'www.1688.com',
strategy: Strategy.COOKIE,
args: [
{
name: 'input',
required: true,
positional: true,
// Help (zh): "1688 product URL or offer ID (e.g. 887904326744)".
help: '1688 商品 URL 或 offer ID(如 887904326744)',
},
],
// Only the summary fields are listed as columns; the URL lists remain on the returned row.
columns: ['offer_id', 'title', 'main_count', 'sku_count', 'detail_count', 'video_count'],
// Returns a single-element array holding the normalized asset row.
func: async (page, kwargs) => {
return [await extractAssetsForInput(page, String(kwargs.input ?? ''))];
},
});

// Test-only hook: exposes the otherwise module-private normalizeAssets to unit tests.
export const __test__ = {
normalizeAssets,
};
33 changes: 33 additions & 0 deletions clis/1688/download.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import { describe, expect, it } from 'vitest';
import { __test__ } from './download.js';

// Unit tests for the download helpers; exercises filename generation only.
describe('1688 download helpers', () => {
  it('builds stable filenames for grouped assets', () => {
    // One asset per group, each with a different extension, so the expected
    // names show both the group label and the extension carried over from the URL.
    const downloadItems = __test__.toDownloadItems('887904326744', {
      offer_id: '887904326744',
      title: '测试商品',
      item_url: 'https://detail.1688.com/offer/887904326744.html',
      main_images: ['https://img.example.com/a.jpg'],
      sku_images: ['https://img.example.com/b.png'],
      detail_images: ['https://img.example.com/c.webp'],
      videos: ['https://video.example.com/d.mp4'],
      other_images: [],
      raw_assets: [],
      source: [],
      main_count: 1,
      sku_count: 1,
      detail_count: 1,
      video_count: 1,
      source_url: 'https://detail.1688.com/offer/887904326744.html',
      fetched_at: new Date().toISOString(),
      strategy: 'cookie',
    });

    const filenames = downloadItems.map(({ filename }) => filename);
    const expectedFilenames = [
      '887904326744_main_01.jpg',
      '887904326744_sku_01.png',
      '887904326744_detail_01.webp',
      '887904326744_video_01.mp4',
    ];

    expect(filenames).toEqual(expectedFilenames);
  });
});
Loading