Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/clis/1688/item.ts
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ function extractKeywordLine(bodyText: string, keywords: string[]): string | null
}

function extractSalesText(bodyText: string): string | null {
const match = bodyText.match(/(?:全网销量|已售)\s*\d+(?:\.\d+)?\+?[件套个]?/);
const match = bodyText.match(/(?:全网销量|已售)\s*\d+(?:\.\d+)?\+?\s*[件套个单]?/);
return match ? cleanText(match[0]) : null;
}

Expand Down
14 changes: 14 additions & 0 deletions src/clis/1688/search.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,20 @@ describe('1688 search normalization', () => {
expect(result.return_rate_text).toBe('回头率52%');
});

it('does not use hover_price_text as MOQ source', () => {
  // Regression guard: in this candidate the only field carrying a quantity
  // phrase ("3件起批") is hover_price_text; moq_text is null and container_text
  // holds just the title and price. Any MOQ in the result would therefore have
  // to come from the hover text.
  const searchPageUrl = 'https://s.1688.com/selloffer/offer_search.htm?charset=utf8&keywords=test';

  const { moq_text: moqText, moq_value: moqValue } = __test__.normalizeSearchCandidate(
    {
      item_url: 'https://detail.1688.com/offer/887904326744.html',
      title: 'test',
      container_text: 'test ¥56.00',
      price_text: '¥ 56 .00',
      hover_price_text: '¥56.00 3件起批',
      moq_text: null,
    },
    searchPageUrl,
  );

  // hover_price_text should not be used for MOQ extraction
  expect(moqText).toBeNull();
  expect(moqValue).toBeNull();
});

it('extracts offer id from mobile detail search links', () => {
const result = __test__.normalizeSearchCandidate({
item_url: 'http://detail.m.1688.com/page/index.html?offerId=910933345396&sortType=&pageId=',
Expand Down
7 changes: 3 additions & 4 deletions src/clis/1688/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ function normalizeSearchCandidate(
const priceRange = parsePriceText(priceText || containerText);
const moq = parseMoqText(firstNonEmpty([
normalizeInlineText(candidate.moq_text),
normalizeInlineText(extractMoqText(candidate.hover_price_text)),
normalizeInlineText(extractMoqText(containerText)),
]));
const canonicalSellerUrl = canonicalizeSellerUrl(cleanText(candidate.seller_url));
Expand All @@ -111,7 +110,7 @@ function normalizeSearchCandidate(
offer_id: extractOfferId(canonicalItemUrl ?? '') ?? null,
member_id: extractMemberId(canonicalSellerUrl ?? '') ?? null,
shop_id: extractShopId(canonicalSellerUrl ?? '') ?? null,
title: cleanText(candidate.title) || firstLine(containerText) || null,
title: cleanText(candidate.title) || firstWord(containerText) || null,
item_url: canonicalItemUrl,
seller_name: cleanText(candidate.seller_name) || null,
seller_url: canonicalSellerUrl,
Expand Down Expand Up @@ -154,7 +153,7 @@ function extractSalesText(text: string | null | undefined): string {
return match ? cleanText(match[0]) : '';
}

function firstLine(text: string): string {
function firstWord(text: string): string {
return text.split(/\s+/).find(Boolean) ?? '';
}

Expand Down Expand Up @@ -398,6 +397,6 @@ export const __test__ = {
normalizeSearchCandidate,
extractMoqText,
extractSalesText,
firstLine,
firstWord,
buildDedupeKey,
};