Skip to content

Commit c262dc7

Browse files
authored
1 parent 951321f commit c262dc7

File tree

3 files changed

+251
-24
lines changed

3 files changed

+251
-24
lines changed

src/backlinks/handler.js

Lines changed: 70 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import calculateKpiMetrics from './kpi-metrics.js';
1818
import { convertToOpportunity } from '../common/opportunity.js';
1919
import { createOpportunityData } from './opportunity-data-mapper.js';
2020
import { syncSuggestions } from '../utils/data-access.js';
21+
import { filterByAuditScope, extractPathPrefix } from '../internal-links/subpath-filter.js';
2122

2223
const { AUDIT_STEP_DESTINATIONS } = Audit;
2324

@@ -108,16 +109,23 @@ export async function runAuditAndImportTopPages(context) {
108109

109110
export async function submitForScraping(context) {
110111
const {
111-
site, dataAccess, audit,
112+
site, dataAccess, audit, log,
112113
} = context;
113114
const { SiteTopPage } = dataAccess;
114115
const auditResult = audit.getAuditResult();
115116
if (auditResult.success === false) {
116117
throw new Error('Audit failed, skipping scraping and suggestions generation');
117118
}
118119
const topPages = await SiteTopPage.allBySiteIdAndSourceAndGeo(site.getId(), 'ahrefs', 'global');
120+
121+
// Filter top pages by audit scope (subpath/locale) if baseURL has a subpath
122+
const baseURL = site.getBaseURL();
123+
const filteredTopPages = filterByAuditScope(topPages, baseURL, { urlProperty: 'getUrl' }, log);
124+
125+
log.info(`Found ${topPages.length} top pages, ${filteredTopPages.length} within audit scope`);
126+
119127
return {
120-
urls: topPages.map((topPage) => ({ url: topPage.getUrl() })),
128+
urls: filteredTopPages.map((topPage) => ({ url: topPage.getUrl() })),
121129
siteId: site.getId(),
122130
type: 'broken-backlinks',
123131
};
@@ -183,24 +191,78 @@ export const generateSuggestionData = async (context) => {
183191
opportunity.getId(),
184192
SuggestionModel.STATUSES.NEW,
185193
);
194+
195+
// Build broken links array
196+
const brokenLinks = suggestions
197+
.map((suggestion) => ({
198+
urlFrom: suggestion?.getData()?.url_from,
199+
urlTo: suggestion?.getData()?.url_to,
200+
suggestionId: suggestion?.getId(),
201+
}))
202+
.filter((link) => link.urlFrom && link.urlTo && link.suggestionId); // Filter invalid entries
203+
204+
// Get top pages and filter by audit scope
186205
const topPages = await SiteTopPage.allBySiteIdAndSourceAndGeo(site.getId(), 'ahrefs', 'global');
206+
const baseURL = site.getBaseURL();
207+
const filteredTopPages = filterByAuditScope(topPages, baseURL, { urlProperty: 'getUrl' }, log);
208+
209+
// Filter alternatives by locales/subpaths present in broken links
210+
// This limits suggestions to relevant locales only
211+
const allTopPageUrls = filteredTopPages.map((page) => page.getUrl());
212+
213+
// Extract unique locales/subpaths from broken links
214+
const brokenLinkLocales = new Set();
215+
brokenLinks.forEach((link) => {
216+
const locale = extractPathPrefix(link.urlTo);
217+
if (locale) {
218+
brokenLinkLocales.add(locale);
219+
}
220+
});
221+
222+
// Filter alternatives to only include URLs matching broken links' locales
223+
// If no locales found (no subpath), include all alternatives
224+
// Always ensure alternativeUrls is an array (even if empty)
225+
let alternativeUrls = [];
226+
if (brokenLinkLocales.size > 0) {
227+
alternativeUrls = allTopPageUrls.filter((url) => {
228+
const urlLocale = extractPathPrefix(url);
229+
// Include if URL matches one of the broken links' locales, or has no locale
230+
return !urlLocale || brokenLinkLocales.has(urlLocale);
231+
});
232+
} else {
233+
// No locale prefixes found, include all alternatives
234+
alternativeUrls = allTopPageUrls;
235+
}
236+
237+
// Validate before sending to Mystique
238+
if (brokenLinks.length === 0) {
239+
log.warn('No valid broken links to send to Mystique. Skipping message.');
240+
return {
241+
status: 'complete',
242+
};
243+
}
244+
245+
if (alternativeUrls.length === 0) {
246+
log.warn('No alternative URLs available. Cannot generate suggestions. Skipping message to Mystique.');
247+
return {
248+
status: 'complete',
249+
};
250+
}
251+
187252
const message = {
188253
type: 'guidance:broken-links',
189254
siteId: site.getId(),
190255
auditId: audit.getId(),
191256
deliveryType: site.getDeliveryType(),
192257
time: new Date().toISOString(),
193258
data: {
194-
alternativeUrls: topPages.map((page) => page.getUrl()),
259+
alternativeUrls,
195260
opportunityId: opportunity?.getId(),
196-
brokenLinks: suggestions.map((suggestion) => ({
197-
urlFrom: suggestion?.getData()?.url_from,
198-
urlTo: suggestion?.getData()?.url_to,
199-
suggestionId: suggestion?.getId(),
200-
})),
261+
brokenLinks,
201262
},
202263
};
203264
await sqs.sendMessage(env.QUEUE_SPACECAT_TO_MYSTIQUE, message);
265+
log.debug(`Message sent to Mystique: ${JSON.stringify(message)}`);
204266
return {
205267
status: 'complete',
206268
};

src/internal-links/handler.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ export const opportunityAndSuggestionsStep = async (context) => {
266266
// Extract unique locales/subpaths from broken links
267267
const brokenLinkLocales = new Set();
268268
brokenLinks.forEach((link) => {
269-
const locale = extractPathPrefix(link.urlTo) || extractPathPrefix(link.urlFrom);
269+
const locale = extractPathPrefix(link.urlTo);
270270
if (locale) {
271271
brokenLinkLocales.add(locale);
272272
}

test/audits/backlinks.test.js

Lines changed: 180 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@ describe('Backlinks Tests', function () {
5252
{ getUrl: () => 'https://example.com/blog/page1' },
5353
{ getUrl: () => 'https://example.com/blog/page2' },
5454
];
55+
const topPagesNoPrefix = [
56+
{ getUrl: () => 'https://example.com/page1' },
57+
{ getUrl: () => 'https://example.com/page2' },
58+
];
5559
const auditUrl = 'https://audit.url';
5660
const audit = {
5761
getId: () => auditDataMock.id,
@@ -173,11 +177,37 @@ describe('Backlinks Tests', function () {
173177

174178
const result = await submitForScraping(context);
175179

180+
// filterByAuditScope returns all items when baseURL has no subpath
176181
expect(result).to.deep.equal({
177182
siteId: contextSite.getId(),
178183
type: 'broken-backlinks',
179184
urls: topPages.map((topPage) => ({ url: topPage.getUrl() })),
180185
});
186+
expect(context.log.info).to.have.been.calledWith(sinon.match(/Found.*top pages.*within audit scope/));
187+
});
188+
189+
it('should filter top pages by audit scope when baseURL has subpath', async () => {
190+
context.audit.getAuditResult.returns({ success: true });
191+
const siteWithSubpath = {
192+
...contextSite,
193+
getBaseURL: () => 'https://example.com/uk',
194+
};
195+
context.site = siteWithSubpath;
196+
197+
const topPagesWithSubpaths = [
198+
{ getUrl: () => 'https://example.com/uk/page1' },
199+
{ getUrl: () => 'https://example.com/uk/page2' },
200+
{ getUrl: () => 'https://example.com/fr/page1' }, // Should be filtered out
201+
];
202+
context.dataAccess.SiteTopPage.allBySiteIdAndSourceAndGeo.resolves(topPagesWithSubpaths);
203+
204+
const result = await submitForScraping(context);
205+
206+
// Should only include URLs within /uk subpath
207+
expect(result.urls).to.have.length(2);
208+
expect(result.urls.map((u) => u.url)).to.include('https://example.com/uk/page1');
209+
expect(result.urls.map((u) => u.url)).to.include('https://example.com/uk/page2');
210+
expect(result.urls.map((u) => u.url)).to.not.include('https://example.com/fr/page1');
181211
});
182212

183213
it('should not submit urls for scraping step when audit was not successful', async () => {
@@ -333,26 +363,161 @@ describe('Backlinks Tests', function () {
333363
brokenBacklinksOpportunity.getSuggestions.returns([]);
334364
brokenBacklinksOpportunity.addSuggestions.returns(brokenBacklinksSuggestions);
335365

366+
// Mock calculateKpiMetrics S3 calls (needed for the function to complete)
367+
context.s3Client.send.onCall(0).resolves(null); // No RUM traffic data
368+
context.s3Client.send.onCall(1).resolves(null); // No organic traffic data
369+
370+
// Mock suggestions with broken link that has root-level URL (no path prefix)
371+
// This ensures alternatives with any prefix or no prefix will be included
372+
// IMPORTANT: Match the exact structure from the original test that works
373+
const suggestionsWithRootUrl = [
374+
{
375+
opportunityId: 'test-opportunity-id',
376+
getId: () => 'test-suggestion-1',
377+
type: 'REDIRECT_UPDATE',
378+
rank: 550000,
379+
getData: () => ({
380+
url_from: 'https://from.com/from-2',
381+
url_to: 'https://example.com', // Root-level URL - extractPathPrefix returns ''
382+
}),
383+
},
384+
];
385+
// Create new stub like internal links test does - MUST be set before generateSuggestionData is called
386+
// The stub needs to accept opportunityId and status as parameters
387+
context.dataAccess.Suggestion.allByOpportunityIdAndStatus = sandbox.stub()
388+
.withArgs('opportunity-id', sinon.match.any)
389+
.resolves(suggestionsWithRootUrl);
390+
391+
// Use top pages with any prefix - since broken link has no prefix, all will be included
392+
context.dataAccess.SiteTopPage.allBySiteIdAndSourceAndGeo = sandbox.stub()
393+
.resolves(topPages);
394+
336395
const result = await generateSuggestionData(context);
337396

338-
// 4x for headers + 4x for each page
339397
expect(result.status).to.deep.equal('complete');
340-
expect(context.sqs.sendMessage).to.have.been.calledWithMatch('test-queue', {
341-
type: 'guidance:broken-links',
342-
siteId: 'site-id',
343-
auditId: 'audit-id',
344-
deliveryType: 'aem_cs',
345-
time: sinon.match.any,
346-
data: {
347-
opportunityId: 'opportunity-id',
348-
alternativeUrls: topPages.map((page) => page.getUrl()),
349-
brokenLinks: [{
350-
urlFrom: 'https://from.com/from-2',
351-
urlTo: 'https://foo.com/redirects-throws-error',
352-
suggestionId: 'test-suggestion-1',
353-
}],
398+
399+
// Verify no warnings were called (meaning both brokenLinks and alternativeUrls have items)
400+
expect(context.log.warn).to.not.have.been.calledWith('No valid broken links to send to Mystique. Skipping message.');
401+
expect(context.log.warn).to.not.have.been.calledWith('No alternative URLs available. Cannot generate suggestions. Skipping message to Mystique.');
402+
403+
// Verify message was sent with correct structure
404+
expect(context.sqs.sendMessage).to.have.been.calledOnce;
405+
const sentMessage = context.sqs.sendMessage.getCall(0).args[1];
406+
expect(sentMessage.type).to.equal('guidance:broken-links');
407+
expect(sentMessage.siteId).to.equal('site-id');
408+
expect(sentMessage.auditId).to.equal('audit-id');
409+
expect(sentMessage.deliveryType).to.equal('aem_cs');
410+
expect(sentMessage.data.opportunityId).to.equal('opportunity-id');
411+
expect(sentMessage.data.alternativeUrls).to.deep.equal(topPages.map((page) => page.getUrl()));
412+
expect(sentMessage.data.brokenLinks).to.be.an('array');
413+
expect(sentMessage.data.brokenLinks.length).to.equal(1);
414+
expect(sentMessage.data.brokenLinks[0]).to.deep.include({
415+
urlFrom: 'https://from.com/from-2',
416+
urlTo: 'https://example.com',
417+
suggestionId: 'test-suggestion-1',
418+
});
419+
420+
expect(context.log.debug).to.have.been.calledWith(sinon.match(/Message sent to Mystique/));
421+
});
422+
423+
it('should filter alternative URLs by locale when broken links have locales', async () => {
424+
configuration.isHandlerEnabledForSite.returns(true);
425+
context.audit.getAuditResult.returns({
426+
success: true,
427+
brokenBacklinks: auditDataMock.auditResult.brokenBacklinks,
428+
});
429+
brokenBacklinksOpportunity.getSuggestions.returns([]);
430+
brokenBacklinksOpportunity.addSuggestions.returns(brokenBacklinksSuggestions);
431+
432+
// Mock suggestions with locale-specific broken links
433+
const suggestionsWithLocale = [
434+
{
435+
getId: () => 'test-suggestion-1',
436+
getData: () => ({
437+
url_from: 'https://from.com/from-1',
438+
url_to: 'https://example.com/uk/en/old-page',
439+
}),
354440
},
441+
];
442+
context.dataAccess.Suggestion.allByOpportunityIdAndStatus.resolves(suggestionsWithLocale);
443+
444+
// Mock top pages with different locales
445+
const topPagesWithLocales = [
446+
{ getUrl: () => 'https://example.com/uk/en/page1' },
447+
{ getUrl: () => 'https://example.com/uk/en/page2' },
448+
{ getUrl: () => 'https://example.com/fr/page1' }, // Should be filtered out
449+
{ getUrl: () => 'https://example.com/de/page1' }, // Should be filtered out
450+
];
451+
context.dataAccess.SiteTopPage.allBySiteIdAndSourceAndGeo.resolves(topPagesWithLocales);
452+
453+
const result = await generateSuggestionData(context);
454+
455+
expect(result.status).to.deep.equal('complete');
456+
const sentMessage = context.sqs.sendMessage.getCall(0).args[1];
457+
expect(sentMessage.data.alternativeUrls).to.have.length(2);
458+
expect(sentMessage.data.alternativeUrls).to.include('https://example.com/uk/en/page1');
459+
expect(sentMessage.data.alternativeUrls).to.include('https://example.com/uk/en/page2');
460+
expect(sentMessage.data.alternativeUrls).to.not.include('https://example.com/fr/page1');
461+
expect(sentMessage.data.alternativeUrls).to.not.include('https://example.com/de/page1');
462+
});
463+
464+
it('should skip sending message when no valid broken links', async () => {
465+
configuration.isHandlerEnabledForSite.returns(true);
466+
context.audit.getAuditResult.returns({
467+
success: true,
468+
brokenBacklinks: auditDataMock.auditResult.brokenBacklinks,
469+
});
470+
brokenBacklinksOpportunity.getSuggestions.returns([]);
471+
brokenBacklinksOpportunity.addSuggestions.returns(brokenBacklinksSuggestions);
472+
473+
// Mock suggestions with invalid data (missing fields)
474+
const invalidSuggestions = [
475+
{
476+
getId: () => 'test-suggestion-1',
477+
getData: () => ({
478+
url_from: '', // Invalid - empty
479+
url_to: 'https://example.com/page',
480+
}),
481+
},
482+
];
483+
context.dataAccess.Suggestion.allByOpportunityIdAndStatus.resolves(invalidSuggestions);
484+
485+
const result = await generateSuggestionData(context);
486+
487+
expect(result.status).to.deep.equal('complete');
488+
expect(context.sqs.sendMessage).to.not.have.been.called;
489+
expect(context.log.warn).to.have.been.calledWith('No valid broken links to send to Mystique. Skipping message.');
490+
});
491+
492+
it('should skip sending message when no alternative URLs available', async () => {
493+
configuration.isHandlerEnabledForSite.returns(true);
494+
context.audit.getAuditResult.returns({
495+
success: true,
496+
brokenBacklinks: auditDataMock.auditResult.brokenBacklinks,
355497
});
498+
brokenBacklinksOpportunity.getSuggestions.returns([]);
499+
brokenBacklinksOpportunity.addSuggestions.returns(brokenBacklinksSuggestions);
500+
501+
// Mock suggestions
502+
const validSuggestions = [
503+
{
504+
getId: () => 'test-suggestion-1',
505+
getData: () => ({
506+
url_from: 'https://from.com/from-1',
507+
url_to: 'https://example.com/uk/en/old-page',
508+
}),
509+
},
510+
];
511+
context.dataAccess.Suggestion.allByOpportunityIdAndStatus.resolves(validSuggestions);
512+
513+
// Mock empty top pages (after filtering)
514+
context.dataAccess.SiteTopPage.allBySiteIdAndSourceAndGeo.resolves([]);
515+
516+
const result = await generateSuggestionData(context);
517+
518+
expect(result.status).to.deep.equal('complete');
519+
expect(context.sqs.sendMessage).to.not.have.been.called;
520+
expect(context.log.warn).to.have.been.calledWith('No alternative URLs available. Cannot generate suggestions. Skipping message to Mystique.');
356521
});
357522
});
358523

0 commit comments

Comments
 (0)