Skip to content

Commit a3a3146

Browse files
authored
🐜 Fix syncing Courses with duplicate Number + Title (#79)
A bug was recently discovered where course data was missing for courses that had identical titles and numbers but different subjects (#76). For example, in Spring 2025, MyPurdue lists `MA 41600` and `STAT 41600`, both titled "Probability"; however, Purdue.io only shows `MA 41600`, with every CRN from both of the original courses grouped under it. The root cause is that CatalogSync caches courses indexed only by their number and title, not by their subject. This causes any courses with an identical number and title to be grouped together, even those from different subjects. This change modifies the index to key off of subject code in addition to number and title, resolving the initial bug. It also includes the logic needed to "move" classes that were erroneously synced with the existing logic to the correct place.
1 parent cc07773 commit a3a3146

File tree

1 file changed

+18
-3
lines changed

1 file changed

+18
-3
lines changed

src/CatalogSync/FastSync.cs

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,9 @@ public static async Task SynchronizeAsync(IScraper scraper, ApplicationDbContext
8787
private Dictionary<string, DatabaseCampus> dbCachedCampuses =
8888
new Dictionary<string, DatabaseCampus>();
8989

90-
private Dictionary<(string number, string title), DatabaseCourse> dbCachedCourses =
91-
new Dictionary<(string number, string title), DatabaseCourse>();
90+
private Dictionary<(string subjectCode, string number, string title), DatabaseCourse>
91+
dbCachedCourses =
92+
new Dictionary<(string subjectCode, string number, string title), DatabaseCourse>();
9293

9394
private Dictionary<(Guid campusId, string buildingCode), DatabaseBuilding> dbCachedBuildings
9495
= new Dictionary<(Guid campusId, string buildingCode), DatabaseBuilding>();
@@ -374,6 +375,7 @@ private void InternalSynchronizeClass(DatabaseTerm term, DatabaseSubject subject
374375
.ToList();
375376
if (dbSections.Count == 0)
376377
{
378+
// The sections don't exist, so the class doesn't either.
377379
var newClass = new DatabaseClass()
378380
{
379381
Id = Guid.NewGuid(),
@@ -386,7 +388,19 @@ private void InternalSynchronizeClass(DatabaseTerm term, DatabaseSubject subject
386388
}
387389
else
388390
{
391+
// At least one of the sections exists in the database.
389392
classId = dbSections.First().ClassId;
393+
394+
// Detect if the section's Class has moved to a different Course
395+
// (handle the rare case where a CRN gets moved to a different Course)
396+
var dbSectionGroupClass = dbContext.Classes.Single(c => c.Id == classId);
397+
if (dbSectionGroupClass.CourseId != course.Id)
398+
{
399+
dbSectionGroupClass.CourseId = course.Id;
400+
dbContext.Entry(dbSectionGroupClass)
401+
.Property(s => s.CourseId).CurrentValue = course.Id;
402+
dbContext.Entry(dbSectionGroupClass).State = EntityState.Modified;
403+
}
390404
}
391405

392406
// Hydrate each section
@@ -438,7 +452,8 @@ private DatabaseCourse FetchOrAddCourse(DatabaseSubject subject, string courseNu
438452
string courseTitle, double creditHours, string courseDescription)
439453
{
440454
DatabaseCourse course;
441-
var courseKey = (number: courseNumber, title: courseTitle);
455+
var courseKey = (subjectCode: subject.Abbreviation,
456+
number: courseNumber, title: courseTitle);
442457
if (dbCachedCourses.ContainsKey(courseKey))
443458
{
444459
course = dbCachedCourses[courseKey];

0 commit comments

Comments
 (0)