@@ -82,20 +82,53 @@ def get_all_courses(self):
8282 Returns:
8383 list: Course data with prerequisites, descriptions, and instructors
8484 """
85- self ._ensure_initialized () # Make sure crawler is initialized
86-
87- # Get data from course selection APIs
88- courses_data = self ._get_lesson_tasks ()
89- course_details = self ._get_course_catalog ()
90- prerequisites = self ._get_prerequisites ()
85+ # Ask user which data sources to use
86+ use_coursesel = self ._ask_user_choice (
87+ "Crawl course selection system data? (y/n): " , default = "y"
88+ )
89+ use_official = self ._ask_user_choice (
90+ "Crawl official website data? (y/n): " , default = "y"
91+ )
9192
92- # Get official website data for enhanced descriptions
93- official_data = self ._get_official_website_data ()
93+ courses_data = []
94+ course_details = {}
95+ prerequisites = {}
96+ official_data = {}
97+
98+ if use_coursesel :
99+ self ._ensure_initialized () # Make sure crawler is initialized
100+ print ("Crawling course selection system data..." )
101+ # Get data from course selection APIs
102+ courses_data = self ._get_lesson_tasks ()
103+ course_details = self ._get_course_catalog ()
104+ prerequisites = self ._get_prerequisites ()
105+ else :
106+ print ("Skipping course selection system data" )
107+
108+ if use_official :
109+ print ("Crawling official website data..." )
110+ # Get official website data for enhanced descriptions
111+ official_data = self ._get_official_website_data ()
112+ else :
113+ print ("Skipping official website data" )
94114
95115 return self ._integrate_course_data (
96116 courses_data , course_details , prerequisites , official_data
97117 )
98118
119+ def _ask_user_choice (self , prompt , default = "y" ):
120+ """Ask user for yes/no choice with default value"""
121+ while True :
122+ response = input (prompt ).strip ().lower ()
123+ if not response :
124+ response = default .lower ()
125+ if response in ["y" , "yes" , "true" ]:
126+ return True
127+ elif response in ["n" , "no" , "false" ]:
128+ return False
129+ else :
130+ print ("Please enter y/yes or n/no" )
131+
99132 def _get_current_elect_turn_id (self ):
100133 """Get current election turn ID dynamically"""
101134 url = f"{ BASE_URL } /tpm/findStudentElectTurns_ElectTurn.action"
@@ -330,40 +363,62 @@ def _integrate_course_data(
330363 f"Starting integration with { len (courses_data )} courses, { len (prerequisites )} prereq groups, { len (official_data )} official records"
331364 )
332365
333- courses_by_code = defaultdict (list )
334- for course in courses_data :
335- course_code = course .get ("courseCode" )
336- if course_code :
337- courses_by_code [course_code ].append (course )
338-
339366 integrated_courses = []
340367 courses_with_prereqs = 0
341368
342- for course_code , course_list in courses_by_code .items ():
343- merged = self ._merge_course_sections (course_list )
344- if not merged :
345- continue
346-
347- course_id = merged .get ("courseId" )
348- catalog_info = course_details .get (course_id , {})
349- prereq_info = prerequisites .get (course_id , [])
350- official_info = official_data .get (course_code , {})
369+ # If we have course selection data, process it
370+ if courses_data :
371+ courses_by_code = defaultdict (list )
372+ for course in courses_data :
373+ course_code = course .get ("courseCode" )
374+ if course_code :
375+ courses_by_code [course_code ].append (course )
376+
377+ for course_code , course_list in courses_by_code .items ():
378+ merged = self ._merge_course_sections (course_list )
379+ if not merged :
380+ continue
381+
382+ course_id = merged .get ("courseId" )
383+ catalog_info = course_details .get (course_id , {})
384+ prereq_info = prerequisites .get (course_id , [])
385+ official_info = official_data .get (course_code , {})
386+
387+ if prereq_info :
388+ courses_with_prereqs += 1
389+ logger .debug (
390+ f"Course { course_code } (ID: { course_id } ) has { len (prereq_info )} prereqs"
391+ )
351392
352- if prereq_info :
353- courses_with_prereqs += 1
354- logger .debug (
355- f"Course { course_code } (ID: { course_id } ) has { len (prereq_info )} prereqs"
393+ course_data = self ._build_course_record (
394+ course_code , merged , catalog_info , prereq_info , official_info
356395 )
357396
358- course_data = self ._build_course_record (
359- course_code , merged , catalog_info , prereq_info , official_info
360- )
397+ if course_data :
398+ integrated_courses .append (course_data )
399+
400+ # If we only have official data (no course selection data), create courses from official data
401+ elif official_data :
402+ logger .info ("Creating courses from official website data only" )
403+ for course_code , official_info in official_data .items ():
404+ # Create empty main_data for courses that only exist in official website
405+ empty_main_data = {}
406+ empty_catalog_data = {}
407+ empty_prereq_data = []
408+
409+ course_data = self ._build_course_record (
410+ course_code ,
411+ empty_main_data ,
412+ empty_catalog_data ,
413+ empty_prereq_data ,
414+ official_info ,
415+ )
361416
362- if course_data :
363- integrated_courses .append (course_data )
417+ if course_data :
418+ integrated_courses .append (course_data )
364419
365420 logger .info (
366- f"Integration complete: { courses_with_prereqs } courses have prerequisites"
421+ f"Integration complete: { courses_with_prereqs } courses have prerequisites, { len ( integrated_courses ) } total courses "
367422 )
368423 return integrated_courses
369424
@@ -428,6 +483,10 @@ def _extract_course_title(self, main_data, catalog_data, official_data=None):
428483 """Extract course title (prefer English name)"""
429484 if official_data is None :
430485 official_data = {}
486+ if main_data is None :
487+ main_data = {}
488+ if catalog_data is None :
489+ catalog_data = {}
431490
432491 return (
433492 official_data .get ("course_title" , "" )
@@ -442,8 +501,8 @@ def _parse_course_code(self, course_code):
442501 number = 0
443502
444503 if course_code :
445- # Match DEPT#### J? (J is optional)
446- match = re .match (r"^([A-Z]{2,4})(\d{4})J?$" , course_code )
504+ # Match DEPT###(#)? J? (3 or 4 digits, J is optional)
505+ match = re .match (r"^([A-Z]{2,4})(\d{3, 4})J?$" , course_code )
447506 if match :
448507 department = match .group (1 )
449508 number = int (match .group (2 ))
@@ -452,6 +511,11 @@ def _parse_course_code(self, course_code):
452511
453512 def _extract_course_credits (self , main_data , catalog_data ):
454513 """Extract course credits"""
514+ if main_data is None :
515+ main_data = {}
516+ if catalog_data is None :
517+ catalog_data = {}
518+
455519 course_credits = main_data .get ("totalCredit" , 0 ) or catalog_data .get (
456520 "credit" , 0
457521 )
@@ -520,6 +584,11 @@ def _extract_description(self, official_data=None):
520584
521585 def _extract_instructors (self , main_data , catalog_data ):
522586 """Extract and merge instructor information"""
587+ if main_data is None :
588+ main_data = {}
589+ if catalog_data is None :
590+ catalog_data = {}
591+
523592 instructors = main_data .get ("all_instructors" , [])
524593 teacher_name = catalog_data .get ("teacherName" , "" )
525594
@@ -532,6 +601,8 @@ def _extract_instructors(self, main_data, catalog_data):
532601
533602 def _build_course_url (self , main_data ):
534603 """Build course detail page URL"""
604+ if main_data is None :
605+ main_data = {}
535606 course_id = main_data .get ("courseId" )
536607 return f"{ COURSE_DETAIL_URL_PREFIX } { course_id } " if course_id else ""
537608
0 commit comments