@@ -10,7 +10,7 @@
 import downloader
 from logger import setup_logger
 from config import SUPPORTED_FORMATS, BOOK_LANGUAGE, AA_BASE_URL
-from env import AA_DONATOR_KEY, USE_CF_BYPASS, PRIORITIZE_WELIB
+from env import AA_DONATOR_KEY, USE_CF_BYPASS, PRIORITIZE_WELIB, ALLOW_USE_WELIB
 from models import BookInfo, SearchFilters
 logger = setup_logger(__name__)
 
@@ -169,21 +169,6 @@ def _parse_book_info_page(soup: BeautifulSoup, book_id: str) -> BookInfo:
 
     data = soup.find_all("div", {"class": "main-inner"})[0].find_next("div")
     divs = list(data.children)
-    _details = divs[13].text.strip().lower().split(" · ")
-    format = ""
-    size = ""
-    for f in _details:
-        if format == "" and f.strip().lower() in SUPPORTED_FORMATS:
-            format = f.strip().lower()
-        if size == "" and any(u in f.strip().lower() for u in ["mb", "kb", "gb"]):
-            size = f.strip().lower()
-
-    if format == "" or size == "":
-        for f in _details:
-            if f == "" and not " " in f.strip().lower():
-                format = f.strip().lower()
-            if size == "" and "." in f.strip().lower():
-                size = f.strip().lower()
 
     every_url = soup.find_all("a")
     slow_urls_no_waitlist = set()
@@ -237,20 +222,49 @@ def _parse_book_info_page(soup: BeautifulSoup, book_id: str) -> BookInfo:
     # Remove empty urls
     urls = [url for url in urls if url != ""]
 
+    # Filter out divs that are not text
+    original_divs = divs
+    divs = [div.text.strip() for div in divs if div.text.strip() != ""]
+
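+    # Locate the details line (format/size tokens joined by " · ") instead of
+    # relying on a fixed position; 6 is kept as a last-resort default.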
+    separator_index = 6
+    for i, div in enumerate(divs):
+        if "·" in div.strip():
+            separator_index = i
+            break
+
+    _details = divs[separator_index].lower().split(" · ")
+    format = ""
+    size = ""
+    for f in _details:
+        if format == "" and f.strip().lower() in SUPPORTED_FORMATS:
+            format = f.strip().lower()
+        if size == "" and any(u in f.strip().lower() for u in ["mb", "kb", "gb"]):
+            size = f.strip().lower()
+
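+    # Fallback heuristics when the details line is not fully recognized: a token
+    # without spaces is treated as the format, one containing "." as the size.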
+    if format == "" or size == "":
+        for f in _details:
+            if format == "" and " " not in f.strip().lower():
+                format = f.strip().lower()
+            if size == "" and "." in f.strip().lower():
+                size = f.strip().lower()
+
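+    # The title sits three divs above the details line; strip the 🔍 icon it
+    # may carry.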
+    book_title = divs[separator_index - 3].strip("🔍")
+
     # Extract basic information
     book_info = BookInfo(
         id=book_id,
         preview=preview,
-        title=divs[7].text.strip(),
-        publisher=divs[11].text.strip(),
-        author=divs[9].text.strip(),
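+        # Offsets are relative to the details line: title at -3, author at -2,
+        # publisher at -1.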
+        title=book_title,
+        publisher=divs[separator_index - 1],
+        author=divs[separator_index - 2],
         format=format,
         size=size,
         download_urls=urls,
     )
 
     # Extract additional metadata
-    info = _extract_book_metadata(divs[-6])
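+    # divs now holds stripped strings, so index the metadata element from the
+    # original (unfiltered) element list.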
+    info = _extract_book_metadata(original_divs[-6])
     book_info.info = info
 
     # Set language and year from metadata if available
@@ -262,6 +276,8 @@ def _parse_book_info_page(soup: BeautifulSoup, book_id: str) -> BookInfo:
     return book_info
 
 def _get_download_urls_from_welib(book_id: str) -> set[str]:
     """Get download urls from welib.org."""
+    # Honor the ALLOW_USE_WELIB switch before doing any network work.
+    if not ALLOW_USE_WELIB:
+        return set()
     url = f"https://welib.org/md5/{book_id}"
     logger.info(f"Getting download urls from welib.org for {book_id}. While this uses the bypasser, it will not start downloading them yet.")