@@ -10,7 +10,7 @@
 import downloader
 from logger import setup_logger
 from config import SUPPORTED_FORMATS, BOOK_LANGUAGE, AA_BASE_URL
-from env import AA_DONATOR_KEY, USE_CF_BYPASS, PRIORITIZE_WELIB
+from env import AA_DONATOR_KEY, USE_CF_BYPASS, PRIORITIZE_WELIB, ALLOW_USE_WELIB
 from models import BookInfo, SearchFilters
 logger = setup_logger(__name__)
 
@@ -169,21 +169,6 @@ def _parse_book_info_page(soup: BeautifulSoup, book_id: str) -> BookInfo:
 
     data = soup.find_all("div", {"class": "main-inner"})[0].find_next("div")
     divs = list(data.children)
-    _details = divs[13].text.strip().lower().split(" · ")
-    format = ""
-    size = ""
-    for f in _details:
-        if format == "" and f.strip().lower() in SUPPORTED_FORMATS:
-            format = f.strip().lower()
-        if size == "" and any(u in f.strip().lower() for u in ["mb", "kb", "gb"]):
-            size = f.strip().lower()
-
-    if format == "" or size == "":
-        for f in _details:
-            if f == "" and not " " in f.strip().lower():
-                format = f.strip().lower()
-            if size == "" and "." in f.strip().lower():
-                size = f.strip().lower()
 
     every_url = soup.find_all("a")
     slow_urls_no_waitlist = set()
@@ -237,20 +222,49 @@ def _parse_book_info_page(soup: BeautifulSoup, book_id: str) -> BookInfo:
     # Remove empty urls
     urls = [url for url in urls if url != ""]
 
+    # Filter out divs that are not text
+    original_divs = divs
+    divs = [div.text.strip() for div in divs if div.text.strip() != ""]
+
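+    # Locate the details line (format/size tokens joined by " · ") instead of
+    # relying on a fixed position; 6 is kept as a last-resort default.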
+    separator_index = 6
+    for i, div in enumerate(divs):
+        if "·" in div.strip():
+            separator_index = i
+            break
+
+    _details = divs[separator_index].lower().split(" · ")
+    format = ""
+    size = ""
+    for f in _details:
+        if format == "" and f.strip().lower() in SUPPORTED_FORMATS:
+            format = f.strip().lower()
+        if size == "" and any(u in f.strip().lower() for u in ["mb", "kb", "gb"]):
+            size = f.strip().lower()
+
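+    # Fallback heuristics when the details line is not fully recognized: a token
+    # without spaces is treated as the format, one containing "." as the size.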
+    if format == "" or size == "":
+        for f in _details:
+            if format == "" and " " not in f.strip().lower():
+                format = f.strip().lower()
+            if size == "" and "." in f.strip().lower():
+                size = f.strip().lower()
+
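+    # The title sits three divs above the details line; strip the 🔍 icon it
+    # may carry.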
+    book_title = divs[separator_index - 3].strip("🔍")
+
     # Extract basic information
     book_info = BookInfo(
         id=book_id,
         preview=preview,
-        title=divs[7].text.strip(),
-        publisher=divs[11].text.strip(),
-        author=divs[9].text.strip(),
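+        # Offsets are relative to the details line: title at -3, author at -2,
+        # publisher at -1.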
+        title=book_title,
+        publisher=divs[separator_index - 1],
+        author=divs[separator_index - 2],
         format=format,
         size=size,
         download_urls=urls,
     )
 
     # Extract additional metadata
-    info = _extract_book_metadata(divs[-6])
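+    # divs now holds stripped strings, so index the metadata element from the
+    # original (unfiltered) element list.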
+    info = _extract_book_metadata(original_divs[-6])
     book_info.info = info
 
     # Set language and year from metadata if available
@@ -262,6 +276,8 @@ def _parse_book_info_page(soup: BeautifulSoup, book_id: str) -> BookInfo:
     return book_info
 
 def _get_download_urls_from_welib(book_id: str) -> set[str]:
     """Get download urls from welib.org."""
+    # Honor the ALLOW_USE_WELIB switch before doing any network work.
+    if not ALLOW_USE_WELIB:
+        return set()
     url = f"https://welib.org/md5/{book_id}"
     logger.info(f"Getting download urls from welib.org for {book_id}. While this uses the bypasser, it will not start downloading them yet.")