Skip to content

Commit 8ea2fee

Browse files
authored
Fixing the title and book details from AA (#289)
Should fix #288
1 parent 1c24312 commit 8ea2fee

File tree

3 files changed

+38
-20
lines changed

3 files changed

+38
-20
lines changed

book_manager.py

Lines changed: 36 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import downloader
1111
from logger import setup_logger
1212
from config import SUPPORTED_FORMATS, BOOK_LANGUAGE, AA_BASE_URL
13-
from env import AA_DONATOR_KEY, USE_CF_BYPASS, PRIORITIZE_WELIB
13+
from env import AA_DONATOR_KEY, USE_CF_BYPASS, PRIORITIZE_WELIB, ALLOW_USE_WELIB
1414
from models import BookInfo, SearchFilters
1515
logger = setup_logger(__name__)
1616

@@ -169,21 +169,6 @@ def _parse_book_info_page(soup: BeautifulSoup, book_id: str) -> BookInfo:
169169

170170
data = soup.find_all("div", {"class": "main-inner"})[0].find_next("div")
171171
divs = list(data.children)
172-
_details = divs[13].text.strip().lower().split(" · ")
173-
format = ""
174-
size = ""
175-
for f in _details:
176-
if format == "" and f.strip().lower() in SUPPORTED_FORMATS:
177-
format = f.strip().lower()
178-
if size == "" and any(u in f.strip().lower() for u in ["mb", "kb", "gb"]):
179-
size = f.strip().lower()
180-
181-
if format == "" or size == "":
182-
for f in _details:
183-
if f == "" and not " " in f.strip().lower():
184-
format = f.strip().lower()
185-
if size == "" and "." in f.strip().lower():
186-
size = f.strip().lower()
187172

188173
every_url = soup.find_all("a")
189174
slow_urls_no_waitlist = set()
@@ -237,20 +222,49 @@ def _parse_book_info_page(soup: BeautifulSoup, book_id: str) -> BookInfo:
237222
# Remove empty urls
238223
urls = [url for url in urls if url != ""]
239224

225+
# Filter out divs that are not text
226+
original_divs = divs
227+
divs = [div.text.strip() for div in divs if div.text.strip() != ""]
228+
229+
separator_index = 6
230+
for i, div in enumerate(divs):
231+
if "·" in div.strip():
232+
separator_index = i
233+
break
234+
235+
_details = divs[separator_index].lower().split(" · ")
236+
format = ""
237+
size = ""
238+
for f in _details:
239+
if format == "" and f.strip().lower() in SUPPORTED_FORMATS:
240+
format = f.strip().lower()
241+
if size == "" and any(u in f.strip().lower() for u in ["mb", "kb", "gb"]):
242+
size = f.strip().lower()
243+
244+
if format == "" or size == "":
245+
for f in _details:
246+
if f == "" and not " " in f.strip().lower():
247+
format = f.strip().lower()
248+
if size == "" and "." in f.strip().lower():
249+
size = f.strip().lower()
250+
251+
252+
book_title = divs[separator_index-3].strip("🔍")
253+
240254
# Extract basic information
241255
book_info = BookInfo(
242256
id=book_id,
243257
preview=preview,
244-
title=divs[7].text.strip(),
245-
publisher=divs[11].text.strip(),
246-
author=divs[9].text.strip(),
258+
title=book_title,
259+
publisher=divs[separator_index-1],
260+
author=divs[separator_index-2],
247261
format=format,
248262
size=size,
249263
download_urls=urls,
250264
)
251265

252266
# Extract additional metadata
253-
info = _extract_book_metadata(divs[-6])
267+
info = _extract_book_metadata(original_divs[-6])
254268
book_info.info = info
255269

256270
# Set language and year from metadata if available
@@ -262,6 +276,8 @@ def _parse_book_info_page(soup: BeautifulSoup, book_id: str) -> BookInfo:
262276
return book_info
263277

264278
def _get_download_urls_from_welib(book_id: str) -> set[str]:
279+
if ALLOW_USE_WELIB == False:
280+
return set()
265281
"""Get download urls from welib.org."""
266282
url = f"https://welib.org/md5/{book_id}"
267283
logger.info(f"Getting download urls from welib.org for {book_id}. While this uses the bypasser, it will not start downloading them yet.")

env.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ def string_to_bool(s: str) -> bool:
2828
DEBUG = string_to_bool(os.getenv("DEBUG", "false"))
2929
APP_ENV = os.getenv("APP_ENV", "N/A").lower()
3030
PRIORITIZE_WELIB = string_to_bool(os.getenv("PRIORITIZE_WELIB", "false"))
31+
ALLOW_USE_WELIB = string_to_bool(os.getenv("ALLOW_USE_WELIB", "true"))
3132

3233
# Version information from Docker build
3334
BUILD_VERSION = os.getenv("BUILD_VERSION", "N/A")

readme.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ Note that if using TOR, the TZ will be calculated automatically based on IP.
8383
| `AA_DONATOR_KEY` | Optional Donator key for Anna's Archive fast download API | `` |
8484
| `USE_BOOK_TITLE` | Use book title as filename instead of ID | `false` |
8585
| `PRIORITIZE_WELIB` | When downloading, download from WELIB first instead of AA | `false` |
86+
| `ALLOW_USE_WELIB` | Allow downloading books from WELIB when they are found there | `true` |
8687

8788
If you change `BOOK_LANGUAGE`, you can specify multiple comma-separated languages, such as `en,fr,ru`.
8889

0 commit comments

Comments
 (0)