Skip to content

Commit eb09096

Browse files
committed
Update core.py
1 parent 410f3e4 commit eb09096

File tree

2 files changed

+11
-64
lines changed

2 files changed

+11
-64
lines changed

Watcher/Watcher/threats_watcher/core.py

Lines changed: 0 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -166,7 +166,6 @@ def main_watch():
166166
- close_old_connections()
167167
- load_feeds()
168168
- fetch_last_posts(settings.POSTS_DEPTH)
169-
# - fetch_last_posts_bluesky(settings.POSTS_DEPTH)
170169
- tokenize_count_urls()
171170
- remove_banned_words()
172171
- focus_five_letters()
@@ -179,7 +178,6 @@ def main_watch():
179178
load_feeds()
180179
logger.info("Loaded feeds.")
181180
fetch_last_posts(settings.POSTS_DEPTH)
182-
# fetch_last_posts_bluesky(settings.POSTS_DEPTH)
183181
logger.info("Fetched last posts.")
184182
tokenize_count_urls()
185183
logger.info("Tokenized words.")
@@ -227,17 +225,6 @@ def fetch_last_posts(nb_max_post):
227225
posts = dict()
228226
tmp_posts = dict()
229227
posts_published = dict()
230-
# for url in rss_urls:
231-
# if "bsky.app" in url:
232-
# continue
233-
# try:
234-
# feed_content = requests.get(url, timeout=10, headers=HEADERS)
235-
# if feed_content.status_code == 200:
236-
# feeds.append(feedparser.parse(feed_content.text))
237-
# else:
238-
# logger.warning(f"Feed: {url} => Error: Status code: {feed_content.status_code}")
239-
# except requests.exceptions.RequestException as e:
240-
# logger.error(str(e))
241228

242229
for url in rss_urls:
243230
try:
@@ -271,56 +258,6 @@ def fetch_last_posts(nb_max_post):
271258
posts[title] = url
272259

273260

274-
# def fetch_last_posts_bluesky(nb_max_post):
275-
# """
276-
# Fetch the nb last posts for each Bluesky feed (domain 'bsky.app').
277-
278-
# :param nb_max_post: The deepness of the search on each feed.
279-
# """
280-
# global posts
281-
# global posts_published
282-
# posts = dict()
283-
# tmp_posts = dict()
284-
# posts_published = dict()
285-
# for url in rss_urls:
286-
# if "bsky.app" not in url:
287-
# continue
288-
# try:
289-
# feed_content = requests.get(url, timeout=10)
290-
# if feed_content.status_code == 200:
291-
# feeds.append(feedparser.parse(feed_content.text))
292-
# else:
293-
# logger.warning(f"Feed: {url} => Error: Status code: {feed_content.status_code}")
294-
# except requests.exceptions.RequestException as e:
295-
# logger.error(str(e))
296-
297-
# for feed in feeds:
298-
# count = 1
299-
# for entry in feed.entries:
300-
# if count <= nb_max_post:
301-
# count += 1
302-
# dt = "no-date"
303-
# parsed = entry.get('published_parsed') or entry.get('updated_parsed')
304-
# if parsed:
305-
# try:
306-
# dt = datetime.fromtimestamp(calendar.timegm(parsed))
307-
# except Exception:
308-
# dt = "no-date"
309-
310-
# link = entry.get('link') or entry.get('guid') or entry.get('id') or None
311-
312-
# title_raw = entry.get('title') or entry.get('summary') or entry.get('description') or (entry.get('guid') if isinstance(entry.get('guid'), str) else None) or link or ""
313-
314-
# title_clean = re.sub(r'<[^>]+>', '', title_raw).replace(u'\xa0', u' ').strip()
315-
316-
# if link and title_clean:
317-
# tmp_posts[title_clean] = link
318-
# posts_published[link] = dt
319-
320-
# for title, url in tmp_posts.items():
321-
# posts[title] = url
322-
323-
324261
def tokenize_count_urls():
325262
"""
326263
For each title (≤ 30 days):

Watcher/Watcher/threats_watcher/datas/banned_words.csv

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -92,4 +92,14 @@ Office
9292
Linked
9393
Hackers
9494
North Korea
95-
Secure
95+
Secure
96+
Information
97+
Iranian
98+
Japan
99+
Australia
100+
University
101+
Technologies
102+
Italian
103+
International
104+
Denmark
105+
European

0 commit comments

Comments
 (0)