@@ -327,6 +327,102 @@ def get_info(self) -> dict[str, Any]:
327327 ]
328328 ],
329329 ),
330+ TestCase (
331+ name = "oss_text_file" ,
332+ description = "User message with text and file" ,
333+ scene_data = [
334+ [
335+ {
336+ "role" : "user" ,
337+ "content" : [
338+ {"type" : "text" , "text" : "请阅读这个PDF,总结里面的要点。" },
339+ {
340+ "type" : "file" ,
341+ "file" : {
342+ "file_id" : "file_123" ,
343+ "filename" : "report.pdf" ,
344+ "file_data" : "@http://139.196.232.20:9090/graph-test/algorithm/2025_11_13/1763043889_1763043782_PM1%E8%BD%A6%E9%97%B4PMT%E9%9D%B4%E5%8E%8B%E8%BE%B9%E5%8E%8B%E5%8E%8B%E5%8A%9B%E6%97%A0%E6%B3%95%E5%BB%BA%E7%AB%8B%E6%95%85%E9%9A%9C%E6%8A%A5%E5%91%8A20240720.md" ,
345+ },
346+ },
347+ ],
348+ "chat_time" : "2025-11-24T10:21:00Z" ,
349+ "message_id" : "mm-file-1" ,
350+ }
351+ ]
352+ ],
353+ ),
354+ TestCase (
355+ name = "pure_data_file" ,
356+ description = "User message with text and file" ,
357+ scene_data = [
358+ [
359+ {
360+ "role" : "user" ,
361+ "content" : [
362+ {"type" : "text" , "text" : "请阅读这个PDF,总结里面的要点。" },
363+ {
364+ "type" : "file" ,
365+ "file" : {
366+ "file_id" : "file_123" ,
367+ "filename" : "report.pdf" ,
368+ "file_data" : "明文记忆是系统与用户对话、操作等交互中动态习得,以及外部提供的、可显式管理的结构化知识形态,通常以文档、提示模板、图结构或用户规则等形式存在。它具备编辑性、可共享性与治理友好性,适合存储需要频繁修改、可审计或多方协同使用的信息。 在 MemOS 中,明文记忆可用于动态生成推理上下文、个性化偏好注入、多代理协作共享等场景,成为连接人类输入与模型认知的关键桥梁。激活记忆是指模型在推理过程中产生的瞬时性认知状态,包括 KV cache、隐藏层激活、注意力权重等中间张量结构。它通常用于维持上下文连续性、对话一致性与行为风格控制。 MemOS 将激活记忆抽象为可调度资源,支持按需唤醒、延迟卸载与结构变换。例如,某些上下文状态可以被压缩为“半结构化记忆片段”用于未来复用,也可以在任务级别转化为参数化模块,支持短期记忆的长期化演进。这一机制为模型行为一致性、风格保持与状态持续性提供了基础。" ,
369+ },
370+ },
371+ ],
372+ "chat_time" : "2025-11-24T10:21:00Z" ,
373+ "message_id" : "mm-file-1" ,
374+ }
375+ ]
376+ ],
377+ ),
378+ TestCase (
379+ name = "local_data_file" ,
380+ description = "User message with text and file" ,
381+ scene_data = [
382+ [
383+ {
384+ "role" : "user" ,
385+ "content" : [
386+ {"type" : "text" , "text" : "请阅读这个PDF,总结里面的要点。" },
387+ {
388+ "type" : "file" ,
389+ "file" : {
390+ "file_id" : "file_123" ,
391+ "filename" : "report.pdf" ,
392+ "file_data" : "./my_local_file/report.pdf" ,
393+ },
394+ },
395+ ],
396+ "chat_time" : "2025-11-24T10:21:00Z" ,
397+ "message_id" : "mm-file-1" ,
398+ }
399+ ]
400+ ],
401+ ),
402+ TestCase (
403+ name = "internet_file" ,
404+ description = "User message with text and file" ,
405+ scene_data = [
406+ [
407+ {
408+ "role" : "user" ,
409+ "content" : [
410+ {"type" : "text" , "text" : "请阅读这个PDF,总结里面的要点。" },
411+ {
412+ "type" : "file" ,
413+ "file" : {
414+ "file_id" : "file_123" ,
415+ "filename" : "report.pdf" ,
416+ "file_data" : "https://upload.wikimedia.org/wikipedia/commons/c/cb/NLC416-16jh004830-88775_%E7%B4%85%E6%A8%93%E5%A4%A2.pdf" ,
417+ },
418+ },
419+ ],
420+ "chat_time" : "2025-11-24T10:21:00Z" ,
421+ "message_id" : "mm-file-1" ,
422+ }
423+ ]
424+ ],
425+ ),
330426 TestCase (
331427 name = "multimodal_mixed" ,
332428 description = "Mixed multimodal message (text + file + image)" ,
@@ -661,6 +757,12 @@ def get_reader_config() -> dict[str, Any]:
661757 },
662758 }
663759
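760+ # Read the comma-separated direct-markdown hostnames from the FILE_PARSER_DIRECT_MARKDOWN_HOSTNAMES environment variable (default: "139.196.232.20"); an empty value leaves the setting as None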
761+ direct_markdown_hostnames = None
762+ env_hostnames = os .getenv ("FILE_PARSER_DIRECT_MARKDOWN_HOSTNAMES" , "139.196.232.20" )
763+ if env_hostnames :
764+ direct_markdown_hostnames = [h .strip () for h in env_hostnames .split ("," ) if h .strip ()]
765+
664766 return {
665767 "llm" : llm_config ,
666768 "embedder" : embedder_config ,
@@ -673,6 +775,7 @@ def get_reader_config() -> dict[str, Any]:
673775 "min_sentences_per_chunk" : 1 ,
674776 },
675777 },
778+ "direct_markdown_hostnames" : direct_markdown_hostnames ,
676779 }
677780
678781
@@ -863,13 +966,13 @@ def main():
863966 parser .add_argument (
864967 "--example" ,
865968 type = str ,
866- default = "all " ,
969+ default = "oss_text_file " ,
867970 help = "Test case name, category name, or 'all' to run all cases (default: all)" ,
868971 )
869972 parser .add_argument (
870973 "--mode" ,
871974 choices = ["fast" , "fine" ],
872- default = "fast " ,
975+ default = "fine " ,
873976 help = "Processing mode: fast (quick) or fine (with LLM) (default: fast)" ,
874977 )
875978 parser .add_argument (
0 commit comments