@@ -75,24 +75,65 @@ https://github.com/dotnetcore/DotnetSpider/wiki
7575[View complete code](https://github.com/zlzforever/DotnetSpider/blob/master/src/DotnetSpider.Sample/samples/EntitySpider.cs)
7676
7777```` csharp
78- public class EntitySpider : Spider
78+ public class EntitySpider (
79+ IOptions <SpiderOptions > options ,
80+ DependenceServices services ,
81+ ILogger <Spider > logger )
82+ : Spider (options , services , logger )
7983{
80- public EntitySpider (IOptions <SpiderOptions > options , SpiderServices services , ILogger <Spider > logger ) : base (
81- options , services , logger )
84+ public static async Task RunAsync ()
85+ {
86+ var builder = Builder .CreateDefaultBuilder <EntitySpider >(options =>
87+ {
88+ options .Speed = 1 ;
89+ });
90+ builder .UseSerilog ();
91+ builder .IgnoreServerCertificateError ();
92+ await builder .Build ().RunAsync ();
93+ }
94+
95+ public static async Task RunMySqlQueueAsync ()
96+ {
97+ var builder = Builder .CreateDefaultBuilder <EntitySpider >(options =>
98+ {
99+ options .Speed = 1 ;
100+ });
101+ builder .UseSerilog ();
102+ builder .IgnoreServerCertificateError ();
103+ builder .UseMySqlQueueBfsScheduler ((context , options ) =>
104+ {
105+ options .ConnectionString = context .Configuration [" SchedulerConnectionString" ];
106+ });
107+ await builder .Build ().RunAsync ();
108+ }
109+
110+ protected override async Task InitializeAsync (CancellationToken stoppingToken = default )
82111 {
112+ AddDataFlow <DataParser <CnblogsEntry >>();
113+ AddDataFlow (GetDefaultStorage );
114+ await AddRequestsAsync (
115+ new Request (
116+ " https://news.cnblogs.com/n/page/1" , new Dictionary <string , object > { { " 网站" , " 博客园" } }));
83117 }
84118
85- #region Nested type: CnblogsEntry
119+ protected override SpiderId GenerateSpiderId ()
120+ {
121+ return new (ObjectId .CreateId ().ToString (), " 博客园" );
122+ }
86123
87124 [Schema (" cnblogs" , " news" )]
88125 [EntitySelector (Expression = " .//div[@class='news_block']" , Type = SelectorType .XPath )]
89126 [GlobalValueSelector (Expression = " .//a[@class='current']" , Name = " 类别" , Type = SelectorType .XPath )]
90- [FollowRequestSelector (XPaths = new []
91- {
92- " //div[@class='pager']"
93- })]
127+ [GlobalValueSelector (Expression = " //title" , Name = " Title" , Type = SelectorType .XPath )]
128+ [FollowRequestSelector (Expressions = [" //div[@class='pager']" ])]
94129 public class CnblogsEntry : EntityBase <CnblogsEntry >
95130 {
131+ protected override void Configure ()
132+ {
133+ HasIndex (x => x .Title );
134+ HasIndex (x => new { x .WebSite , x .Guid }, true );
135+ }
136+
96137 public int Id { get ; set ; }
97138
98139 [Required ]
@@ -106,7 +147,7 @@ public class EntitySpider : Spider
106147 public string WebSite { get ; set ; }
107148
108149 [StringLength (200 )]
109- [ValueSelector (Expression = " //title " )]
150+ [ValueSelector (Expression = " Title " , Type = SelectorType . Environment )]
110151 [ReplaceFormatter (NewValue = " " , OldValue = " - 博客园" )]
111152 public string Title { get ; set ; }
112153
@@ -121,55 +162,15 @@ public class EntitySpider : Spider
121162 public string Url { get ; set ; }
122163
123164 [ValueSelector (Expression = " .//div[@class='entry_summary']" )]
165+ [TrimFormatter ]
124166 public string PlainText { get ; set ; }
125167
126168 [ValueSelector (Expression = " DATETIME" , Type = SelectorType .Environment )]
127169 public DateTime CreationTime { get ; set ; }
128-
129- protected override void Configure ()
130- {
131- HasIndex (x => x .Title );
132- HasIndex (x => new
133- {
134- x .WebSite ,
135- x .Guid
136- }, true );
137- }
138- }
139-
140- #endregion
141-
142- public static async Task RunAsync ()
143- {
144- var builder = Builder .CreateDefaultBuilder <EntitySpider >();
145- builder .UseSerilog ();
146- await builder .Build ()
147- .RunAsync ();
148- }
149-
150- protected override async Task InitializeAsync (CancellationToken stoppingToken )
151- {
152- AddDataFlow (new DataParser <CnblogsEntry >());
153- AddDataFlow (GetDefaultStorage ());
154- await AddRequestsAsync (new Request (" https://news.cnblogs.com/n/page/1/" , new Dictionary <string , string >
155- {
156- {
157- " 网站" , " 博客园"
158- }
159- }), new Request (" https://news.cnblogs.com/n/page/2/" , new Dictionary <string , string >
160- {
161- {
162- " 网站" , " 博客园"
163- }
164- }));
165- }
166-
167- protected override (string Id , string Name ) GetIdAndName ()
168- {
169- return (ObjectId .NewId .ToString (), " 博客园" );
170170 }
171171}
172172
173+
173174````
174175
175176#### Distributed spider
0 commit comments