Selenium 與PhantomJS
步驟
接下來安裝 Selenium.WebDriver, Selenium.PhantomJS.WebDriver, Selenium.WebDriver.PhantomJS.Xplatform, Selenium.Support 四個套件.
完成安裝四個套件後, 底下的爬蟲程式即可抓取台灣銀行黃金存簿的歷史金價, 並存入MySQL資料庫中
using MySql.Data.MySqlClient; using OpenQA.Selenium; using OpenQA.Selenium.PhantomJS; using OpenQA.Selenium.Support.UI; using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading; using System.Threading.Tasks; namespace ConsoleApp1 { class Program { static IWebDriver web; static string connStr = "server=ip;uid=account;pwd=pwd;database=db"; static MySqlConnection mySqlConn = new MySqlConnection(connStr); static void Main(string[] args) { Random r = new Random(); web = new PhantomJSDriver(PhantomJSDriverService.CreateDefaultService()); for (int y = 2018; y <= 2019; y++) { for (int m = 1; m <= 12; m++) { if (y == 2019 && m > 8) break; WebCollect(y, m); int delay = (int)(r.NextDouble() * 1000) + 1000; Console.WriteLine("sleep : {0} ms", delay); Thread.Sleep(delay); } } web.Close(); } static void WebCollect(int yy, int mm) { mySqlConn.Open(); StringBuilder sbInsert = new StringBuilder(); sbInsert.Append("insert into taiwan_bank_gold (gd_date, gd_buy, gd_sale) values "); Dictionary<string, List<int>> wareHouse = new Dictionary<string, List<int>>();web.Navigate().GoToUrl("https://rate.bot.com.tw/gold/passbook"); IWebElement radio = web.FindElement(By.Id("input_date")); radio.Click(); SelectElement select_yy = new SelectElement(web.FindElement(By.Name("year"))); SelectElement select_mm = new SelectElement(web.FindElement(By.Name("month"))); select_yy.SelectByValue(string.Format("{0}", yy)); select_mm.SelectByValue(string.Format("{0:00}", mm)); IWebElement btn = web.FindElement(By.CssSelector("[class='btn btn-inverse noscript']")); btn.Submit(); IWebElement tbody = web.FindElement(By.TagName("tbody")); IReadOnlyCollection<IWebElemnet> trs = tbody.FindElements(By.TagName("tr")); foreach (IWebElement tr in trs) { IReadOnlyCollection<IWebElement> tds = tr.FindElements(By.TagName("td")); List<int> list = new List<int> (); list.Add(Int32.Parse(tds.ToList ()[3].Text.Replace(",", ""))); list.Add(Int32.Parse(tds.ToList ()[4].Text.Replace(",", ""))); wareHouse.Add(tds.ToList ()[0].Text, list); } wareHouse = wareHouse.OrderBy(o => o.Key).ToDictionary(o => o.Key, p => p.Value); foreach (string key in wareHouse.Keys) { List<int> list = wareHouse[key]; sbInsert.Append(string.Format("('{0}', {1}, {2}),", key, list[0], list[1])); } sbInsert.Remove(sbInsert.Length - 1, 1); try { new MySqlCommand(sbInsert.ToString(), mySqlConn).ExecuteNonQuery(); Console.WriteLine("{0}/{1} : write to database successful", yy, mm); } catch(Exception e) { Console.WriteLine(e.Message); } finally { mySqlConn.Close(); } } } }
MySQL資料庫結構
CREATE DEFINER=`account`@`%` PROCEDURE `Create_taiwan_bank_gold`() BEGIN drop table IF EXISTS taiwan_bank_gold; CREATE TABLE `taiwan_bank_gold` ( `gd_id` int(11) NOT NULL AUTO_INCREMENT, `gd_date` date NOT NULL, `gd_buy` double DEFAULT NULL, `gd_sale` double DEFAULT NULL, PRIMARY KEY (`gd_id`), UNIQUE KEY `gd_date_UNIQUE` (`gd_date`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci; END