C# 爬蟲

      在〈C# 爬蟲〉中尚無留言

Selenium 與PhantomJS

步驟

nuget

接下來安裝 Selenium.WebDriver, Selenium.PhantomJS.WebDriver, Selenium.WebDriver.PhantomJS.Xplatform, Selenium.Support 四個套件.

完成安裝四個套件後, 底下的爬蟲程式即可抓取台灣銀行黃金存簿的歷史金價, 並存入MySQL資料庫中

using MySql.Data.MySqlClient;
using OpenQA.Selenium;
using OpenQA.Selenium.PhantomJS;
using OpenQA.Selenium.Support.UI;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

namespace ConsoleApp1
{
    class Program
    {
        static IWebDriver web;
        static string connStr = "server=ip;uid=account;pwd=pwd;database=db";
        static MySqlConnection mySqlConn = new MySqlConnection(connStr);

        static void Main(string[] args)
        {
            Random r = new Random();
            web = new PhantomJSDriver(PhantomJSDriverService.CreateDefaultService());
            for (int y = 2018; y <= 2019; y++)
            {
                for (int m = 1; m <= 12; m++) { if (y == 2019 && m > 8) break;
                    WebCollect(y, m);
                    int delay = (int)(r.NextDouble() * 1000) + 1000;
                    Console.WriteLine("sleep : {0} ms", delay);
                    Thread.Sleep(delay);
                }
            }
            web.Close();
        }
        static void WebCollect(int yy, int mm)
        {
            mySqlConn.Open();
            StringBuilder sbInsert = new StringBuilder();
            sbInsert.Append("insert into taiwan_bank_gold (gd_date, gd_buy, gd_sale) values ");
            Dictionary<string, List<int>> wareHouse = new Dictionary<string, List<int>>();
            web.Navigate().GoToUrl("https://rate.bot.com.tw/gold/passbook");
            IWebElement radio = web.FindElement(By.Id("input_date"));
            radio.Click();
            SelectElement select_yy = new SelectElement(web.FindElement(By.Name("year")));
            SelectElement select_mm = new SelectElement(web.FindElement(By.Name("month")));
            select_yy.SelectByValue(string.Format("{0}", yy));
            select_mm.SelectByValue(string.Format("{0:00}", mm));
            IWebElement btn = web.FindElement(By.CssSelector("[class='btn btn-inverse noscript']"));
            btn.Submit();
            IWebElement tbody = web.FindElement(By.TagName("tbody"));
            IReadOnlyCollection<IWebElemnet> trs = tbody.FindElements(By.TagName("tr"));
            foreach (IWebElement tr in trs)
            {
                IReadOnlyCollection<IWebElement> tds = tr.FindElements(By.TagName("td"));
                List<int> list = new List<int>();
                list.Add(Int32.Parse(tds.ToList()[3].Text.Replace(",", "")));
                list.Add(Int32.Parse(tds.ToList()[4].Text.Replace(",", "")));
                wareHouse.Add(tds.ToList()[0].Text, list);
            }
            wareHouse = wareHouse.OrderBy(o => o.Key).ToDictionary(o => o.Key, p => p.Value);
            foreach (string key in wareHouse.Keys)
            {
                List<int>list = wareHouse[key];
                sbInsert.Append(string.Format("('{0}', {1}, {2}),", key, list[0], list[1]));
            }
            sbInsert.Remove(sbInsert.Length - 1, 1);
            try
            {
                new MySqlCommand(sbInsert.ToString(), mySqlConn).ExecuteNonQuery();
                Console.WriteLine("{0}/{1} : write to database successful", yy, mm);
            }
            catch(Exception e)
            {

                Console.WriteLine(e.Message);
            }
            finally
            {
                mySqlConn.Close();
            }

        }
    }
}

MySQL資料庫結構

CREATE DEFINER=`account`@`%` PROCEDURE `Create_taiwan_bank_gold`()
BEGIN
	drop table IF EXISTS taiwan_bank_gold;
	CREATE TABLE `taiwan_bank_gold` (
	  `gd_id` int(11) NOT NULL AUTO_INCREMENT,
	  `gd_date` date NOT NULL,
	  `gd_buy` double DEFAULT NULL,
	  `gd_sale` double DEFAULT NULL,
	  PRIMARY KEY (`gd_id`),
	  UNIQUE KEY `gd_date_UNIQUE` (`gd_date`)
	) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
END

發佈留言

發佈留言必須填寫的電子郵件地址不會公開。 必填欄位標示為 *