DataFrame 與 MySQL

      在〈DataFrame 與 MySQL〉中尚無留言

MySQL 轉 DataFrame

底下代碼說明如何取得資料庫的欄位名稱,再產生 DataFrame 格式。

import mysql.connector as mysql
import pandas as pd
conn=mysql.connect(host="ip", user="帳號", password="密碼", database="資料庫")
cursor=conn.cursor()
cursor.execute("select * from 台灣股市 order by 日期")
rs=cursor.fetchall()

#取得資料庫欄位名
descriptions=cursor.description
columns=[d[0] for d in descriptions]

df=pd.DataFrame(data=rs, columns=columns)
print(df)
結果:
         id          日期        開盤        最高        最低        收盤
0         1  1999-01-05   6310.41   6310.41   6111.64   6152.43
1         2  1999-01-06   6082.02   6280.93   5988.06   6199.91
2         3  1999-01-07   6280.38   6409.55   6181.62   6404.31
3         4  1999-01-08   6371.34   6492.87   6371.34   6421.75
4         5  1999-01-11   6472.02   6492.90   6392.49   6406.99

DataFram 儲存庫料庫

df.values 是將 df 轉成 numpy 格式,但 cursor.executemany() 並不吃二維 numpy 格式,所以需把二維 numpy 改成二維 list,如下代碼所示。

import mysql.connector as mysql
conn, cursor=G.connect()
df=......................
datas=[row.tolist() for row in df.values]
cmd="insert into table ...............")
cursor.executemany(cmd, datas)
conn.commit()
conn.close()

台新銀行信用卡帳單

底下可以將台新銀行每月的帳單存入資料庫中。

「信用卡期數」資料表的欄位名稱可以由如下建立。

CREATE TABLE `信用卡期數` (
  `id` int NOT NULL AUTO_INCREMENT,
  `期數` varchar(8) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
  PRIMARY KEY (`id`),
  UNIQUE KEY `期數_UNIQUE` (`期數`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci

「信用卡明細」資料表的欄位名稱可以由如下建立。

CREATE TABLE `信用卡明細` (
  `id` int NOT NULL AUTO_INCREMENT,
  `期數` varchar(6) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
  `卡號` varchar(15) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
  `消費日` date DEFAULT NULL,
  `起息日` date DEFAULT NULL,
  `明細` varchar(45) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
  `幣別` varchar(5) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
  `金額` int DEFAULT NULL,
  `外幣折算日` date DEFAULT NULL,
  `消費地` varchar(5) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
  `外幣金額` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci

台新銀行帳單的格式請由如下下載。台新銀行帳號

將帳單資料寫入資料庫的完整代碼如下

import pandas as pd
from G import G
display=pd.options.display
display.max_columns=None
display.max_rows=None
display.width=None
display.max_colwidth=None
year=eval(input("請輸入西元年 : "))
month=eval(input("請輸入月份 : "))
conn, cursor = G.connect()
#cursor.execute(truncate table 信用卡期數")
cmd=f"select * from 信用卡期數 where 期數 = '{year}{month:02d}'"
cursor.execute(cmd)
rs=cursor.fetchall()
if len(rs)>0:
    print("此期數已新增.....")
    exit()

cmd=f"insert into 信用卡期數 (期數) values('{year}{month:02d}')"
cursor.execute(cmd)
conn.commit()

file=f"F:/信用卡/台新銀行 -  {year}{month:02d} 信用卡明細.xlsx"
df = pd.read_excel(file)
df.columns=["消費日", "起息日", "明細", "幣別", "金額", "外幣折算日", "消費地", "外幣金額"]
df.insert(0,"卡號", "")
df.insert(0, "期數", f"2025{month:02d}")
carno=0
for idx,row in df.iterrows():
    df.at[idx,"卡號"]=carno
    if "卡號末四碼:" in str(row):
        carno=row["消費日"].split(":")[1][:-1]
df['消費日'] = pd.to_datetime(df['消費日'], format='%Y/%m/%d',errors='coerce').dt.date
df['起息日'] = pd.to_datetime(df['起息日'],format='%Y/%m/%d', errors='coerce').dt.date
df['外幣折算日'] = str(year)+"/"+df['外幣折算日'].where(pd.notna(df['外幣折算日']), None)
df = df.dropna(subset=['消費日', '起息日']).query("卡號!=0")
count=len(df.columns)
data = df.where(pd.notnull(df), None).values.tolist()
columns=str(df.columns.values.tolist())[1:-1].replace("'","")
cmd=f"insert into 信用卡明細 ({columns}) values ({('%s,'*count)[:-1]})"
cursor.executemany(cmd, data)
conn.commit()
print(df)
conn.close()

台新銀行信用卡分析

底下可由資料庫中撈出每個月的消費明細。

import pandas as pd
from openpyxl import load_workbook, Workbook
from openpyxl.styles import Font, Border, Side
from G import G
display=pd.options.display
display.max_columns=None
display.max_rows=None
display.width=None
display.max_colwidth=None

def noBlod(sheet):
    for row in sheet.iter_rows():
        for cell in row:
            #if cell.font.bold:  # 如果原本是粗體
            cell.font = Font(bold=False, size=12)  # 取消粗體
            cell.border = thin_border

thin_border = Border(
    left=Side(style='thin', color='000000'),
    right=Side(style='thin', color='000000'),
    top=Side(style='thin', color='000000'),
    bottom=Side(style='thin', color='000000')
)
year=int(input("請輸入年份 : "))
month=int(input("請輸入月份 : "))
file=f"f:/信用卡/信用卡.xlsx"
sheetname=f"{year}{month:02d}"
conn, cursor = G.connect()
cursor.execute(f"select * from 信用卡明細 where 期數='{sheetname}'")
rs=cursor.fetchall()
columns=[d[0] for d in cursor.description]
conn.close()

df=pd.DataFrame(data=rs, columns=columns)
total_jenny=df.query("卡號=='1004'")["金額"].sum()
total_thomas=df.query("卡號!='1004'")["金額"].sum()
total=df["金額"].sum()
df.drop(["id", "期數"], axis=1, inplace=True)
total_row = {'明細': 'Total','金額': total_jenny,}
df = pd.concat([df, pd.DataFrame([total_row])], ignore_index=True)

total_row = {'明細': 'Thomas','金額': total_thomas,}
df = pd.concat([df, pd.DataFrame([total_row])], ignore_index=True)

total_row = {'明細': '鏓計','金額': total,}
df = pd.concat([df, pd.DataFrame([total_row])], ignore_index=True)

df = df.where(pd.notna(df), None)
print(df)

with pd.ExcelWriter(
        file, 
        mode='a', 
        engine='openpyxl', 
        if_sheet_exists="replace") as writer:
    df.to_excel(writer, sheet_name=sheetname, index=False)

wb=load_workbook(file)
ws = wb[sheetname]
ws.column_dimensions['A'].width = 7
ws.column_dimensions['B'].width = 11.5
ws.column_dimensions['C'].width = 11.5
ws.column_dimensions['D'].width = 49
ws.column_dimensions['E'].width = 9
ws.column_dimensions['F'].width = 11
ws.column_dimensions['G'].width = 12.5
ws.column_dimensions['H'].width = 8
ws.column_dimensions['I'].width = 12
wb.active=ws
noBlod(ws)
for cell in ws['F']:
    cell.number_format = '#,##0'
wb.save(file)

發佈留言

發佈留言必須填寫的電子郵件地址不會公開。 必填欄位標示為 *