MySQL 轉 DataFrame
底下代碼說明如何取得資料庫的欄位名稱,再產生 DataFrame 格式。
import mysql.connector as mysql
import pandas as pd
conn=mysql.connect(host="ip", user="帳號", password="密碼", database="資料庫")
cursor=conn.cursor()
cursor.execute("select * from 台灣股市 order by 日期")
rs=cursor.fetchall()
#取得資料庫欄位名
descriptions=cursor.description
columns=[d[0] for d in descriptions]
df=pd.DataFrame(data=rs, columns=columns)
print(df)
結果:
id 日期 開盤 最高 最低 收盤
0 1 1999-01-05 6310.41 6310.41 6111.64 6152.43
1 2 1999-01-06 6082.02 6280.93 5988.06 6199.91
2 3 1999-01-07 6280.38 6409.55 6181.62 6404.31
3 4 1999-01-08 6371.34 6492.87 6371.34 6421.75
4 5 1999-01-11 6472.02 6492.90 6392.49 6406.99
DataFram 儲存庫料庫
df.values 是將 df 轉成 numpy 格式,但 cursor.executemany() 並不吃二維 numpy 格式,所以需把二維 numpy 改成二維 list,如下代碼所示。
import mysql.connector as mysql
conn, cursor=G.connect()
df=......................
datas=[row.tolist() for row in df.values]
cmd="insert into table ...............")
cursor.executemany(cmd, datas)
conn.commit()
conn.close()
台新銀行信用卡帳單
底下可以將台新銀行每月的帳單存入資料庫中。
「信用卡期數」資料表的欄位名稱可以由如下建立。
CREATE TABLE `信用卡期數` ( `id` int NOT NULL AUTO_INCREMENT, `期數` varchar(8) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL, PRIMARY KEY (`id`), UNIQUE KEY `期數_UNIQUE` (`期數`) ) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
「信用卡明細」資料表的欄位名稱可以由如下建立。
CREATE TABLE `信用卡明細` ( `id` int NOT NULL AUTO_INCREMENT, `期數` varchar(6) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL, `卡號` varchar(15) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL, `消費日` date DEFAULT NULL, `起息日` date DEFAULT NULL, `明細` varchar(45) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL, `幣別` varchar(5) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL, `金額` int DEFAULT NULL, `外幣折算日` date DEFAULT NULL, `消費地` varchar(5) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL, `外幣金額` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
台新銀行帳單的格式請由如下下載。台新銀行帳號
將帳單資料寫入資料庫的完整代碼如下
import pandas as pd
from G import G
display=pd.options.display
display.max_columns=None
display.max_rows=None
display.width=None
display.max_colwidth=None
year=eval(input("請輸入西元年 : "))
month=eval(input("請輸入月份 : "))
conn, cursor = G.connect()
#cursor.execute(truncate table 信用卡期數")
cmd=f"select * from 信用卡期數 where 期數 = '{year}{month:02d}'"
cursor.execute(cmd)
rs=cursor.fetchall()
if len(rs)>0:
print("此期數已新增.....")
exit()
cmd=f"insert into 信用卡期數 (期數) values('{year}{month:02d}')"
cursor.execute(cmd)
conn.commit()
file=f"F:/信用卡/台新銀行 - {year}{month:02d} 信用卡明細.xlsx"
df = pd.read_excel(file)
df.columns=["消費日", "起息日", "明細", "幣別", "金額", "外幣折算日", "消費地", "外幣金額"]
df.insert(0,"卡號", "")
df.insert(0, "期數", f"2025{month:02d}")
carno=0
for idx,row in df.iterrows():
df.at[idx,"卡號"]=carno
if "卡號末四碼:" in str(row):
carno=row["消費日"].split(":")[1][:-1]
df['消費日'] = pd.to_datetime(df['消費日'], format='%Y/%m/%d',errors='coerce').dt.date
df['起息日'] = pd.to_datetime(df['起息日'],format='%Y/%m/%d', errors='coerce').dt.date
df['外幣折算日'] = str(year)+"/"+df['外幣折算日'].where(pd.notna(df['外幣折算日']), None)
df = df.dropna(subset=['消費日', '起息日']).query("卡號!=0")
count=len(df.columns)
data = df.where(pd.notnull(df), None).values.tolist()
columns=str(df.columns.values.tolist())[1:-1].replace("'","")
cmd=f"insert into 信用卡明細 ({columns}) values ({('%s,'*count)[:-1]})"
cursor.executemany(cmd, data)
conn.commit()
print(df)
conn.close()
台新銀行信用卡分析
底下可由資料庫中撈出每個月的消費明細。
import pandas as pd
from openpyxl import load_workbook, Workbook
from openpyxl.styles import Font, Border, Side
from G import G
display=pd.options.display
display.max_columns=None
display.max_rows=None
display.width=None
display.max_colwidth=None
def noBlod(sheet):
for row in sheet.iter_rows():
for cell in row:
#if cell.font.bold: # 如果原本是粗體
cell.font = Font(bold=False, size=12) # 取消粗體
cell.border = thin_border
thin_border = Border(
left=Side(style='thin', color='000000'),
right=Side(style='thin', color='000000'),
top=Side(style='thin', color='000000'),
bottom=Side(style='thin', color='000000')
)
year=int(input("請輸入年份 : "))
month=int(input("請輸入月份 : "))
file=f"f:/信用卡/信用卡.xlsx"
sheetname=f"{year}{month:02d}"
conn, cursor = G.connect()
cursor.execute(f"select * from 信用卡明細 where 期數='{sheetname}'")
rs=cursor.fetchall()
columns=[d[0] for d in cursor.description]
conn.close()
df=pd.DataFrame(data=rs, columns=columns)
total_jenny=df.query("卡號=='1004'")["金額"].sum()
total_thomas=df.query("卡號!='1004'")["金額"].sum()
total=df["金額"].sum()
df.drop(["id", "期數"], axis=1, inplace=True)
total_row = {'明細': 'Total','金額': total_jenny,}
df = pd.concat([df, pd.DataFrame([total_row])], ignore_index=True)
total_row = {'明細': 'Thomas','金額': total_thomas,}
df = pd.concat([df, pd.DataFrame([total_row])], ignore_index=True)
total_row = {'明細': '鏓計','金額': total,}
df = pd.concat([df, pd.DataFrame([total_row])], ignore_index=True)
df = df.where(pd.notna(df), None)
print(df)
with pd.ExcelWriter(
file,
mode='a',
engine='openpyxl',
if_sheet_exists="replace") as writer:
df.to_excel(writer, sheet_name=sheetname, index=False)
wb=load_workbook(file)
ws = wb[sheetname]
ws.column_dimensions['A'].width = 7
ws.column_dimensions['B'].width = 11.5
ws.column_dimensions['C'].width = 11.5
ws.column_dimensions['D'].width = 49
ws.column_dimensions['E'].width = 9
ws.column_dimensions['F'].width = 11
ws.column_dimensions['G'].width = 12.5
ws.column_dimensions['H'].width = 8
ws.column_dimensions['I'].width = 12
wb.active=ws
noBlod(ws)
for cell in ws['F']:
cell.number_format = '#,##0'
wb.save(file)
