admin管理员组

文章数量:1532323

2024年1月25日发(作者:)

# Memory (RAM)
# Count how many rows fall under each memory-capacity value, sorted by that value.
df_RAM = data['内存容量'].value_counts().sort_index()
# Convert the index labels to strings before passing the series to draw_bar.
df_RAM.index = df_RAM.index.astype("str")

draw_bar(df_RAM, '2021年上半年各品牌笔记本新品内存分布')

# Graphics card
draw_boxplot('显卡')


def card_func(x):
    """Map a graphics-card label to a level code.

    Returns '1' or '2' when the label is in the corresponding list below,
    and '3' for every other value.
    """
    tiers = (
        ('1', ('RTX 3050Ti', 'RTX 3050', 'GTX 1650', '集显', 'MX450',
               'MX350', 'GTX 1650Ti')),
        ('2', ('RTX 3060', 'RTX 3070', 'RTX 3060MQ', 'RTX 3070MQ')),
    )
    for level, models in tiers:
        if x in models:
            return level
    return '3'


data['card_level'] = data['显卡'].apply(card_func)
data['card_level'].value_counts()

# Memory
draw_boxplot('内存容量')

# Remove the rows whose disk is the 1TB mechanical HDD.
data = data.query("硬盘容量 != '1TB HDD机械硬盘'")


def hard_disk_func(x):
    """Map a disk-capacity label to a level code.

    Returns '1' or '2' when the label is in the corresponding group below,
    and '3' for every other value.
    """
    # NOTE(review): '512GB+1TB 混合硬盘' used to be listed in both the '1'
    # and the '2' group. The '1' group is tested first, so the second entry
    # could never match; it is removed here and the result stays '1'.
    # Confirm which group that label was meant to be in.
    if x in ('512GB SSD固态硬盘', '256GB SSD固态硬盘', '512GB+1TB 混合硬盘',
             '128GB SSD固态硬盘'):
        return '1'
    if x in ('1TB SSD固态硬盘', '1TB+2TB 混合硬盘', '512GB+1TB SSD固态硬盘'):
        return '2'
    return '3'


data['hard_disk_level'] = data['硬盘容量'].apply(hard_disk_func)
data['hard_disk_level'].value_counts()

# Operating system
draw_boxplot('操作系统')

# Drop the 'UOS 20' rows.
data = data.query("操作系统 != 'UOS 20' ")
# Same labels with spaces replaced by underscores.
data['system'] = data['操作系统'].str.replace(" ", "_")

# Check whether price differs across the categories of every binned feature.
# Each inner list becomes one column of the subplot grid.
tmp_list = [
    ['RAM_level', 'brand_level', 'screen_level', 'resolution_level'],
    ['system', 'cpu_level', 'card_level', 'hard_disk_level'],
]

fig, ax = plt.subplots(ncols=2, nrows=4, figsize=(16, 20))
for col, features in enumerate(tmp_list):
    for row, feature in enumerate(features):
        # NOTE(review): the plotting call was truncated to `t(...)` in the
        # page text; sns.boxplot is assumed (matches the other box plots) --
        # confirm against the original notebook.
        sns.boxplot(x=feature, y='价格', data=data, ax=ax[row][col],
                    linewidth=2)

# The categories within each feature mostly differ.

# 2.4 Modeling
# 2.4.1 Train/test split
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows; the fixed seed makes the split repeatable.
train, test = train_test_split(data, test_size=0.2, random_state=42)

# 2.4.2 Variable selection

# The residuals approximately follow a normal distribution.

# 2.4.6 Model evaluation
from sklearn.metrics import mean_squared_error, r2_score, explained_variance_score

pred_ols_model = ols_model.predict(test)
# NOTE(review): the first argument survived only as `_ln` in the page text;
# `test.price_ln` (presumably a log-price column) is an assumption -- confirm
# the real column name before running.
print('r2_score:', r2_score(test.price_ln, pred_ols_model))

# Raise e to the model output to get the predicted price.
test['pred_price'] = np.power(np.e, pred_ols_model)
print('RMSE:', mean_squared_error(test['价格'], test['pred_price']) ** 0.5)
# Output recorded in the article:
# r2_score: 0.82244
# RMSE: 3499.9

# Scatter of actual vs. predicted price with a dashed red line through
# (0, 0) and (55000, 55000).
fig, ax = plt.subplots(figsize=(8, 8))
ax.scatter(test['价格'], test['pred_price'])
ax.axline(xy1=(0, 0), xy2=(55000, 55000), linestyle='--', color='red')
ax.set_xlabel('实际价格')
ax.set_ylabel('预测价格')
plt.show()

本文标签: 回归硬盘价格笔记本新品