import pandas as pds
import numpy
# Load the first worksheet of the workbook into a DataFrame.
dataset1 = pds.read_excel(r'C:\Users\Bill Kerman\Desktop\dataset1.xlsx')

# Iterating a DataFrame yields its column labels, so any(df.isnull()) only
# tests whether some label is truthy. Reduce the boolean mask itself to find
# out whether any cell is actually missing.
print('数据集中是否存在缺失值:\n', bool(dataset1.isnull().values.any()))
数据集中是否存在缺失值: True


# Estimate the missing DATA1 values with Lagrange interpolation.
from scipy.interpolate import lagrange

# Rows in which both ID and DATA1 are present. Columns are selected with a
# list: a set is not accepted as an indexer by current pandas, and the
# filtered frame only needs to be built once.
known = dataset1[['ID', 'DATA1']].dropna()
x = known['ID']
y = known['DATA1']
xarray = x.tolist()
yarray = y.tolist()

# 0-based row positions at which DATA1 is missing.
nullval = [i for i, val in enumerate(dataset1['DATA1'].isnull()) if val]

# A single polynomial through every known (ID, DATA1) point.
# NOTE(review): scipy documents lagrange() as numerically unstable beyond
# roughly 20 points; confirm the dataset is small enough for this to hold.
lag = lagrange(xarray, yarray)
for val in nullval:
    # assumes ID == row position + 1 — TODO confirm against the spreadsheet
    print(lag(val + 1))
0.13293862262205647
需要转换类型时,只需在 read_excel 里使用 converters 参数即可
import pandas as pd
import numpy as np
# Load the sales workbook; converters={1: str} passes the column at
# position 1 through str() while reading.
sale_data = pd.read_excel(
    r'C:\Users\Bill Kerman\Documents\StudyMaterial\数据挖掘\朝阳医院2018年销售数据.xlsx',
    converters={1: str},
)

# any(df.isnull()) would iterate the column labels rather than the cells;
# reduce the boolean mask instead to detect real missing values.
print(bool(sale_data.isnull().values.any()))
使用多个方法进行插值:
# fillna() returns a new frame and leaves sale_data untouched, so each
# result is bound to a name instead of being discarded.
# numeric_only=True restricts the statistics to numeric columns; without it
# current pandas raises on text columns such as the one read with str above.
median_filled = sale_data.fillna(sale_data.median(numeric_only=True))
mean_filled = sale_data.fillna(sale_data.mean(numeric_only=True))
# Lagrange interpolation over a short window of neighbouring samples.
# Node positions 0..9 with position 4 left out: 4 is where the polynomial
# is evaluated below.
x = [pos for pos in range(0, 10) if pos != 4]

# A tuple keeps the print order fixed; iterating a set of strings can give a
# different order on every interpreter run.
for label in ("销售数量", "应收金额", "实收金额"):
    y = sale_data[label].dropna()
    yarray = y.tolist()
    print(label)
    # lagrange() consumes one sample per node, so the window is cut to
    # len(x) samples; the extra trailing samples were never used.
    # NOTE(review): 6567 is a position in the NaN-dropped series, not a
    # spreadsheet row number — confirm it still brackets the intended gap.
    print(lagrange(x, yarray[6567:6567 + len(x)])(4))
结果:
应收金额 57.555555555566414 实收金额 56.6314285714182 销售数量 10.277777777775938