当前位置：首页 > news >正文

如何上传网站模板网站开发 .net

news 2025/11/15 1:36:18

如何上传网站模板,网站开发 .net,做平团的网站,设计类专业就业前景Pandas的基本使用点击标题即可获取文章源代码和笔记 4.1.0 概要 Pandas基础处理Pandas是什么？为什么用？核心数据结构DataFramePanelSeries基本操作运算画图文件的读取与存储高级处理4.1Pandas介绍4.1.1 Pandas介绍 - 数据处理工具panel data analysis…Pandas的基本使用点击标题即可获取文章源代码和笔记 4.1.0 概要 Pandas基础处理Pandas是什么？为什么用？核心数据结构DataFramePanelSeries基本操作运算画图文件的读取与存储高级处理4.1Pandas介绍4.1.1 Pandas介绍 - 数据处理工具panel data analysispanel面板数据 - 计量经济学三维数据4.1.2 为什么使用Pandas便捷的数据处理能力读取文件方便封装了Matplotlib、Numpy的画图和计算4.1.3 DataFrame结构既有行索引又有列索引的二维数组属性shapeindexcolumnsvaluesT方法head()tail()3 DataFrame索引的设置1修改行列索引值2重设索引3设置新索引2 PanelDataFrame的容器3 Series带索引的一维数组属性indexvalues总结DataFrame是Series的容器Panel是DataFrame的容器 4.2 基本数据操作4.2.1 索引操作1直接索引先列后行2按名字索引loc3按数字索引iloc4组合索引数字、名字4.2.3 排序对内容排序dataframeseries对索引排序dataframeseries 4.3 DataFrame运算算术运算逻辑运算逻辑运算符布尔索引逻辑运算函数query()isin()统计运算min max mean median var stdnp.argmax()np.argmin()自定义运算apply(func, axis=0)Truefunc:自定义函数 4.4 Pandas画图sr.plot() 4.5 文件读取与存储4.5.1 CSVpd.read_csv(path)usecolsnamesdataframe.to_csv(path)columns=[] index=False header=False4.5.2 HDF5hdf5 存储 3维数据的文件key1 dataframe1二维数据key2 dataframe2二维数据pd.read_hdf(path, key)df.to_hdf(path, key)4.5.3 JSONpd.read_json(path)orient="records" lines=True df.to_json(path)orient="records" lines=True 4.1.3 DataFrame import numpy as np # 创建一个符合正态分布的10个股票5天的涨跌幅数据 stock_change = np.random.normal(0,1,(10,5)) stock_change array([[ 0.77072465, 1.30408183, -0.44043464, 0.8900768 , -0.80947118],[ 0.92407994, 0.01646795, -1.26614793, 1.52393669, -0.85373051],[-1.68378051, 0.4302981 , 0.8069393 , 0.60557427, -0.03960376],[ 0.75708007, -0.39899325, 0.23027082, -0.89585658, -1.86590247],[-0.41516245, -1.31841546, 0.16256478, -0.67449097, -1.26234013],[-0.27687242, -0.74154521, -0.03755446, 1.24182603, -0.79444361],[-0.2549323 , -0.41034663, -1.85076521, -1.28663451, -0.28566877],[ 1.22453612, -1.60200055, -1.83171522, -0.85322799, -1.70950421],[ 2.00461483, 1.49338564, 0.33928513, -0.1776084 , -0.39698965],[ 0.2184662 , -0.03868143, -0.21432675, 0.00604093, 1.35011139]]) import pandas as pd 
pd.DataFrame(stock_change)0123400.7707251.304082-0.4404350.890077-0.80947110.9240800.016468-1.2661481.523937-0.8537312-1.6837810.4302980.8069390.605574-0.03960430.757080-0.3989930.230271-0.895857-1.8659024-0.415162-1.3184150.162565-0.674491-1.2623405-0.276872-0.741545-0.0375541.241826-0.7944446-0.254932-0.410347-1.850765-1.286635-0.28566971.224536-1.602001-1.831715-0.853228-1.70950482.0046151.4933860.339285-0.177608-0.39699090.218466-0.038681-0.2143270.0060411.350111 # 构造行索引序列 stock_code [股票 str(i) for i in range(stock_change.shape[0])] stock_code[股票0, 股票1, 股票2, 股票3, 股票4, 股票5, 股票6, 股票7, 股票8, 股票9]# 添加行索引 data pd.DataFrame(stock_change,indexstock_code) data01234股票00.7707251.304082-0.4404350.890077-0.809471股票10.9240800.016468-1.2661481.523937-0.853731股票2-1.6837810.4302980.8069390.605574-0.039604股票30.757080-0.3989930.230271-0.895857-1.865902股票4-0.415162-1.3184150.162565-0.674491-1.262340股票5-0.276872-0.741545-0.0375541.241826-0.794444股票6-0.254932-0.410347-1.850765-1.286635-0.285669股票71.224536-1.602001-1.831715-0.853228-1.709504股票82.0046151.4933860.339285-0.177608-0.396990股票90.218466-0.038681-0.2143270.0060411.350111 # 添加列索引 date pd.date_range(start20200618,periods5,freqB) # start 开始时间 periods 间隔时间freq 按照什么间隔 d w 5h dateDatetimeIndex([2020-06-18, 2020-06-19, 2020-06-22, 2020-06-23,2020-06-24],dtypedatetime64[ns], freqB)# 添加列索引 data pd.DataFrame(stock_change,indexstock_code,columnsdate) data2020-06-182020-06-192020-06-222020-06-232020-06-24股票00.7707251.304082-0.4404350.890077-0.809471股票10.9240800.016468-1.2661481.523937-0.853731股票2-1.6837810.4302980.8069390.605574-0.039604股票30.757080-0.3989930.230271-0.895857-1.865902股票4-0.415162-1.3184150.162565-0.674491-1.262340股票5-0.276872-0.741545-0.0375541.241826-0.794444股票6-0.254932-0.410347-1.850765-1.286635-0.285669股票71.224536-1.602001-1.831715-0.853228-1.709504股票82.0046151.4933860.339285-0.177608-0.396990股票90.218466-0.038681-0.2143270.0060411.350111 DataFrame属性 data.shape(10, 5)data.indexIndex([股票0, 股票1, 股票2, 股票3, 股票4, 股票5, 股票6, 
股票7, 股票8, 股票9], dtypeobject)data.columnsDatetimeIndex([2020-06-18, 2020-06-19, 2020-06-22, 2020-06-23,2020-06-24],dtypedatetime64[ns], freqB)data.valuesarray([[ 0.77072465, 1.30408183, -0.44043464, 0.8900768 , -0.80947118],[ 0.92407994, 0.01646795, -1.26614793, 1.52393669, -0.85373051],[-1.68378051, 0.4302981 , 0.8069393 , 0.60557427, -0.03960376],[ 0.75708007, -0.39899325, 0.23027082, -0.89585658, -1.86590247],[-0.41516245, -1.31841546, 0.16256478, -0.67449097, -1.26234013],[-0.27687242, -0.74154521, -0.03755446, 1.24182603, -0.79444361],[-0.2549323 , -0.41034663, -1.85076521, -1.28663451, -0.28566877],[ 1.22453612, -1.60200055, -1.83171522, -0.85322799, -1.70950421],[ 2.00461483, 1.49338564, 0.33928513, -0.1776084 , -0.39698965],[ 0.2184662 , -0.03868143, -0.21432675, 0.00604093, 1.35011139]])data.T股票0股票1股票2股票3股票4股票5股票6股票7股票8股票92020-06-180.7707250.924080-1.6837810.757080-0.415162-0.276872-0.2549321.2245362.0046150.2184662020-06-191.3040820.0164680.430298-0.398993-1.318415-0.741545-0.410347-1.6020011.493386-0.0386812020-06-22-0.440435-1.2661480.8069390.2302710.162565-0.037554-1.850765-1.8317150.339285-0.2143272020-06-230.8900771.5239370.605574-0.895857-0.6744911.241826-1.286635-0.853228-0.1776080.0060412020-06-24-0.809471-0.853731-0.039604-1.865902-1.262340-0.794444-0.285669-1.709504-0.3969901.350111 DataFrame方法 data.head() # 返回前5行数据2020-06-182020-06-192020-06-222020-06-232020-06-24股票00.7707251.304082-0.4404350.890077-0.809471股票10.9240800.016468-1.2661481.523937-0.853731股票2-1.6837810.4302980.8069390.605574-0.039604股票30.757080-0.3989930.230271-0.895857-1.865902股票4-0.415162-1.3184150.162565-0.674491-1.262340 data.tail() # 返回后5行数据2020-06-182020-06-192020-06-222020-06-232020-06-24股票5-0.276872-0.741545-0.0375541.241826-0.794444股票6-0.254932-0.410347-1.850765-1.286635-0.285669股票71.224536-1.602001-1.831715-0.853228-1.709504股票82.0046151.4933860.339285-0.177608-0.396990股票90.218466-0.038681-0.2143270.0060411.350111 3 DataFrame索引的设置修改行列索引值 data.index[2]股票2data.index[2] 股票88 # 
注意单独修改每一列的索引是不行的在DataFrame中只能对索引进行整体的修改---------------------------------------------------------------------------TypeError Traceback (most recent call last)ipython-input-19-9e95917cc4d9 in module ---- 1 data.index[2] 股票88D:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in __setitem__(self, key, value)3908 3909 def __setitem__(self, key, value): - 3910 raise TypeError(Index does not support mutable operations)3911 3912 def __getitem__(self, key):TypeError: Index does not support mutable operationsstock_ [股票_{}.format(i) for i in range(10)]data.index stock_data.indexIndex([股票_0, 股票_1, 股票_2, 股票_3, 股票_4, 股票_5, 股票_6, 股票_7, 股票_8,股票_9],dtypeobject)重设索引 reset_indexdropFalse设置新的下标索引drop默认为False不删除原来索引如果为True删除原来的索引值 # 重置索引dropFalse data.reset_index()index2020-06-18 00:00:002020-06-19 00:00:002020-06-22 00:00:002020-06-23 00:00:002020-06-24 00:00:000股票_00.7707251.304082-0.4404350.890077-0.8094711股票_10.9240800.016468-1.2661481.523937-0.8537312股票_2-1.6837810.4302980.8069390.605574-0.0396043股票_30.757080-0.3989930.230271-0.895857-1.8659024股票_4-0.415162-1.3184150.162565-0.674491-1.2623405股票_5-0.276872-0.741545-0.0375541.241826-0.7944446股票_6-0.254932-0.410347-1.850765-1.286635-0.2856697股票_71.224536-1.602001-1.831715-0.853228-1.7095048股票_82.0046151.4933860.339285-0.177608-0.3969909股票_90.218466-0.038681-0.2143270.0060411.350111 # 重置索引dropTrue data.reset_index(dropTrue)2020-06-182020-06-192020-06-222020-06-232020-06-2400.7707251.304082-0.4404350.890077-0.80947110.9240800.016468-1.2661481.523937-0.8537312-1.6837810.4302980.8069390.605574-0.03960430.757080-0.3989930.230271-0.895857-1.8659024-0.415162-1.3184150.162565-0.674491-1.2623405-0.276872-0.741545-0.0375541.241826-0.7944446-0.254932-0.410347-1.850765-1.286635-0.28566971.224536-1.602001-1.831715-0.853228-1.70950482.0046151.4933860.339285-0.177608-0.39699090.218466-0.038681-0.2143270.0060411.350111 以某列值设置为新的索引 set_index(keys,dropTrue)keys:列索引名或者列索引名称的列表drop:boolean,default True 当作新的索引删除原来的索引列设置新索引案例 1.创建 df 
pd.DataFrame({month:[1,4,7,10],year:[2012,2014,2013,2014],sale:[55,40,84,31] }) dfmonthyearsale012012551420144027201384310201431 2、以月份设置新的索引 df.set_index(month)yearsalemonth12012554201440720138410201431 设置多个索引以年和月份 new_df df.set_index([year,month])new_dfsaleyearmonth20121552014440201378420141031 new_df.indexMultiIndex([(2012, 1),(2014, 4),(2013, 7),(2014, 10)],names[year, month])4.1.4 MultiIndex 与 Panel的关系 1 Multilndex多级或分层索引对象。 index属性 names: levels的名称 levels每个level的元组值 new_df.index.namesFrozenList([year, month])new_df.index.levelsFrozenList([[2012, 2013, 2014], [1, 4, 7, 10]])2 Panel p pd.Panel() p # 新版本已移除该函数D:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:1: FutureWarning: The Panel class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next versionEntry point for launching an IPython kernel.pandas.__getattr__.locals.Panel at 0x203fd31ea08data2020-06-182020-06-192020-06-222020-06-232020-06-24股票_00.7707251.304082-0.4404350.890077-0.809471股票_10.9240800.016468-1.2661481.523937-0.853731股票_2-1.6837810.4302980.8069390.605574-0.039604股票_30.757080-0.3989930.230271-0.895857-1.865902股票_4-0.415162-1.3184150.162565-0.674491-1.262340股票_5-0.276872-0.741545-0.0375541.241826-0.794444股票_6-0.254932-0.410347-1.850765-1.286635-0.285669股票_71.224536-1.602001-1.831715-0.853228-1.709504股票_82.0046151.4933860.339285-0.177608-0.396990股票_90.218466-0.038681-0.2143270.0060411.350111 Series data.iloc[1,:] # 带索引的一维数组2020-06-18 0.924080 2020-06-19 0.016468 2020-06-22 -1.266148 2020-06-23 1.523937 2020-06-24 -0.853731 Freq: B, Name: 股票_1, dtype: float64type(data.iloc[1,:])pandas.core.series.Series属性 data.iloc[1,:].indexDatetimeIndex([2020-06-18, 2020-06-19, 2020-06-22, 2020-06-23,2020-06-24],dtypedatetime64[ns], freqB)data.iloc[1,:].valuesarray([ 0.92407994, 0.01646795, -1.26614793, 1.52393669, -0.85373051])1. 
创建Series 通过已有数据创建指定内容默认索引 pd.Series(np.arange(10))0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 dtype: int32指定索引 pd.Series([6.7,5.6,3,10,2],index[1,2,3,4,5])1 6.7 2 5.6 3 3.0 4 10.0 5 2.0 dtype: float64通过字典数据创建 pd.Series({red:100,blue:200,green:500,yellow:1000 })red 100 blue 200 green 500 yellow 1000 dtype: int64总结 DataFrame 是 Series的容器Panel 是 DataFrame的容器 4.2 基本数据操作 datas pd.read_excel(./datas/szfj_baoan.xls)datasdistrictroomnumhallAREAC_floorfloor_numschoolsubwayper_price0baoan3289.3middle31007.07731baoan42127.0high31006.92912baoan1128.0low39003.92863baoan1128.0middle30003.35684baoan2278.0middle8115.0769..............................1246baoan4289.3low8004.25531247baoan2167.0middle30003.80601248baoan2267.4middle29105.34121249baoan2273.1low15105.95081250baoan3286.2middle32014.5244 1251 rows × 9 columns datas.columnsIndex([district, roomnum, hall, AREA, C_floor, floor_num, school,subway, per_price],dtypeobject)# 删除列 datas datas.drop(columns[ school,subway,],axis0)datasdistrictroomnumhallAREAC_floorfloor_numper_price0baoan3289.3middle317.07731baoan42127.0high316.92912baoan1128.0low393.92863baoan1128.0middle303.35684baoan2278.0middle85.0769........................1246baoan4289.3low84.25531247baoan2167.0middle303.80601248baoan2267.4middle295.34121249baoan2273.1low155.95081250baoan3286.2middle324.5244 1251 rows × 7 columns 4.2.1 索引操作 1.直接使用行列索引先列后行 datas[per_price][0]7.07732. 
按名字索引(先行后列) datas.loc[0][per_price]7.0773datas.loc[0,per_price]7.07733.按数字索引 datas.iloc[0,6]7.0773# 通过索引值获取行名 datas.index[0:4]RangeIndex(start0, stop4, step1)datas.loc[datas.index[0:4],[district,roomnum]]districtroomnum0baoan31baoan42baoan13baoan1 # datas.columns.get_indexer() 通过列名获取索引值 datas.columns.get_indexer([district,roomnum])array([0, 1], dtypeint64)datas.iloc[0:4,datas.columns.get_indexer([district,roomnum])]districtroomnum0baoan31baoan42baoan13baoan1 4.2.2 赋值操作 # 直接修改原来的值 datas[hall] 5datas.head()districtroomnumhallAREAC_floorfloor_numper_price0baoan3589.3middle317.07731baoan45127.0high316.92912baoan1528.0low393.92863baoan1528.0middle303.35684baoan2578.0middle85.0769 # 或者 datas.hall 1datas.head()districtroomnumhallAREAC_floorfloor_numper_price0baoan3189.3middle317.07731baoan41127.0high316.92912baoan1128.0low393.92863baoan1128.0middle303.35684baoan2178.0middle85.0769 datas.iloc[0,0] zzzzdatas.head()districtroomnumhallAREAC_floorfloor_numper_price0zzzz3189.3middle317.07731baoan41127.0high316.92912baoan1128.0low393.92863baoan1128.0middle303.35684baoan2178.0middle85.0769 4.2.3 排序 # 对内容进行排序, ascendingFalse降序排列默认为True升序排列 datas.sort_values(byper_price,ascendingFalse)districtroomnumhallAREAC_floorfloor_numper_price917baoan4193.59high2821.9040356baoan81248.99low721.2860576baoan1121.95middle2219.3622296baoan4193.59high2819.2328186baoan31113.60middle3116.5493........................911baoan2189.00middle161.6854841baoan2175.00high71.66671188baoan31110.00middle331.5909684baoan3189.00middle261.22471047baoan3198.90middle261.1931 1251 rows × 7 columns datas.sort_values(byper_price)districtroomnumhallAREAC_floorfloor_numper_price1047baoan3198.90middle261.1931684baoan3189.00middle261.22471188baoan31110.00middle331.5909841baoan2175.00high71.6667911baoan2189.00middle161.6854........................186baoan31113.60middle3116.5493296baoan4193.59high2819.2328576baoan1121.95middle2219.3622356baoan81248.99low721.2860917baoan4193.59high2821.9040 1251 rows × 7 columns # 按照多个字段进行排序 # 
先按照“district”字段的内容进行排序如果值相同再按照“per_price”字段的内容进行排序 datas.sort_values(by[district,per_price])districtroomnumhallAREAC_floorfloor_numper_price1047baoan3198.90middle261.1931684baoan3189.00middle261.22471188baoan31110.00middle331.5909841baoan2175.00high71.6667911baoan2189.00middle161.6854........................296baoan4193.59high2819.2328576baoan1121.95middle2219.3622356baoan81248.99low721.2860917baoan4193.59high2821.90400zzzz3189.30middle317.0773 1251 rows × 7 columns # 按照行索引大小进行排序,默认从小到大排序 datas.sort_index()districtroomnumhallAREAC_floorfloor_numper_price0zzzz3189.3middle317.07731baoan41127.0high316.92912baoan1128.0low393.92863baoan1128.0middle303.35684baoan2178.0middle85.0769........................1246baoan4189.3low84.25531247baoan2167.0middle303.80601248baoan2167.4middle295.34121249baoan2173.1low155.95081250baoan3186.2middle324.5244 1251 rows × 7 columns sr datas[per_price]sr0 7.0773 1 6.9291 2 3.9286 3 3.3568 4 5.0769... 1246 4.2553 1247 3.8060 1248 5.3412 1249 5.9508 1250 4.5244 Name: per_price, Length: 1251, dtype: float64# 对Series类型的数据的内容进行排序 sr.sort_values()1047 1.1931 684 1.2247 1188 1.5909 841 1.6667 911 1.6854... 186 16.5493 296 19.2328 576 19.3622 356 21.2860 917 21.9040 Name: per_price, Length: 1251, dtype: float64# 对Series类型的数据的索引进行排序 sr.sort_index()0 7.0773 1 6.9291 2 3.9286 3 3.3568 4 5.0769... 1246 4.2553 1247 3.8060 1248 5.3412 1249 5.9508 1250 4.5244 Name: per_price, Length: 1251, dtype: float644.3 DataFrame运算算术运算 # 对Series类型进行操作 datas[roomnum] 30 6 1 7 2 4 3 4 4 5.. 
1246 7
1247 5
1248 5
1249 5
1250 6
Name: roomnum, Length: 1251, dtype: int64

datas["roomnum"].add(3).head()
0 6
1 7
2 4
3 4
4 5
Name: roomnum, dtype: int64

datas.iloc[:,1:4]
     roomnum hall AREA
0 3 1 89.3
1 4 1 127.0
2 1 1 28.0
3 1 1 28.0
4 2 1 78.0
... ... ... ...
1246 4 1 89.3
1247 2 1 67.0
1248 2 1 67.4
1249 2 1 73.1
1250 3 1 86.2
1251 rows × 3 columns

# 对DataFrame类型进行操作
datas.iloc[:,1:4] + 10
     roomnum hall AREA
0 13 11 99.3
1 14 11 137.0
2 11 11 38.0
3 11 11 38.0
4 12 11 88.0
... ... ... ...
1246 14 11 99.3
1247 12 11 77.0
1248 12 11 77.4
1249 12 11 83.1
1250 13 11 96.2
1251 rows × 3 columns

逻辑运算
# 逻辑判断的结果可以作为筛选的依据
datas["AREA"] > 100
0 False
1 True
2 False
3 False
4 False
...
1246 False
1247 False
1248 False
1249 False
1250 False
Name: AREA, Length: 1251, dtype: bool

# 可以进行布尔索引
datas[datas["AREA"] > 100]
     district roomnum hall AREA C_floor floor_num per_price
1 baoan 4 1 127.00 high 31 6.9291
5 baoan 4 1 125.17 middle 15 5.8161
16 baoan 3 1 151.00 high 20 4.9669
25 baoan 3 1 116.00 high 18 5.0000
26 baoan 5 1 151.25 high 30 7.6033
... ... ... ... ... ... ... ...
1232 baoan 5 1 127.17 low 24 5.1113
1238 baoan 4 1 130.74 low 30 13.0029
1239 baoan 3 1 102.10 middle 28 10.8717
1241 baoan 5 1 151.30 high 29 7.2703
1243 baoan 4 1 142.25 high 32 6.3269
322 rows × 7 columns

# 多个逻辑判断
# 筛选面积大于100 并且房价小于40000的数据
(datas["AREA"]>100) & (datas["per_price"] < 40000)
0 False
1 True
2 False
3 False
4 False
...
1246 False
1247 False
1248 False
1249 False
1250 False
Length: 1251, dtype: bool

# 布尔索引
datas[(datas["AREA"]>100) & (datas["per_price"] < 40000)]
     district roomnum hall AREA C_floor floor_num per_price
1 baoan 4 1 127.00 high 31 6.9291
5 baoan 4 1 125.17 middle 15 5.8161
16 baoan 3 1 151.00 high 20 4.9669
25 baoan 3 1 116.00 high 18 5.0000
26 baoan 5 1 151.25 high 30 7.6033
... ... ... ... ... ... ... ...
1232 baoan 5 1 127.17 low 24 5.1113
1238 baoan 4 1 130.74 low 30 13.0029
1239 baoan 3 1 102.10 middle 28 10.8717
1241 baoan 5 1 151.30 high 29 7.2703
1243 baoan 4 1 142.25 high 32 6.3269
322 rows × 7 columns

逻辑运算函数
# 条件查询函数
datas.query("AREA>100 & per_price<40000")
     district roomnum hall AREA C_floor floor_num per_price
1 baoan 4 1 127.00 high 31 6.9291
5 baoan 4 1 125.17 middle 15 5.8161
16 baoan 3 1 151.00 high 20 4.9669
25 baoan 3 1 116.00 high 18 5.0000
26 baoan 5 1 151.25 high 30 7.6033
... ... ... ... ... ... ... ...
1232 baoan 5 1 127.17 low 24 5.1113
1238 baoan 4 1 130.74 low 30 13.0029
1239 baoan 3 1 102.10 middle 28 10.8717
1241 baoan 5 1 151.30 high 29 7.2703
1243 baoan 4 1 142.25 high 32 6.3269
322 rows × 7 columns

datas["roomnum"].isin([4,5])
0 False
1 True
2 False
3 False
4 False
...
1246 True 1247 False 1248 False 1249 False 1250 False Name: roomnum, Length: 1251, dtype: bool# 可以指定值进行判断从而进行筛选操作 # 筛选出房间数量为4或者5的数据 datas[datas[roomnum].isin([4,5])]districtroomnumhallAREAC_floorfloor_numper_price1baoan41127.00high316.92915baoan41125.17middle155.816126baoan51151.25high307.603329baoan41143.45middle256.971136baoan41134.60middle329.1828........................1232baoan51127.17low245.11131238baoan41130.74low3013.00291241baoan51151.30high297.27031243baoan41142.25high326.32691246baoan4189.30low84.2553 224 rows × 7 columns 统计运算 # 计算每一列的总数均值标准差最小值分位数最大值等 datas.describe()roomnumhallAREAfloor_numper_pricecount1251.0000001251.01251.0000001251.0000001251.000000mean2.9064751.092.40997624.5987216.643429std0.9406630.037.7981229.3321192.435132min1.0000001.021.9500001.0000001.19310025%2.0000001.075.00000017.0000005.07585050%3.0000001.087.80000028.0000005.90680075%3.0000001.0101.37500031.0000007.761950max8.0000001.0352.90000053.00000021.904000 统计函数 # axis0 求每一列的最大值 axis1求每一行的最大值 datas.max(axis0)district zzzz roomnum 8 hall 1 AREA 352.9 C_floor middle floor_num 53 per_price 21.904 dtype: object# 方差 datas.var(axis0)roomnum 0.884846 hall 0.000000 AREA 1428.698032 floor_num 87.088446 per_price 5.929870 dtype: float64# 标准差 datas.std(axis0)roomnum 0.940663 hall 0.000000 AREA 37.798122 floor_num 9.332119 per_price 2.435132 dtype: float64datas.iloc[:,3]0 89.3 1 127.0 2 28.0 3 28.0 4 78.0... 1246 89.3 1247 67.0 1248 67.4 1249 73.1 1250 86.2 Name: AREA, Length: 1251, dtype: float64# 求最大值所在的下标索引 datas.iloc[:,3].idxmax(axis0)759datas.iloc[759,3]352.9# 求最小值所在的下标索引 datas.iloc[:,3].idxmin(axis0)576datas.iloc[576,3]21.95累计统计函数 datas[per_price]0 7.0773 1 6.9291 2 3.9286 3 3.3568 4 5.0769... 1246 4.2553 1247 3.8060 1248 5.3412 1249 5.9508 1250 4.5244 Name: per_price, Length: 1251, dtype: float64# 累加 datas[per_price].cumsum()0 7.0773 1 14.0064 2 17.9350 3 21.2918 4 26.3687... 
1246 8291.3076 1247 8295.1136 1248 8300.4548 1249 8306.4056 1250 8310.9300 Name: per_price, Length: 1251, dtype: float64datas[per_price].sort_index().cumsum().plot()matplotlib.axes._subplots.AxesSubplot at 0x2039a3a3dc8import matplotlib.pyplot as plt datas[per_price].sort_index().cumsum().plot() plt.show()自定义运算 # 自定义一个计算最大值-最小值的函数 datas[[per_price]].apply(lambda x : x.max()-x.min(),axis0)per_price 20.7109 dtype: float644.4 Pandas画图 # 查看面积和房价之间的关系 datas.plot(xAREA,yper_price,kindscatter)matplotlib.axes._subplots.AxesSubplot at 0x203a343dec8# 查看楼层和房价之间的关系 datas.plot(xfloor_num,yper_price,kindscatter)matplotlib.axes._subplots.AxesSubplot at 0x203a3a81bc8datas.plot(xAREA,yper_price,kindbarh)matplotlib.axes._subplots.AxesSubplot at 0x203a2147f084.5 文件的读取与存储 1.读取csv文件 read_csv() iris_data pd.read_csv(./datas/iris.data.csv)iris_data.head()feature1feature2feature3feature4result05.13.51.40.2Iris-setosa14.93.01.40.2Iris-setosa24.73.21.30.2Iris-setosa34.63.11.50.2Iris-setosa45.03.61.40.2Iris-setosa # usecols指定读取的列名列表形式 iris_data1 pd.read_csv(./datas/iris.data.csv,usecols[feature1,feature2,result])iris_data1.head()feature1feature2result05.13.5Iris-setosa14.93.0Iris-setosa24.73.2Iris-setosa34.63.1Iris-setosa45.03.6Iris-setosa iris_data2 pd.read_csv(./datas/iris.data2.csv) iris_data2.head()5.13.51.40.2Iris-setosa04.93.01.40.2Iris-setosa14.73.21.30.2Iris-setosa24.63.11.50.2Iris-setosa35.03.61.40.2Iris-setosa45.43.91.70.4Iris-setosa # names:如果数据集本身没有列名可以自己指定列名 iris_data2 pd.read_csv(./datas/iris.data2.csv,names[feature1,feature2,feature3,feature4,result]) iris_data2.head()feature1feature2feature3feature4result05.13.51.40.2Iris-setosa14.93.01.40.2Iris-setosa24.73.21.30.2Iris-setosa34.63.11.50.2Iris-setosa45.03.61.40.2Iris-setosa datas.head(5)districtroomnumhallAREAC_floorfloor_numper_price0zzzz3189.3middle317.07731baoan41127.0high316.92912baoan1128.0low393.92863baoan1128.0middle303.35684baoan2178.0middle85.0769 # 保存per_price列的数据 # 保存的时候indexFalse 去掉行索引 # modea 追加数据 # headerFalse 
不要重复追加列名
datas[:-1].to_csv("./price_test",columns=["per_price"],index=False,mode="a",header=False)

# 读取查看数据
perice_test = pd.read_csv("./price_test")
perice_test
     per_price
0 7.0773
1 6.9291
2 3.9286
3 3.3568
4 5.0769
... ...
3746 6.1932
3747 4.2553
3748 3.806
3749 5.3412
3750 5.9508
3751 rows × 1 columns

查看全文

http://www.zqtcl.cn/news/961549/