【python数据分析】pandas库Dataframe之创建

Dataframe

dataframe是一个表格型的数据结构,是一个“带有标签的二维数组”

创建

1、# 由数组/list组成的字典创建,columns为字典的key,index默认为数字标签(0,1,2…),也可通过index参数指定

import pandas as pd
import numpy as np

# Build DataFrames from dicts whose values are equal-length lists / arrays.
# Each dict key becomes a column label.
data1 = dict(a=[1, 2, 3], b=[4, 5, 6], c=[7, 8, 9])
data2 = {
    'one': np.random.rand(3),
    'two': np.random.rand(3),
}

# No index given: rows are labelled 0, 1, 2.
d1 = pd.DataFrame(data1)
d2 = pd.DataFrame(data2)
print(d1, d2, sep='\n')

# Explicit row labels via index=.
d3 = pd.DataFrame(data1, index=['x', 'y', 'z'])
print(d3)

# columns= picks which columns to keep; a name that is not a key of the
# source dict ('DD') shows up as an all-NaN column.
d4 = pd.DataFrame(data2, index=['q', 'w', 'e'], columns=['one', 'DD'])
print(d4)

------------------------------结果-------------------------------
   a  b  c
0  1  4  7
1  2  5  8
2  3  6  9
        one       two
0  0.038727  0.275714
1  0.886669  0.857068
2  0.881146  0.633808
   a  b  c
x  1  4  7
y  2  5  8
z  3  6  9
        one   DD
q  0.038727  NaN
w  0.886669  NaN
e  0.881146  NaN

2、# Dataframe之由Series创建,columns为字典key,index为Series的标签;如果Series没有指定标签,则默认为数字标签;长度不一致时缺失的位置用NaN填充

import pandas as pd
import numpy as np

# Build DataFrames from dicts of Series of different lengths.
# Rows are aligned on the Series labels; positions missing from the
# shorter Series are filled with NaN.

# Series without explicit labels -> rows labelled 0, 1, 2.
data1 = {
    'one': pd.Series(np.random.rand(2)),
    'two': pd.Series(np.random.rand(3)),
}

# Series with explicit labels -> those labels become the row index.
data2 = {
    'one': pd.Series(np.random.rand(2), index=list('ab')),
    'two': pd.Series(np.random.rand(3), index=list('abc')),
}

df1 = pd.DataFrame(data1)
df2 = pd.DataFrame(data2)
print(df1, df2, sep='\n')

----------------------------结果--------------------------
        one       two
0  0.547841  0.407916
1  0.528967  0.761749
2       NaN  0.638886
        one       two
a  0.462170  0.961833
b  0.508991  0.228698
c       NaN  0.306034

3、# Dataframe之由二维数组创建,index和columns不指定时均默认为数字标签,指定时长度需与数组的形状一致

import pandas as pd
import numpy as np

# Build DataFrames straight from a 3x3 ndarray of random floats.
ar = np.random.rand(9).reshape((3, 3))
print(ar)

# Without labels both axes are numbered 0, 1, 2.
df1 = pd.DataFrame(ar)
# With labels: rows a/b/c, columns x/y/z.
df2 = pd.DataFrame(ar, index=['a', 'b', 'c'], columns=['x', 'y', 'z'])
print(df1, df2, sep='\n')

------------------------------结果-------------------------
[[0.11228298 0.74159833 0.32772146]
 [0.14026585 0.61811644 0.92536378]
 [0.60881357 0.28399911 0.19018847]]
          0         1         2
0  0.112283  0.741598  0.327721
1  0.140266  0.618116  0.925364
2  0.608814  0.283999  0.190188
          x         y         z
a  0.112283  0.741598  0.327721
b  0.140266  0.618116  0.925364
c  0.608814  0.283999  0.190188

4、# 由字典组成的列表创建,columns为各字典key的并集,index不指定时默认为数字标签;某个字典缺少的key对应位置为NaN

import pandas as pd
import numpy as np

# Build DataFrames from a list of dicts: one dict per row, keys become
# column labels, and a key absent from a row yields NaN in that cell.
data = [
    dict(one=1, two=2),
    dict(one=5, two=10, three=20),
]
print(data)

df1 = pd.DataFrame(data)
# index= labels the rows; columns= keeps 'one' and adds the unknown
# column 'big', which is entirely NaN.
df2 = pd.DataFrame(data, index=list('ab'), columns=['one', 'big'])
print(df1, df2, sep='\n')

-----------------------------结果-----------------------------
[{'one': 1, 'two': 2}, {'one': 5, 'two': 10, 'three': 20}]
   one  three  two
0    1    NaN    2
1    5   20.0   10
   one  big
a    1  NaN
b    5  NaN

5、# 由字典组成的字典创建,columns为外层字典的key,index为内层字典的key;columns可以增加或重新排列(不存在的列为NaN),index参数只能从已有标签中筛选,不能重命名,指定不存在的标签时整行为NaN

import pandas as pd
import numpy as np

# Build DataFrames from a dict of dicts.
# Outer keys (student names) become the columns, inner keys (subjects)
# become the row index; a subject missing for a student is NaN.
data = {'Jack': {'math': 90, 'english': 80, 'art': 88},
        'Marry': {'math': 80, 'english': 70, 'art': 100},
        'Tom': {'math': 80, 'english': 70}}
print(data)

df1 = pd.DataFrame(data)
# columns= can reorder / select outer keys; an unknown name ('Bob')
# produces an all-NaN column.
df2 = pd.DataFrame(data, columns=['Tom', 'Jack', 'Bob'])
# index= does not rename the rows: it selects by inner key, so labels that
# are not inner keys ('a', 'b', 'c') give rows that are entirely NaN.
df3 = pd.DataFrame(data, index=['a', 'b', 'c'])
print(df1)
print(df2)
print(df3)

-------------------------------结果-------------------------------
{'Jack': {'math': 90, 'english': 80, 'art': 88}, 'Marry': {'math': 80, 'english': 70, 'art': 100}, 'Tom': {'math': 80, 'english': 70}}
         Jack  Marry   Tom
art        88    100   NaN
english    80     70  70.0
math       90     80  80.0
          Tom  Jack  Bob
art       NaN    88  NaN
english  70.0    80  NaN
math     80.0    90  NaN
   Jack  Marry  Tom
a   NaN    NaN  NaN
b   NaN    NaN  NaN
c   NaN    NaN  NaN

 


更多精彩内容