DataScience/Matplotlib
Matplotlib 두 컬럼간의 관계(비례,반비례,관계없음)
leopard4
2022. 11. 28. 13:59
Bivariate (두 개의 변수) Visualization 방법
Scatterplots
In [246]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
%matplotlib inline
In [ ]:
# 두 컬럼간의 관계! 를 차트로 나타내는 방법
In [247]:
# 관계란??? 비례관계, 반비례관계, 아무관계 없음.. 이 3가지를 말한다.
In [248]:
# Preview the fuel-economy CSV; the result is not assigned, so the notebook
# only displays it.
pd.read_csv("../data/fuel_econ.csv")
Out[248]:
id | make | model | year | VClass | drive | trans | fuelType | cylinders | displ | pv2 | pv4 | city | UCity | highway | UHighway | comb | co2 | feScore | ghgScore | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 32204 | Nissan | GT-R | 2013 | Subcompact Cars | All-Wheel Drive | Automatic (AM6) | Premium Gasoline | 6 | 3.8 | 79 | 0 | 16.4596 | 20.2988 | 22.5568 | 30.1798 | 18.7389 | 471 | 4 | 4 |
1 | 32205 | Volkswagen | CC | 2013 | Compact Cars | Front-Wheel Drive | Automatic (AM-S6) | Premium Gasoline | 4 | 2.0 | 94 | 0 | 21.8706 | 26.9770 | 31.0367 | 42.4936 | 25.2227 | 349 | 6 | 6 |
2 | 32206 | Volkswagen | CC | 2013 | Compact Cars | Front-Wheel Drive | Automatic (S6) | Premium Gasoline | 6 | 3.6 | 94 | 0 | 17.4935 | 21.2000 | 26.5716 | 35.1000 | 20.6716 | 429 | 5 | 5 |
3 | 32207 | Volkswagen | CC 4motion | 2013 | Compact Cars | All-Wheel Drive | Automatic (S6) | Premium Gasoline | 6 | 3.6 | 94 | 0 | 16.9415 | 20.5000 | 25.2190 | 33.5000 | 19.8774 | 446 | 5 | 5 |
4 | 32208 | Chevrolet | Malibu eAssist | 2013 | Midsize Cars | Front-Wheel Drive | Automatic (S6) | Regular Gasoline | 4 | 2.4 | 0 | 95 | 24.7726 | 31.9796 | 35.5340 | 51.8816 | 28.6813 | 310 | 8 | 8 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
3924 | 39882 | Toyota | Prius Prime | 2018 | Midsize Cars | Front-Wheel Drive | Automatic (variable gear ratios) | Regular Gasoline | 4 | 1.8 | 0 | 0 | 55.2206 | 78.8197 | 53.0000 | 73.6525 | 54.4329 | 78 | 10 | 10 |
3925 | 39898 | Hyundai | Sonata Hybrid | 2018 | Midsize Cars | Front-Wheel Drive | Automatic (AM6) | Regular Gasoline | 4 | 2.0 | 0 | 106 | 39.0000 | 55.9000 | 44.3066 | 64.0000 | 41.0000 | 217 | 9 | 9 |
3926 | 39899 | Hyundai | Sonata Hybrid SE | 2018 | Midsize Cars | Front-Wheel Drive | Automatic (AM6) | Regular Gasoline | 4 | 2.0 | 0 | 106 | 40.0000 | 56.0000 | 46.0000 | 64.0000 | 42.0000 | 212 | 9 | 9 |
3927 | 39900 | Lexus | LS 500 | 2018 | Midsize Cars | Rear-Wheel Drive | Automatic (S10) | Premium Gasoline | 6 | 3.4 | 99 | 0 | 19.2200 | 24.2000 | 30.2863 | 43.4000 | 23.0021 | 387 | 5 | 5 |
3928 | 39901 | Lexus | LS 500 AWD | 2018 | Midsize Cars | All-Wheel Drive | Automatic (S10) | Premium Gasoline | 6 | 3.4 | 99 | 0 | 18.0431 | 22.6000 | 27.0000 | 39.3000 | 21.3945 | 417 | 4 | 4 |
3929 rows × 20 columns
In [249]:
# Load the fuel-economy CSV into `df`, the DataFrame used by the cells below.
df = pd.read_csv("../data/fuel_econ.csv")
In [250]:
df.shape
Out[250]:
(3929, 20)
In [252]:
# How many distinct car models does the data actually contain?
df['model'].nunique()
Out[252]:
769
In [254]:
# Number of distinct manufacturers (values of the "make" column).
df['make'].nunique()
Out[254]:
39
In [255]:
# 자동차 만드는 회사별로, 이 데이터 프레임에는 데이터가 몇개 있는지 시각화!
In [266]:
# Manufacturer names ordered by how many rows each has; value_counts() sorts
# in descending order by default, so the most frequent make comes first.
base_order = df['make'].value_counts().index
In [269]:
base_order
Out[269]:
Index(['BMW', 'Mercedes-Benz', 'Porsche', 'Ford', 'Chevrolet', 'MINI', 'Audi', 'Volkswagen', 'Hyundai', 'Dodge', 'Lexus', 'Kia', 'Cadillac', 'Toyota', 'Infiniti', 'Honda', 'Jaguar', 'Mazda', 'Nissan', 'Buick', 'Subaru', 'Chrysler', 'Lincoln', 'Volvo', 'Acura', 'Bentley', 'Mitsubishi', 'Rolls-Royce', 'Maserati', 'Scion', 'Fiat', 'Ferrari', 'Genesis', 'Aston Martin', 'Suzuki', 'Roush Performance', 'Lotus', 'Alfa Romeo', 'Karma'], dtype='object')
In [270]:
# Horizontal bar chart of the number of rows per manufacturer.
# A tall figure keeps every manufacturer label readable on the y axis.
plt.figure(figsize=(10, 12))
# `order` fixes the bar sequence to the one stored in base_order.
sb.countplot(y="make", order=base_order, data=df)
plt.show()
In [271]:
# 두 컬럼간의 관계!!!!!
# 배기량(displ)과 연비(comb)의 관계 => 비례, 반비례, 관계없음
# 1. plt의 scatter 사용하는 방법
In [274]:
# Scatter plot of the "displ" column (x) against the "comb" column (y)
# to see how the two columns relate.
plt.scatter(x="displ", y="comb", data=df)
plt.xlabel("Displacement (L)")
plt.ylabel("Combined Fuel Eff (mpg)")
plt.title("Displ Vs Comb")
plt.show()
In [276]:
df
Out[276]:
id | make | model | year | VClass | drive | trans | fuelType | cylinders | displ | pv2 | pv4 | city | UCity | highway | UHighway | comb | co2 | feScore | ghgScore | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 32204 | Nissan | GT-R | 2013 | Subcompact Cars | All-Wheel Drive | Automatic (AM6) | Premium Gasoline | 6 | 3.8 | 79 | 0 | 16.4596 | 20.2988 | 22.5568 | 30.1798 | 18.7389 | 471 | 4 | 4 |
1 | 32205 | Volkswagen | CC | 2013 | Compact Cars | Front-Wheel Drive | Automatic (AM-S6) | Premium Gasoline | 4 | 2.0 | 94 | 0 | 21.8706 | 26.9770 | 31.0367 | 42.4936 | 25.2227 | 349 | 6 | 6 |
2 | 32206 | Volkswagen | CC | 2013 | Compact Cars | Front-Wheel Drive | Automatic (S6) | Premium Gasoline | 6 | 3.6 | 94 | 0 | 17.4935 | 21.2000 | 26.5716 | 35.1000 | 20.6716 | 429 | 5 | 5 |
3 | 32207 | Volkswagen | CC 4motion | 2013 | Compact Cars | All-Wheel Drive | Automatic (S6) | Premium Gasoline | 6 | 3.6 | 94 | 0 | 16.9415 | 20.5000 | 25.2190 | 33.5000 | 19.8774 | 446 | 5 | 5 |
4 | 32208 | Chevrolet | Malibu eAssist | 2013 | Midsize Cars | Front-Wheel Drive | Automatic (S6) | Regular Gasoline | 4 | 2.4 | 0 | 95 | 24.7726 | 31.9796 | 35.5340 | 51.8816 | 28.6813 | 310 | 8 | 8 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
3924 | 39882 | Toyota | Prius Prime | 2018 | Midsize Cars | Front-Wheel Drive | Automatic (variable gear ratios) | Regular Gasoline | 4 | 1.8 | 0 | 0 | 55.2206 | 78.8197 | 53.0000 | 73.6525 | 54.4329 | 78 | 10 | 10 |
3925 | 39898 | Hyundai | Sonata Hybrid | 2018 | Midsize Cars | Front-Wheel Drive | Automatic (AM6) | Regular Gasoline | 4 | 2.0 | 0 | 106 | 39.0000 | 55.9000 | 44.3066 | 64.0000 | 41.0000 | 217 | 9 | 9 |
3926 | 39899 | Hyundai | Sonata Hybrid SE | 2018 | Midsize Cars | Front-Wheel Drive | Automatic (AM6) | Regular Gasoline | 4 | 2.0 | 0 | 106 | 40.0000 | 56.0000 | 46.0000 | 64.0000 | 42.0000 | 212 | 9 | 9 |
3927 | 39900 | Lexus | LS 500 | 2018 | Midsize Cars | Rear-Wheel Drive | Automatic (S10) | Premium Gasoline | 6 | 3.4 | 99 | 0 | 19.2200 | 24.2000 | 30.2863 | 43.4000 | 23.0021 | 387 | 5 | 5 |
3928 | 39901 | Lexus | LS 500 AWD | 2018 | Midsize Cars | All-Wheel Drive | Automatic (S10) | Premium Gasoline | 6 | 3.4 | 99 | 0 | 18.0431 | 22.6000 | 27.0000 | 39.3000 | 21.3945 | 417 | 4 | 4 |
3929 rows × 20 columns
In [275]:
# Pairwise correlation matrix of the numeric columns.
# The frame also holds string columns (make, model, VClass, ...). Since
# pandas 2.0, DataFrame.corr() no longer drops those silently and raises
# instead, so the numeric columns are selected explicitly. This gives the
# same matrix on older and newer pandas versions.
df.select_dtypes(include="number").corr()
Out[275]:
id | year | cylinders | displ | pv2 | pv4 | city | UCity | highway | UHighway | comb | co2 | feScore | ghgScore | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
id | 1.000000 | 0.985668 | -0.060096 | -0.074666 | -0.006569 | -0.021951 | 0.091800 | 0.091225 | 0.090593 | 0.095359 | 0.093803 | -0.099717 | -0.127873 | -0.122321 |
year | 0.985668 | 1.000000 | -0.055313 | -0.070424 | 0.006232 | -0.033643 | 0.068050 | 0.066742 | 0.073290 | 0.077641 | 0.071993 | -0.081165 | -0.149829 | -0.145141 |
cylinders | -0.060096 | -0.055313 | 1.000000 | 0.933872 | 0.247571 | -0.004264 | -0.693103 | -0.666029 | -0.766275 | -0.771503 | -0.738023 | 0.848274 | -0.783858 | -0.781815 |
displ | -0.074666 | -0.070424 | 0.933872 | 1.000000 | 0.259336 | 0.022072 | -0.713479 | -0.686166 | -0.783984 | -0.788457 | -0.758397 | 0.855375 | -0.793432 | -0.791216 |
pv2 | -0.006569 | 0.006232 | 0.247571 | 0.259336 | 1.000000 | -0.665642 | -0.278109 | -0.272546 | -0.296808 | -0.298504 | -0.290883 | 0.287200 | -0.296088 | -0.293156 |
pv4 | -0.021951 | -0.033643 | -0.004264 | 0.022072 | -0.665642 | 1.000000 | 0.035188 | 0.037869 | 0.074952 | 0.077442 | 0.047333 | -0.050153 | 0.064876 | 0.065263 |
city | 0.091800 | 0.068050 | -0.693103 | -0.713479 | -0.278109 | 0.035188 | 1.000000 | 0.996377 | 0.915435 | 0.909658 | 0.989552 | -0.904305 | 0.905681 | 0.898793 |
UCity | 0.091225 | 0.066742 | -0.666029 | -0.686166 | -0.272546 | 0.037869 | 0.996377 | 1.000000 | 0.899557 | 0.897814 | 0.981106 | -0.885823 | 0.891297 | 0.884458 |
highway | 0.090593 | 0.073290 | -0.766275 | -0.783984 | -0.296808 | 0.074952 | 0.915435 | 0.899557 | 1.000000 | 0.992191 | 0.962757 | -0.916456 | 0.914116 | 0.897585 |
UHighway | 0.095359 | 0.077641 | -0.771503 | -0.788457 | -0.298504 | 0.077442 | 0.909658 | 0.897814 | 0.992191 | 1.000000 | 0.956580 | -0.912117 | 0.911355 | 0.894314 |
comb | 0.093803 | 0.071993 | -0.738023 | -0.758397 | -0.290883 | 0.047333 | 0.989552 | 0.981106 | 0.962757 | 0.956580 | 1.000000 | -0.929399 | 0.928862 | 0.918807 |
co2 | -0.099717 | -0.081165 | 0.848274 | 0.855375 | 0.287200 | -0.050153 | -0.904305 | -0.885823 | -0.916456 | -0.912117 | -0.929399 | 1.000000 | -0.940624 | -0.944566 |
feScore | -0.127873 | -0.149829 | -0.783858 | -0.793432 | -0.296088 | 0.064876 | 0.905681 | 0.891297 | 0.914116 | 0.911355 | 0.928862 | -0.940624 | 1.000000 | 0.994231 |
ghgScore | -0.122321 | -0.145141 | -0.781815 | -0.791216 | -0.293156 | 0.065263 | 0.898793 | 0.884458 | 0.897585 | 0.894314 | 0.918807 | -0.944566 | 0.994231 | 1.000000 |
In [277]:
# 딱 두 컬럼! displ과 comb 만 상관계수를 가져와라
In [278]:
# Keep only the two columns of interest: "displ" and "comb".
df[ ["displ", 'comb'] ]
Out[278]:
displ | comb | |
---|---|---|
0 | 3.8 | 18.7389 |
1 | 2.0 | 25.2227 |
2 | 3.6 | 20.6716 |
3 | 3.6 | 19.8774 |
4 | 2.4 | 28.6813 |
... | ... | ... |
3924 | 1.8 | 54.4329 |
3925 | 2.0 | 41.0000 |
3926 | 2.0 | 42.0000 |
3927 | 3.4 | 23.0021 |
3928 | 3.4 | 21.3945 |
3929 rows × 2 columns
In [279]:
# Correlation matrix restricted to "displ" and "comb"; the off-diagonal
# entry is the correlation coefficient between the two columns.
df[ ["displ", 'comb'] ].corr()
Out[279]:
displ | comb | |
---|---|---|
displ | 1.000000 | -0.758397 |
comb | -0.758397 | 1.000000 |
In [ ]:
In [280]:
# 두 컬럼간의 관계를 차트로 표시
# 2. seaborn 의 regplot 이용하는 방법
In [283]:
# Plain matplotlib scatter plot of "displ" (x) against "comb" (y), drawn
# again so it can be compared with the seaborn regplot version.
plt.scatter(x="displ", y="comb", data=df)
plt.xlabel("Displacement (L)")
plt.ylabel("Combined Fuel Eff (mpg)")
plt.title("Displ Vs Comb")
plt.show()
In [282]:
# seaborn regplot: scatter of "displ" vs "comb" with a fitted regression
# line drawn on top.
sb.regplot(x="displ", y="comb", data=df)
plt.show()
In [284]:
# "reg" stands for regression: regplot fits a regression line to the data
# and draws it over the scatter points.
sb.regplot(x="displ", y="comb", data=df)
plt.xlabel("Displacement (L)")
plt.ylabel("Combined Fuel Eff (mpg)")
plt.title("Displ Vs Comb")
plt.show()
In [ ]:
In [ ]:
# 3. sb의 pairplot 을 이용하는 방법
In [287]:
# Grid of pairwise plots for the three selected columns, so every pair can
# be inspected at once.
sb.pairplot(vars=["displ", "comb", "co2"], data=df)
plt.show()
In [ ]: