import pandas as pd
from matplotlib import pyplot as plt
# Load the iris measurements from "iris.csv" in the current working directory.
data = pd.read_csv("iris.csv")
data  # bare expression: echoes the frame when this runs as a notebook cell

# Goal: predict the class label "Species" (the dependent variable) from the
# four measurement columns SepalLengthCm, SepalWidthCm, PetalLengthCm and
# PetalWidthCm (the independent variables).
# Count how many rows belong to each species.
data.Species.value_counts()
# Author's observation: the dataset is balanced, with 50 rows per species.

# Default plot kind is a line graph of sepal width against sepal length.
data.plot.line(x="SepalLengthCm", y="SepalWidthCm")
data

# A scatter plot is used instead of the default line graph.
# The trailing semicolon only suppresses the echoed return value in a notebook.
data.plot.scatter(x="SepalLengthCm", y="SepalWidthCm");
data  # the plot above is 2-D: sepal length (sl) against sepal width (sw)
# With four independent variables there are 4C2 = 6 distinct pairs that can be
# placed on the x/y axes of a 2-D plot:
# (sl, sw), (sl, pl), (sl, pw), (sw, pl), (sw, pw), (pl, pw)
import seaborn as sns

# The four measurement columns, listed once and reused for both pair plots.
feature_columns = [
    "SepalLengthCm",
    "SepalWidthCm",
    "PetalLengthCm",
    "PetalWidthCm",
]

# Pair plot over all four measurements, coloured by species.
sns.pairplot(data=data, vars=feature_columns, hue="Species")
plt.show()
# Author's observation: the petal length (pl) vs petal width (pw) panel gives
# the clearest picture.

# Author's plan: histogram, PDF/CDF, box plot and violin plot for a better
# understanding. Here: a single-variable pair plot of petal width only,
# coloured by species.
sns.pairplot(data=data, vars=feature_columns[-1:], hue="Species")
plt.show()
# Pairwise correlation between the numeric columns only.
# "Species" is the class label (presumably string-valued - confirm against the
# CSV); DataFrame.corr() raises on non-numeric columns in pandas >= 2.0, while
# older versions silently dropped them. Selecting numeric dtypes first gives
# the same result on every pandas version.
corr = data.select_dtypes(include="number").corr()

# Draw the annotated correlation heatmap on an explicitly created
# 10x10 inch figure, passing the axes so the figure size is actually used
# by this plot rather than relying on the implicit "current axes".
fig, axis = plt.subplots(figsize=(10, 10))
sns.heatmap(corr, annot=True, ax=axis)
# Show the figure, consistent with the pair plots earlier in the script.
plt.show()

# Observation (author's note): 0.96, i.e. 96%, is the highest correlation
# value. This line was previously written as bare prose after `observation=`,
# which is a syntax error that prevented the whole file from parsing.
# A label encoder is used to convert the dependent column (Species) into numeric values, i.e. a machine-readable form.