# Task: predict the score of a student who studies 9.25 hours/day.
# Dataset: http://bit.ly/w-data
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Remote CSV holding the student records; the rest of the script reads its
# "Hours" and "Scores" columns.
url = (
    'https://raw.githubusercontent.com/AdiPersonalWorks/Random/master/'
    'student_scores%20-%20student_scores.csv'
)
stud = pd.read_csv(url)
# Bare expression: echoes the frame when run as a notebook cell; it has no
# effect when the file is executed as a plain script.
stud

# Count missing values per column (isna is the canonical name for isnull).
stud.isna().sum()
# Line plot of score against study hours.
sns.lineplot(data=stud, x="Hours", y="Scores", color="red")
plt.show()

# One box plot per column, each shown on its own before the next is drawn.
for column in ("Hours", "Scores"):
    sns.boxplot(data=stud, y=column)
    plt.show()
# Bar chart of how often each distinct value occurs, one chart per column.
# Each chart is given its own axes and shown immediately: a Series plot with
# no explicit `ax` is drawn on the current axes, so two back-to-back calls
# would otherwise pile onto the same axes and stay pending until some later
# plt.show() call.
for column in ("Hours", "Scores"):
    fig, ax = plt.subplots()
    stud[column].value_counts().plot(kind="bar", ax=ax)
    plt.show()
from sklearn.model_selection import train_test_split

# Features: the first column, sliced so it stays a 2-D DataFrame.
# Target: the second column as a 1-D Series.
# (Presumably these are "Hours" and "Scores" respectively -- confirm against
# the CSV column order.)
X = stud.iloc[:, 0:1]
y = stud.iloc[:, 1]

# 70/30 split; the fixed seed keeps the partition reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.7,
    test_size=0.3,
    random_state=100,
)

# Notebook-style peeks at the training data (no output in a plain script).
X_train.head()
y_train.head()
from sklearn.linear_model import LinearRegression

# Build the estimator and fit it on the training split in one step
# (fit() returns the estimator itself, so `lm` is the fitted model).
lm = LinearRegression().fit(X_train, y_train)

# Report the learned intercept followed by the coefficient array.
print(lm.intercept_, lm.coef_, sep="\n")
# Training data with the fitted regression line on top.
# Start from a fresh figure so any earlier, not-yet-shown plot is not drawn
# underneath this one.
plt.figure()
plt.scatter(X_train, y_train)
# Take the line from the fitted model rather than from pasted-in intercept and
# slope literals, which go stale as soon as the data, split or seed changes.
plt.plot(X_train, lm.predict(X_train), 'y')
plt.title("Prediction")
plt.xlabel("Hours")
plt.ylabel("Scores")
plt.show()
# Predict scores for the held-out rows and print the raw prediction array.
y_pred = lm.predict(X_test)
print(y_pred)

# Side-by-side table of actual and predicted values, indexed like y_test.
comparison = y_test.to_frame(name="Actual").assign(Predicted=y_pred)
# Notebook-style echo (no output in a plain script).
comparison
# Held-out data with the model's predictions drawn as a line.
plt.scatter(X_test, y_test)
# Use the fitted model for the line instead of hard-coded intercept and slope
# literals, so the plot always matches the model that was actually trained.
plt.plot(X_test, lm.predict(X_test), 'y')
# Axis labels match the training-set plot.
plt.xlabel("Hours")
plt.ylabel("Scores")
plt.show()
# Predict the score for 9.25 study hours per day.
# The model was fitted on a DataFrame, so the query is passed as a one-row
# frame carrying the same column label; a bare nested list makes scikit-learn
# warn that the input has no valid feature names.
pred_score = lm.predict(pd.DataFrame([[9.25]], columns=X.columns))
print("The predicted score is :",pred_score)
# The metric helpers were used without ever being imported, which raised
# NameError; import them here, next to their only use.
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Evaluate the predictions against the held-out targets.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))  # root mean squared error
r_squared = r2_score(y_test, y_pred)                # coefficient of determination
mae = mean_absolute_error(y_test, y_pred)           # mean absolute error

# Print the results so they are visible when run as a script, not only when
# each expression happens to be the last line of a notebook cell.
print("RMSE:", rmse)
print("R-squared:", r_squared)
print("MAE:", mae)