Question

Transcribed Text

Workshop Task 1: Deep Neural Network

1. Recall Lecture 6. Run the following 3 networks, observe their behaviours, and compare them in terms of (i) architecture, (ii) accuracy (on both the training and testing datasets), (iii) training time, and (iv) testing time.
• net = network2.Network([784, 30, 10])
• net = network2.Network([784, 30, 30, 10])
• net = network2.Network([784, 30, 30, 30, 10])
• net.SGD(training_data, 30, 10, 0.1, lmbda=5.0, evaluation_data=validation_data, monitor_evaluation_accuracy=True)

Why are deep neural networks hard to train?
• The top six neurons in each of the two hidden layers of the [784, 30, 30, 10] network are shown; a little bar represents how quickly each individual neuron changes as the network learns.
• Different layers in our deep network learn at vastly different speeds.
• The later layers may be learning well, but what about the early layers?
• With the network [784, 30, 30, 30, 10], the respective speeds of learning turn out to be 0.012, 0.060, and 0.283. How?
• With another layer added, the respective speeds of learning are 0.003, 0.017, 0.070, and 0.285. How?
• Vanishing gradient problem
• Exploding gradient problem

Why does the vanishing gradient problem occur?
• Consider the simplest DNN, with just a single neuron in each layer (a chain of activations a1 → a2 → a3 → a4, with weights w2, w3, w4, feeding the cost C).
• Let's write the gradient ∂C/∂a1 associated with the first hidden neuron:

∂C/∂a1 = σ′(z1) × w2 × σ′(z2) × w3 × σ′(z3) × w4 × σ′(z4) × ∂C/∂a4    ... eq (1)

• Note that this expression is a product of terms of the form wj σ′(zj).
• To understand why the vanishing gradient problem occurs, look at a plot of the function σ′: it attains its maximum value of 1/4 at z = 0.
• If the weights in the network are initialized using a Gaussian with mean 0 and standard deviation 1, they usually satisfy |wj| < 1, and therefore |wj σ′(zj)| < 1/4. How?
• Now let's compare ∂C/∂a1 with ∂C/∂a3: ∂C/∂a1 contains two extra factors of the form wj σ′(zj), each typically smaller than 1/4 in magnitude, so the gradient in the first hidden layer is usually at least 16 times smaller than in the third.

Why does the vanishing gradient problem occur?
• Other issues include:
  • Choice of activation function
  • Weights initialization
  • Gradient descent implementation
  • And, of course, the choice of network architecture and the other hyper-parameters we discussed in previous lectures.

Deep Convolutional Neural Networks
• Convolutional neural networks use three basic concepts:
  1. Local receptive fields (the convolution/filtering operation)
  2. Shared weights
  3. Pooling

Deep Convolutional Neural Networks
• Filter sliding (or convolving) around the image.
• Convolution layer: what is a convolution operation?
• Each neuron in the hidden layer detects a particular feature, e.g. a vertical edge (the figure shows a 28×28 input image).
• Convolution can be seen as a1 = σ(b + w∗a0), where a1 denotes the set of output activations from one feature map, a0 is the set of input activations, and ∗ is called a convolution operation. Remember this? → a1 = σ(b + w∗a0)

a. Receptive field:
• Make connections in small, localized regions of the input image.
• Each neuron connects to a small region of the input neurons, e.g. a 5×5 region.
• This region is called the local receptive field (LRF) for the hidden neuron.
• Each connection learns a weight.
• Slide the LRF over by one pixel to the right, and so on across the image.
• With a 28×28 input and 5×5 LRFs, we have 24×24 neurons in the hidden layer.
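The convolution a1 = σ(b + w∗a0) and the 28×28 → 24×24 size reduction can be checked with a minimal NumPy sketch. This is not part of the workshop code; the function name feature_map and the random inputs are purely illustrative.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def feature_map(a0, w, b):
    # Slide one shared 5x5 weight kernel w (with shared bias b) over the 2-D
    # input a0. Every hidden neuron reuses the same weights (weight sharing)
    # and looks at one local receptive field of the input.
    H, W = a0.shape                              # e.g. a 28x28 input image
    k = w.shape[0]                               # e.g. a 5x5 local receptive field
    out = np.zeros((H - k + 1, W - k + 1))       # 24x24 hidden neurons for a 28x28 input
    for j in range(H - k + 1):
        for i in range(W - k + 1):
            out[j, i] = np.sum(w * a0[j:j + k, i:i + k]) + b
    return sigmoid(out)                          # a1 = sigma(b + w * a0)

a0 = np.random.rand(28, 28)                      # stand-in for one MNIST image
w = np.random.randn(5, 5)                        # one shared 5x5 filter / kernel
a1 = feature_map(a0, w, b=0.0)
print(a1.shape)                                  # -> (24, 24)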
• Convolution layer: think of these filters as feature detectors applied to the original input image, or as a membrane that allows only the desired qualities of the input to pass through.

Deep Convolutional Neural Networks
• Neurons in the hidden layer detect a particular feature, e.g. a vertical edge.
• 3 feature maps: each feature map is defined by a set of 5×5 shared weights and a single shared bias, so the network can detect 3 different kinds of features.
• A CNN with 20 feature maps, each with 5×5 weights in its local receptive field.
• Note the learnt spatial structures: learned features from a Convolutional Deep Belief Network (Lee, Honglak, et al., "Convolutional deep belief networks for scalable unsupervised learning of hierarchical representations", Proceedings of the 26th Annual International Conference on Machine Learning, 2009).

b. Shared weights and biases:
• We use the same weights and bias for each of the 24×24 hidden neurons, i.e. a 5×5 array of shared weights. For example, the (j, k)-th hidden neuron's output is given as σ(b + Σl Σm wl,m aj+l,k+m), with the sums running over l, m = 0…4.
• All the neurons in the first hidden layer try to detect exactly the same feature, e.g. a vertical edge.
• This map from the input layer to the hidden layer is called a feature map.
• The shared weights and bias are often called a kernel or filter.
• Expand this expression for the (1, 1) hidden neuron.

A big advantage of sharing weights and biases:
• It greatly reduces the number of parameters involved in the network.
• With 20 feature maps, the convolutional layer is defined by a total of 20×26 = 520 parameters (each map has 5×5 = 25 shared weights plus one shared bias).
• In contrast, a fully connected first layer with 784 inputs and 30 hidden neurons is defined by 784×30 = 23,520 weights plus an extra 30 biases, i.e. 23,550 parameters.
• That is more than 40 times as many parameters as the convolutional layer.

c. Pooling layers:
• A pooling layer takes each feature map and outputs a summarised feature map.
• E.g. max-pooling summarises each 2×2 region of neurons in the previous layer, so a 24×24 feature map becomes a 12×12 feature map.

Deep Convolutional Neural Networks
• Putting it together: 28×28 input neurons encode the pixel intensities, followed by a convolutional layer using a 5×5 local receptive field and 3 feature maps, followed by max-pooling.
• Convolutional Neural Network trained on the MNIST database of handwritten digits.

Visualizing Convolutional Neural Networks
- 32×32 image
- Convolution Layer 1: six unique 5×5 (stride 1) filters. Note that six different filters produce a feature map of depth six.
- 2×2 max pooling
- Convolution Layer 2: sixteen 5×5 (stride 1) convolutional filters
- 2×2 max pooling
- Three FC layers (120, 100, 10)
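The architecture listed above can be reproduced in a few lines of Keras to inspect the output shapes and parameter counts. This is only a sketch, not the workshop's given code: the ReLU activations and the single-channel 32×32 input are assumptions.

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

model = Sequential()
# Convolution Layer 1: six 5x5 (stride 1) filters -> 28x28x6 feature maps
model.add(Conv2D(6, kernel_size=(5, 5), activation='relu', input_shape=(32, 32, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))        # 2x2 max pooling -> 14x14x6
# Convolution Layer 2: sixteen 5x5 (stride 1) filters -> 10x10x16
model.add(Conv2D(16, kernel_size=(5, 5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))        # 2x2 max pooling -> 5x5x16
model.add(Flatten())
# Three fully connected layers (120, 100, 10)
model.add(Dense(120, activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(Dense(10, activation='softmax'))
model.summary()                                  # prints output shapes and parameter counts

Calling model.summary() is a quick way to verify the per-layer parameter counts discussed above (shared weights keep the convolutional layers small; the fully connected layers dominate the total).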
Workshop Task 2: Deep Convolutional Neural Network
• You are given a DCNN code (in Keras), applied to the MNIST dataset.
2.1. Explain:
  • Pre-processing
  • The 2D CNN architecture, including the convolutional, pooling, dropout, and flatten layers
  • The comparison with Task 1 in terms of architecture, objective function, and learning/training procedure
2.2. Apply the DCNN code to classify the different objects in the CIFAR-10 dataset.
2.3. Bonus question: recall the BP equations from Lecture 6. How are the equations of backpropagation modified for a CNN?
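For Task 2.2, typically only the data loading and the input shape need to change relative to the MNIST script. A minimal sketch of those differences is shown below; it assumes the same Keras model-building code is reused and follows the variable names of the MNIST preview.

import keras
from keras.datasets import cifar10

num_classes = 10

# CIFAR-10 images are 32x32 RGB, so the input has 3 channels instead of MNIST's 1
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print(x_train.shape)                             # (50000, 32, 32, 3)

# same pre-processing idea as for MNIST: scale pixels, one-hot encode the labels
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# reuse the MNIST model-building code, changing only the input shape
input_shape = (32, 32, 3)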

Solution Preview

These solutions may offer step-by-step problem-solving explanations or good writing examples that include modern styles of formatting and the construction of bibliographies, citations, and references. Students may use these solutions for personal skill-building and practice. Unethical use is strictly forbidden.

'''Trains a simple convnet on the MNIST dataset.

Gets to 99.25% test accuracy after 12 epochs
(there is still a lot of margin for parameter tuning).
16 seconds per epoch on a GRID K520 GPU.
'''

from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K

batch_size = 128
num_classes = 10
epochs = 12

# input image dimensions
img_rows, img_cols = 28, 28

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
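# NOTE: the preview is truncated at this point. The code above matches the standard
# Keras mnist_cnn example, so a plausible continuation is sketched below as a guide
# to the pre-processing and layer structure asked about in Task 2.1; the actual
# solution files may differ.
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

# scale pixel intensities from [0, 255] to [0, 1]
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# convert integer class labels to one-hot vectors
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# convolution -> convolution -> max-pooling -> dropout -> flatten -> dense -> dropout -> softmax
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])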
