Skip to content

Commit c57c34b

Browse files
Add files via upload
1 parent a1527ba commit c57c34b

File tree

2 files changed

+53981
-0
lines changed

2 files changed

+53981
-0
lines changed
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import pandas as pd
2+
import seaborn as sns
3+
import matplotlib.pyplot as plt
4+
from sklearn.model_selection import train_test_split
5+
from sklearn.linear_model import LinearRegression
6+
from sklearn.metrics import mean_squared_error, r2_score
7+
8+
# Exploratory analysis of the diamonds dataset followed by a baseline
# linear-regression model that predicts price from the numeric measurements.

# Load the dataset; the relative path means the CSV is looked up in the
# current working directory.
diamonds = pd.read_csv('diamonds.csv')

# Quick look at the data: first rows, column overview, summary statistics.
print(diamonds.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
diamonds.info()
print(diamonds.describe())

# Drop every row that contains at least one missing value.
diamonds = diamonds.dropna()

# Scatter plot of price against carat.
sns.scatterplot(x='carat', y='price', data=diamonds)
plt.title('Carat vs Price')
plt.show()

# Select only numeric columns for correlation analysis
numeric_columns = diamonds.select_dtypes(include=['number']).columns
correlation_matrix = diamonds[numeric_columns].corr()

# Start a dedicated figure so the heatmap never lands on the scatter plot's
# axes when plt.show() leaves the previous figure open (e.g. with a
# non-interactive backend).
plt.figure()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()

# Features: the numeric measurement columns only; the target is the price.
X = diamonds[['carat', 'depth', 'table', 'x', 'y', 'z']]
y = diamonds['price']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit an ordinary least-squares model on the training split.
model = LinearRegression()
model.fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

0 commit comments

Comments
 (0)