# Upload .csv file in google colab
from google.colab import files

uploaded = files.upload()

Saving employee_churn_data.csv to employee_churn_data (3).csv

# Import all dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns

#  Import and read the csv into a DataFrame
import pandas as pd
employees_df = pd.read_csv("employee_churn_data.csv")
employees_df

# Check data types and null values
employees_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9540 entries, 0 to 9539
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   department     9540 non-null   object 
 1   promoted       9540 non-null   int64  
 2   review         9540 non-null   float64
 3   projects       9540 non-null   int64  
 4   salary         9540 non-null   object 
 5   tenure         9540 non-null   float64
 6   satisfaction   9540 non-null   float64
 7   bonus          9540 non-null   int64  
 8   avg_hrs_month  9540 non-null   float64
 9   left           9540 non-null   object 
dtypes: float64(4), int64(3), object(3)
memory usage: 745.4+ KB

# Determine the number of unique values in each column
employees_df_unique = employees_df.nunique()
employees_df_unique

department         10
promoted            2
review           9540
projects            4
salary              3
tenure             11
satisfaction     9540
bonus               2
avg_hrs_month    9540
left                2
dtype: int64

# Check number of employees turnover
left_counts = employees_df['left'].value_counts()
left_counts.plot(kind='bar', color=['skyblue', 'orange'])

# Plot bar graph
plt.title('Count of Employees who Left the company')
plt.ylabel('Count')
plt.show()

# Convert categorical data to numeric with `pd.get_dummies`

# Add.astype(int) to switch boolean variables (True/False) to integers
employees_df_dummies = pd.get_dummies(employees_df).astype(int)
employees_df_dummies

# Dropping duplicate column(s)
employees_df_dropped = employees_df_dummies.drop(['left_no'], axis =1)
employees_df_dropped

# # Split our data into features(X) and target variable(y)
y = employees_df_dropped ["left_yes"].values
X = employees_df_dropped.drop(columns="left_yes").values

# Split our data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

X_train.shape

(7155, 20)

X_test.shape

(2385, 20)

# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Define the neural network model

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=40, activation="relu", input_dim=len(X_train[0])))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=20, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 dense_12 (Dense)            (None, 40)                840       
                                                                 
 dense_13 (Dense)            (None, 20)                820       
                                                                 
 dense_14 (Dense)            (None, 1)                 21        
                                                                 
=================================================================
Total params: 1681 (6.57 KB)
Trainable params: 1681 (6.57 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________

# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=50)

Epoch 1/50
224/224 [==============================] - 1s 2ms/step - loss: 0.6188 - accuracy: 0.6904
Epoch 2/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5971 - accuracy: 0.7078
Epoch 3/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5899 - accuracy: 0.7079
Epoch 4/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5834 - accuracy: 0.7079
Epoch 5/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5779 - accuracy: 0.7089
Epoch 6/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5716 - accuracy: 0.7093
Epoch 7/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5661 - accuracy: 0.7096
Epoch 8/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5610 - accuracy: 0.7115
Epoch 9/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5563 - accuracy: 0.7143
Epoch 10/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5526 - accuracy: 0.7174
Epoch 11/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5488 - accuracy: 0.7206
Epoch 12/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5445 - accuracy: 0.7258
Epoch 13/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5431 - accuracy: 0.7266
Epoch 14/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5403 - accuracy: 0.7245
Epoch 15/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5378 - accuracy: 0.7298
Epoch 16/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5372 - accuracy: 0.7318
Epoch 17/50
224/224 [==============================] - 1s 2ms/step - loss: 0.5349 - accuracy: 0.7315
Epoch 18/50
224/224 [==============================] - 1s 3ms/step - loss: 0.5345 - accuracy: 0.7332
Epoch 19/50
224/224 [==============================] - 1s 3ms/step - loss: 0.5336 - accuracy: 0.7329
Epoch 20/50
224/224 [==============================] - 1s 3ms/step - loss: 0.5313 - accuracy: 0.7321
Epoch 21/50
224/224 [==============================] - 1s 3ms/step - loss: 0.5302 - accuracy: 0.7331
Epoch 22/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5299 - accuracy: 0.7349
Epoch 23/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5282 - accuracy: 0.7384
Epoch 24/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5277 - accuracy: 0.7365
Epoch 25/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5273 - accuracy: 0.7379
Epoch 26/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5270 - accuracy: 0.7372
Epoch 27/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5255 - accuracy: 0.7395
Epoch 28/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5250 - accuracy: 0.7365
Epoch 29/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5249 - accuracy: 0.7388
Epoch 30/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5243 - accuracy: 0.7389
Epoch 31/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5249 - accuracy: 0.7389
Epoch 32/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5237 - accuracy: 0.7396
Epoch 33/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5229 - accuracy: 0.7407
Epoch 34/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5228 - accuracy: 0.7409
Epoch 35/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5231 - accuracy: 0.7417
Epoch 36/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5226 - accuracy: 0.7402
Epoch 37/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5212 - accuracy: 0.7409
Epoch 38/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5213 - accuracy: 0.7398
Epoch 39/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5206 - accuracy: 0.7426
Epoch 40/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5203 - accuracy: 0.7430
Epoch 41/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5202 - accuracy: 0.7435
Epoch 42/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5199 - accuracy: 0.7420
Epoch 43/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5197 - accuracy: 0.7428
Epoch 44/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5195 - accuracy: 0.7448
Epoch 45/50
224/224 [==============================] - 0s 2ms/step - loss: 0.5185 - accuracy: 0.7441
Epoch 46/50
224/224 [==============================] - 1s 2ms/step - loss: 0.5183 - accuracy: 0.7417
Epoch 47/50
224/224 [==============================] - 1s 3ms/step - loss: 0.5181 - accuracy: 0.7434
Epoch 48/50
224/224 [==============================] - 1s 2ms/step - loss: 0.5180 - accuracy: 0.7458
Epoch 49/50
224/224 [==============================] - 1s 2ms/step - loss: 0.5179 - accuracy: 0.7452
Epoch 50/50
224/224 [==============================] - 1s 3ms/step - loss: 0.5176 - accuracy: 0.7426

# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

75/75 - 0s - loss: 0.5599 - accuracy: 0.7174 - 212ms/epoch - 3ms/step
Loss: 0.5598913431167603, Accuracy: 0.7174004316329956

# Create a new DataFrame
history_df = pd.DataFrame(fit_model.history)

# Plot the accuracy
history_df.plot(y="accuracy", color="orangered")
plt.ylabel('Accuracy')
plt.xlabel('Epochs')
plt.show()

# Convert categorical features to numerical (only run once! otherwise clear all outputs and rerun)

# Assign a numerical value to each categorical feature instead
department_encoding = {'sales': 0, 'retail': 1, 'operations': 2, 'engineering': 3, 'marketing': 4, 'support': 5, 'admin': 6, 'finance': 7, 'logistics': 8, 'IT': 9}
salary_encoding = {'low': 0, 'medium': 1, 'high': 2}
left_encoding = {'no': 0, 'yes': 1}

# Encoded
employees_df['department_encoded'] = employees_df['department'].map(department_encoding).astype(int)
employees_df['salary_encoded'] = employees_df['salary'].map(salary_encoding).astype(int)
employees_df['left_encoded'] = employees_df['left'].map(left_encoding).astype(int)

# Drop the original categorical columns
employees_df.drop(['department', 'salary', 'left'], axis=1, inplace=True)

# Keep only numeric columns
numeric_df = employees_df.select_dtypes(include=['float64', 'int64'])

# Display correlation matrix
correlation_matrix = numeric_df.corr()

plt.figure(figsize=(8, 6))
sns.heatmap(correlation_matrix, annot=True, cmap='crest', fmt=".2f", linewidths=0.5)
plt.title('Correlation Matrix')
plt.show()

#1.'left' and 'review' have a moderately positive correlation:
# means there is some tendency for the number of employees who left to increase as the reviews become more positive, possibly for better opportunities.
g = sns.FacetGrid(numeric_df, col='left_encoded')
g.map(plt.hist, 'review', bins=20)

<seaborn.axisgrid.FacetGrid at 0x7a3070976980>

#2. 'left' and 'promoted'/'projects'/'satisfaction'/'bonus' have negative correlations:
# means changes in any one of these factors are not largely associated with consistent changes in the number of employees who left the company.
columns_to_plot = ['promoted', 'projects', 'satisfaction',	'bonus']

# Create loop to go through 4 feature columns
for column in columns_to_plot:
    g = sns.FacetGrid(numeric_df, col='left_encoded')
    g.map(plt.hist, column, bins=10)

plt.show()

#3. 'tenure' and 'avg_hrs_month' have strongly positive correlation:
# means that employees who have both left or stayed with the company for a longer time, between 4.0-9.0 years, tend to work more hours per month on average, or vice versa.
# Keep these columns in our data.

g = sns.FacetGrid(numeric_df, col='left_encoded', hue='tenure')
g.map(plt.hist, 'avg_hrs_month', bins=10)
g.add_legend()

plt.show()

#4. 'department' and 'salary' have no correlation to 'left':
# means neither the department in which an employee works nor their salary level has a significant impact on whether they left the company. Let's consider dropping these two columns.

g = sns.FacetGrid(numeric_df, col='left_encoded', hue='salary_encoded')
g.map(plt.hist, 'department_encoded', bins=20)
g.add_legend()

plt.show()

# Dropping non-essential columns
employees_df_cleaned = numeric_df.drop(['department_encoded', 'salary_encoded'], axis =1)
employees_df_cleaned

# Split our data into features(X) and target variable(y)
y = employees_df_cleaned ["left_encoded"].values
X = employees_df_cleaned.drop(columns="left_encoded").values

# Split our data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

X_train.shape

(7155, 7)

X_test.shape

(2385, 7)

# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Define the neural network model

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=14, activation="relu", input_dim=len(X_train[0])))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=7, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 dense_15 (Dense)            (None, 14)                112       
                                                                 
 dense_16 (Dense)            (None, 7)                 105       
                                                                 
 dense_17 (Dense)            (None, 1)                 8         
                                                                 
=================================================================
Total params: 225 (900.00 Byte)
Trainable params: 225 (900.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________

# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=50)

Epoch 1/50
224/224 [==============================] - 2s 3ms/step - loss: 0.5741 - accuracy: 0.7400
Epoch 2/50
224/224 [==============================] - 0s 2ms/step - loss: 0.4801 - accuracy: 0.7553
Epoch 3/50
224/224 [==============================] - 0s 2ms/step - loss: 0.4483 - accuracy: 0.7530
Epoch 4/50
224/224 [==============================] - 0s 2ms/step - loss: 0.4358 - accuracy: 0.7711
Epoch 5/50
224/224 [==============================] - 0s 2ms/step - loss: 0.4277 - accuracy: 0.7955
Epoch 6/50
224/224 [==============================] - 0s 2ms/step - loss: 0.4169 - accuracy: 0.8078
Epoch 7/50
224/224 [==============================] - 0s 2ms/step - loss: 0.4091 - accuracy: 0.8189
Epoch 8/50
224/224 [==============================] - 1s 2ms/step - loss: 0.3988 - accuracy: 0.8280
Epoch 9/50
224/224 [==============================] - 1s 5ms/step - loss: 0.3877 - accuracy: 0.8377
Epoch 10/50
224/224 [==============================] - 1s 5ms/step - loss: 0.3775 - accuracy: 0.8404
Epoch 11/50
224/224 [==============================] - 1s 5ms/step - loss: 0.3698 - accuracy: 0.8451
Epoch 12/50
224/224 [==============================] - 1s 5ms/step - loss: 0.3626 - accuracy: 0.8460
Epoch 13/50
224/224 [==============================] - 1s 5ms/step - loss: 0.3550 - accuracy: 0.8498
Epoch 14/50
224/224 [==============================] - 1s 3ms/step - loss: 0.3484 - accuracy: 0.8510
Epoch 15/50
224/224 [==============================] - 0s 2ms/step - loss: 0.3435 - accuracy: 0.8532
Epoch 16/50
224/224 [==============================] - 1s 3ms/step - loss: 0.3385 - accuracy: 0.8534
Epoch 17/50
224/224 [==============================] - 1s 3ms/step - loss: 0.3350 - accuracy: 0.8559
Epoch 18/50
224/224 [==============================] - 1s 3ms/step - loss: 0.3319 - accuracy: 0.8569
Epoch 19/50
224/224 [==============================] - 1s 3ms/step - loss: 0.3287 - accuracy: 0.8605
Epoch 20/50
224/224 [==============================] - 1s 3ms/step - loss: 0.3257 - accuracy: 0.8625
Epoch 21/50
224/224 [==============================] - 1s 3ms/step - loss: 0.3236 - accuracy: 0.8621
Epoch 22/50
224/224 [==============================] - 0s 2ms/step - loss: 0.3210 - accuracy: 0.8622
Epoch 23/50
224/224 [==============================] - 0s 2ms/step - loss: 0.3191 - accuracy: 0.8637
Epoch 24/50
224/224 [==============================] - 0s 2ms/step - loss: 0.3180 - accuracy: 0.8648
Epoch 25/50
224/224 [==============================] - 0s 2ms/step - loss: 0.3154 - accuracy: 0.8653
Epoch 26/50
224/224 [==============================] - 0s 2ms/step - loss: 0.3139 - accuracy: 0.8668
Epoch 27/50
224/224 [==============================] - 0s 2ms/step - loss: 0.3120 - accuracy: 0.8661
Epoch 28/50
224/224 [==============================] - 1s 5ms/step - loss: 0.3107 - accuracy: 0.8654
Epoch 29/50
224/224 [==============================] - 1s 3ms/step - loss: 0.3088 - accuracy: 0.8657
Epoch 30/50
224/224 [==============================] - 1s 4ms/step - loss: 0.3077 - accuracy: 0.8672
Epoch 31/50
224/224 [==============================] - 0s 2ms/step - loss: 0.3064 - accuracy: 0.8704
Epoch 32/50
224/224 [==============================] - 0s 2ms/step - loss: 0.3047 - accuracy: 0.8693
Epoch 33/50
224/224 [==============================] - 0s 2ms/step - loss: 0.3030 - accuracy: 0.8710
Epoch 34/50
224/224 [==============================] - 0s 2ms/step - loss: 0.3030 - accuracy: 0.8692
Epoch 35/50
224/224 [==============================] - 0s 2ms/step - loss: 0.3020 - accuracy: 0.8690
Epoch 36/50
224/224 [==============================] - 0s 2ms/step - loss: 0.3005 - accuracy: 0.8703
Epoch 37/50
224/224 [==============================] - 0s 2ms/step - loss: 0.2992 - accuracy: 0.8710
Epoch 38/50
224/224 [==============================] - 0s 2ms/step - loss: 0.2994 - accuracy: 0.8721
Epoch 39/50
224/224 [==============================] - 0s 2ms/step - loss: 0.2979 - accuracy: 0.8720
Epoch 40/50
224/224 [==============================] - 0s 2ms/step - loss: 0.2969 - accuracy: 0.8723
Epoch 41/50
224/224 [==============================] - 0s 2ms/step - loss: 0.2961 - accuracy: 0.8741
Epoch 42/50
224/224 [==============================] - 1s 3ms/step - loss: 0.2959 - accuracy: 0.8727
Epoch 43/50
224/224 [==============================] - 1s 3ms/step - loss: 0.2943 - accuracy: 0.8737
Epoch 44/50
224/224 [==============================] - 1s 3ms/step - loss: 0.2940 - accuracy: 0.8734
Epoch 45/50
224/224 [==============================] - 1s 3ms/step - loss: 0.2936 - accuracy: 0.8734
Epoch 46/50
224/224 [==============================] - 1s 3ms/step - loss: 0.2931 - accuracy: 0.8737
Epoch 47/50
224/224 [==============================] - 1s 3ms/step - loss: 0.2920 - accuracy: 0.8728
Epoch 48/50
224/224 [==============================] - 0s 2ms/step - loss: 0.2923 - accuracy: 0.8760
Epoch 49/50
224/224 [==============================] - 0s 2ms/step - loss: 0.2912 - accuracy: 0.8753
Epoch 50/50
224/224 [==============================] - 0s 2ms/step - loss: 0.2907 - accuracy: 0.8762

# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

75/75 - 0s - loss: 0.2956 - accuracy: 0.8679 - 218ms/epoch - 3ms/step
Loss: 0.29557913541793823, Accuracy: 0.8679245114326477

# Create a new DataFrame
history_df = pd.DataFrame(fit_model.history)

# Plot the accuracy
history_df.plot(y="accuracy", color="blue")
plt.ylabel('Accuracy')
plt.xlabel('Epochs')
plt.show()

	department	promoted	review	projects	salary	tenure	satisfaction	bonus	avg_hrs_month	left
0	operations	0	0.577569	3	low	5.0	0.626759	0	180.866070	no
1	operations	0	0.751900	3	medium	6.0	0.443679	0	182.708149	no
2	support	0	0.722548	3	medium	6.0	0.446823	0	184.416084	no
3	logistics	0	0.675158	4	high	8.0	0.440139	0	188.707545	no
4	sales	0	0.676203	3	high	5.0	0.577607	1	179.821083	no
...	...	...	...	...	...	...	...	...	...	...
9535	operations	0	0.610988	4	medium	8.0	0.543641	0	188.155738	yes
9536	logistics	0	0.746887	3	medium	8.0	0.549048	0	188.176164	yes
9537	operations	0	0.557980	3	low	7.0	0.705425	0	186.531008	yes
9538	IT	0	0.584446	4	medium	8.0	0.607287	1	187.641370	yes
9539	finance	0	0.626373	3	low	7.0	0.706455	1	185.920934	yes

	promoted	review	projects	tenure	satisfaction	bonus	avg_hrs_month	department_IT	department_admin	department_engineering	...	department_marketing	department_operations	department_retail	department_sales	department_support	salary_high	salary_low	salary_medium	left_no	left_yes
0	0	0	3	5	0	0	180	0	0	0	...	0	1	0	0	0	0	1	0	1	0
1	0	0	3	6	0	0	182	0	0	0	...	0	1	0	0	0	0	0	1	1	0
2	0	0	3	6	0	0	184	0	0	0	...	0	0	0	0	1	0	0	1	1	0
3	0	0	4	8	0	0	188	0	0	0	...	0	0	0	0	0	1	0	0	1	0
4	0	0	3	5	0	1	179	0	0	0	...	0	0	0	1	0	1	0	0	1	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
9535	0	0	4	8	0	0	188	0	0	0	...	0	1	0	0	0	0	0	1	0	1
9536	0	0	3	8	0	0	188	0	0	0	...	0	0	0	0	0	0	0	1	0	1
9537	0	0	3	7	0	0	186	0	0	0	...	0	1	0	0	0	0	1	0	0	1
9538	0	0	4	8	0	1	187	1	0	0	...	0	0	0	0	0	0	0	1	0	1
9539	0	0	3	7	0	1	185	0	0	0	...	0	0	0	0	0	0	1	0	0	1

	promoted	review	projects	tenure	satisfaction	bonus	avg_hrs_month	department_IT	department_admin	department_engineering	...	department_logistics	department_marketing	department_operations	department_retail	department_sales	department_support	salary_high	salary_low	salary_medium	left_yes
0	0	0	3	5	0	0	180	0	0	0	...	0	0	1	0	0	0	0	1	0	0
1	0	0	3	6	0	0	182	0	0	0	...	0	0	1	0	0	0	0	0	1	0
2	0	0	3	6	0	0	184	0	0	0	...	0	0	0	0	0	1	0	0	1	0
3	0	0	4	8	0	0	188	0	0	0	...	1	0	0	0	0	0	1	0	0	0
4	0	0	3	5	0	1	179	0	0	0	...	0	0	0	0	1	0	1	0	0	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
9535	0	0	4	8	0	0	188	0	0	0	...	0	0	1	0	0	0	0	0	1	1
9536	0	0	3	8	0	0	188	0	0	0	...	1	0	0	0	0	0	0	0	1	1
9537	0	0	3	7	0	0	186	0	0	0	...	0	0	1	0	0	0	0	1	0	1
9538	0	0	4	8	0	1	187	1	0	0	...	0	0	0	0	0	0	0	0	1	1
9539	0	0	3	7	0	1	185	0	0	0	...	0	0	0	0	0	0	0	1	0	1

	promoted	review	projects	tenure	satisfaction	bonus	avg_hrs_month	left_encoded
0	0	0.577569	3	5.0	0.626759	0	180.866070	0
1	0	0.751900	3	6.0	0.443679	0	182.708149	0
2	0	0.722548	3	6.0	0.446823	0	184.416084	0
3	0	0.675158	4	8.0	0.440139	0	188.707545	0
4	0	0.676203	3	5.0	0.577607	1	179.821083	0
...	...	...	...	...	...	...	...	...
9535	0	0.610988	4	8.0	0.543641	0	188.155738	1
9536	0	0.746887	3	8.0	0.549048	0	188.176164	1
9537	0	0.557980	3	7.0	0.705425	0	186.531008	1
9538	0	0.584446	4	8.0	0.607287	1	187.641370	1
9539	0	0.626373	3	7.0	0.706455	1	185.920934	1

> `Model Initialization`¶

> `Model Optimization`¶

> Model Initialization¶

> Model Optimization¶

> `Model Initialization`¶

> `Model Optimization`¶