import numpy as np
def random_forest_regressor(X_train, y_train, X_test, n_trees, max_depth):
    """Fit a bagged ensemble of regression trees and predict ``X_test``.

    Every tree is trained on a bootstrap sample of the training rows (drawn
    with replacement, same size as the training set). The per-tree
    predictions are then averaged element-wise.

    Args:
        X_train: 2-D array-like of training features, one row per sample.
        y_train: 1-D array-like of targets aligned with ``X_train`` rows.
        X_test: Iterable of feature rows to predict.
        n_trees: Number of trees to grow; must be at least 1.
        max_depth: Depth budget forwarded to ``build_decision_tree``.

    Returns:
        Array holding one averaged prediction per row of ``X_test``.

    Raises:
        ValueError: If ``n_trees`` is smaller than 1.
    """
    if n_trees < 1:
        # Averaging an empty list of predictions would silently yield NaN.
        raise ValueError("n_trees must be at least 1")

    # Accept plain lists as well as arrays; fancy indexing below needs arrays.
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)
    n_samples = X_train.shape[0]

    y_preds = []
    for _ in range(n_trees):
        # Bootstrap sample: draw n_samples row indices with replacement.
        indices = np.random.choice(n_samples, n_samples, replace=True)
        tree = build_decision_tree(X_train[indices], y_train[indices], max_depth)
        y_preds.append(predict(tree, X_test))

    # Average predictions from all trees.
    return np.mean(y_preds, axis=0)
def build_decision_tree(X, y, max_depth):
    """Recursively grow a regression tree on ``X`` / ``y``.

    Candidate splits are ``X[:, feature] < threshold`` for every feature and
    every distinct value of that feature. The split with the lowest score
    from ``compute_entropy`` is kept.

    Args:
        X: 2-D array-like of features, one row per sample.
        y: 1-D array-like of targets aligned with the rows of ``X``.
        max_depth: Remaining depth budget; ``0`` forces a leaf.

    Returns:
        A ``DecisionNode`` for an internal node, or a scalar leaf value
        (the mean of ``y``) when splitting stops.

    Raises:
        ValueError: If ``y`` is empty.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    if len(y) == 0:
        raise ValueError("cannot build a decision tree from zero samples")

    # Stop on a single sample, an exhausted depth budget, or a pure node.
    # A regression leaf predicts the mean of the targets that reached it.
    if len(y) == 1 or max_depth == 0 or np.all(y == y[0]):
        return np.mean(y)

    # Find the best split.
    best_feature = None
    best_threshold = None
    best_score = np.inf
    for feature in range(X.shape[1]):
        column = X[:, feature]
        for threshold in np.unique(column):
            left_mask = column < threshold
            # Skip degenerate splits that leave one side empty.
            if not left_mask.any() or left_mask.all():
                continue
            score = compute_entropy(y[left_mask], y[~left_mask])
            if score < best_score:
                best_feature = feature
                best_threshold = threshold
                best_score = score

    # No usable split (e.g. every feature is constant): fall back to a leaf
    # instead of indexing with an unset feature.
    if best_feature is None:
        return np.mean(y)

    # Split data and recursively build subtrees. Rows that are not strictly
    # below the threshold go right, so every row lands in exactly one child.
    left_mask = X[:, best_feature] < best_threshold
    right_mask = ~left_mask
    left_tree = build_decision_tree(X[left_mask], y[left_mask], max_depth - 1)
    right_tree = build_decision_tree(X[right_mask], y[right_mask], max_depth - 1)

    return DecisionNode(best_feature, best_threshold, left_tree, right_tree)
def predict(tree, X_test):
    """Predict a value for every row of ``X_test`` using a single tree.

    Args:
        tree: Either a ``DecisionNode`` (internal node) or a leaf value.
        X_test: Iterable of feature rows; each row is indexed by
            ``node.feature``.

    Returns:
        Array with one leaf value per row of ``X_test``.
    """
    y_pred = []
    for x in X_test:
        node = tree
        # Descend until we hit something that is not a DecisionNode (a leaf).
        while isinstance(node, DecisionNode):
            # DecisionNode stores its children as left_tree / right_tree.
            if x[node.feature] < node.threshold:
                node = node.left_tree
            else:
                node = node.right_tree
        y_pred.append(node)
    return np.array(y_pred)
def compute_entropy(y_left, y_right):
    """Score a candidate split of regression targets; lower is better.

    The score is the size-weighted variance of the two subsets. A split
    that separates the targets perfectly scores ``0.0``. Variance is used
    directly rather than its logarithm, so pure subsets (variance 0) do not
    produce infinities.

    Args:
        y_left: Targets routed to the left child.
        y_right: Targets routed to the right child.

    Returns:
        The weighted impurity as a float.

    Raises:
        ValueError: If both subsets are empty.
    """
    n_left = len(y_left)
    n_right = len(y_right)
    n_total = n_left + n_right
    if n_total == 0:
        raise ValueError("cannot score a split with no samples")

    impurity = 0.0
    # An empty side contributes nothing; skipping it avoids np.var([]) -> NaN.
    if n_left:
        impurity += (n_left / n_total) * np.var(y_left)
    if n_right:
        impurity += (n_right / n_total) * np.var(y_right)
    return float(impurity)
class DecisionNode:
    """Internal node of a decision tree: a threshold test on one feature.

    Attributes are set directly from the constructor arguments:
    ``feature`` (column index tested), ``threshold`` (split value),
    ``left_tree`` and ``right_tree`` (child subtrees or leaf values).
    ``left`` / ``right`` are read-only aliases for the two children, since
    this file's tree traversal reads the children under those names.
    """

    def __init__(self, feature, threshold, left_tree, right_tree):
        self.feature = feature
        self.threshold = threshold
        self.left_tree = left_tree
        self.right_tree = right_tree

    @property
    def left(self):
        """Alias for ``left_tree``."""
        return self.left_tree

    @property
    def right(self):
        """Alias for ``right_tree``."""
        return self.right_tree

    def __repr__(self):
        return (
            f"{type(self).__name__}(feature={self.feature!r}, "
            f"threshold={self.threshold!r}, left_tree={self.left_tree!r}, "
            f"right_tree={self.right_tree!r})"
        )