# pastebin

Paste Search Dynamic
Recent pastes
random forest regressor
1. import numpy as np
2.
def random_forest_regressor(X_train, y_train, X_test, n_trees, max_depth):
    """Predict targets for X_test with a bagged ensemble of decision trees.

    Parameters
    ----------
    X_train : ndarray of shape (n_samples, n_features)
    y_train : ndarray of shape (n_samples,)
    X_test : ndarray of shape (n_test, n_features)
    n_trees : int — number of bootstrap-sampled trees to average.
    max_depth : int — maximum depth of each tree.

    Returns
    -------
    ndarray of shape (n_test,) — mean of the per-tree predictions.
    """
    n_samples = X_train.shape[0]
    y_preds = []
    for _ in range(n_trees):
        # Bootstrap sample: draw n_samples row indices with replacement.
        # (The original passed the whole shape tuple as the population and
        # size, and used lowercase `true` — both fail at runtime.)
        indices = np.random.choice(n_samples, n_samples, replace=True)
        X_bootstrap = X_train[indices]
        y_bootstrap = y_train[indices]
        # Fit one tree on the bootstrap sample and collect its predictions.
        tree = build_decision_tree(X_bootstrap, y_bootstrap, max_depth)
        y_preds.append(predict(tree, X_test))
    # Average the per-tree predictions across the ensemble.
    return np.mean(y_preds, axis=0)
17.
def build_decision_tree(X, y, max_depth):
    """Recursively grow a regression tree by exhaustive split search.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
    y : ndarray of shape (n_samples,)
    max_depth : int — remaining depth budget.

    Returns
    -------
    DecisionNode, or — at a leaf — the raw array of target values that
    reached it (callers aggregate the leaf array, e.g. by taking its mean).
    """
    # Stop splitting: single sample or depth budget exhausted.
    if len(y) == 1 or max_depth == 0:
        return y
    # Search every (feature, threshold) pair for the lowest impurity.
    best_feature = None
    best_threshold = None
    best_entropy = np.inf
    for feature in range(X.shape[1]):  # iterate feature indices, not the shape tuple
        for threshold in np.unique(X[:, feature]):
            left_indices = X[:, feature] < threshold
            right_indices = X[:, feature] >= threshold
            # Skip degenerate splits that leave one side empty.
            if len(y[left_indices]) == 0 or len(y[right_indices]) == 0:
                continue
            entropy = compute_entropy(y[left_indices], y[right_indices])
            if entropy < best_entropy:
                best_feature = feature
                best_threshold = threshold
                best_entropy = entropy
    # No usable split (e.g. all rows identical): return a leaf rather than
    # indexing with best_feature=None, which would not select a column.
    if best_feature is None:
        return y
    # Partition on the winning split and recurse with one less depth level.
    left_indices = X[:, best_feature] < best_threshold
    right_indices = X[:, best_feature] >= best_threshold
    left_tree = build_decision_tree(X[left_indices], y[left_indices], max_depth - 1)
    right_tree = build_decision_tree(X[right_indices], y[right_indices], max_depth - 1)
    return DecisionNode(best_feature, best_threshold, left_tree, right_tree)
44.
def predict(tree, X_test):
    """Predict one value per row of X_test by walking the tree.

    Parameters
    ----------
    tree : DecisionNode or leaf array (as returned by build_decision_tree).
    X_test : ndarray of shape (n_test, n_features)

    Returns
    -------
    ndarray of shape (n_test,)
    """
    y_pred = []
    for x in X_test:
        node = tree
        # Descend until we hit a leaf (anything that is not a DecisionNode).
        while isinstance(node, DecisionNode):
            # Use DecisionNode's actual attribute names (left_tree/right_tree);
            # the original read non-existent .left/.right.
            if x[node.feature] < node.threshold:
                node = node.left_tree
            else:
                node = node.right_tree
        # A leaf holds the array of targets that reached it; predict their
        # mean (appending the raw array would produce a ragged result).
        y_pred.append(float(np.mean(node)))
    return np.array(y_pred)
def compute_entropy(y_left, y_right):
    """Impurity score of a candidate split — lower is better.

    NOTE(review): despite the name this is not Shannon entropy; it is the
    negated fraction-weighted log2 of each side's variance. Kept as-is so
    the split ranking used by the tree search is unchanged. Beware that a
    constant-valued side (variance 0) yields -inf from np.log2.

    Parameters
    ----------
    y_left, y_right : array-like of target values on each side of the split.

    Returns
    -------
    float
    """
    # Fraction of samples falling on each side.
    p_left = len(y_left) / (len(y_left) + len(y_right))
    p_right = 1 - p_left
    entropy_left = p_left * np.log2(np.var(y_left))
    entropy_right = p_right * np.log2(np.var(y_right))
    # Debug prints removed: they fired once per candidate split and flooded
    # stdout during tree construction.
    return -(entropy_left + entropy_right)
65.
66.
class DecisionNode:
    """Internal node of a regression tree.

    Attributes:
        feature: int — index of the feature this node splits on.
        threshold: rows with x[feature] < threshold go to the left subtree.
        left_tree, right_tree: DecisionNode children, or leaf arrays of targets.
        left, right: aliases for left_tree/right_tree, kept because other
            code in this file walks the tree via .left/.right.
    """

    def __init__(self, feature, threshold, left_tree, right_tree):
        self.feature = feature
        self.threshold = threshold
        self.left_tree = left_tree
        self.right_tree = right_tree
        # Backward-compatible aliases so both attribute spellings resolve.
        self.left = left_tree
        self.right = right_tree

    def __repr__(self):
        return (f"{type(self).__name__}(feature={self.feature!r}, "
                f"threshold={self.threshold!r}, ...)")
73.
Parsed in 0.061 seconds