# pastebin

Paste Search Dynamic
Recent pastes
random forest regressor
1. import numpy as np
2.
def random_forest_regressor(X_train, y_train, X_test, n_trees, max_depth):
    """Predict targets for X_test with a bagged ensemble of decision trees.

    Parameters
    ----------
    X_train : ndarray of shape (n_samples, n_features)
    y_train : ndarray of shape (n_samples,)
    X_test : ndarray of shape (n_test, n_features)
    n_trees : int — number of bootstrap-sampled trees to average.
    max_depth : int — maximum depth of each tree.

    Returns
    -------
    ndarray of shape (n_test,) — mean of the per-tree predictions.
    """
    n_samples = X_train.shape[0]
    y_preds = []
    for _ in range(n_trees):
        # Bootstrap sample: draw n_samples row indices with replacement.
        # (The original passed the whole shape tuple as the population and
        # size, and used lowercase `true` — both fail at runtime.)
        indices = np.random.choice(n_samples, n_samples, replace=True)
        X_bootstrap = X_train[indices]
        y_bootstrap = y_train[indices]
        # Fit one tree on the bootstrap sample and collect its predictions.
        tree = build_decision_tree(X_bootstrap, y_bootstrap, max_depth)
        y_preds.append(predict(tree, X_test))
    # Average the per-tree predictions across the ensemble.
    return np.mean(y_preds, axis=0)
17.
def build_decision_tree(X, y, max_depth):
    """Recursively grow a regression tree by exhaustive split search.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
    y : ndarray of shape (n_samples,)
    max_depth : int — remaining depth budget.

    Returns
    -------
    DecisionNode, or — at a leaf — the raw array of target values that
    reached it (callers aggregate the leaf array, e.g. by taking its mean).
    """
    # Stop splitting: single sample or depth budget exhausted.
    if len(y) == 1 or max_depth == 0:
        return y
    # Search every (feature, threshold) pair for the lowest impurity.
    best_feature = None
    best_threshold = None
    best_entropy = np.inf
    for feature in range(X.shape[1]):  # iterate feature indices, not the shape tuple
        for threshold in np.unique(X[:, feature]):
            left_indices = X[:, feature] < threshold
            right_indices = X[:, feature] >= threshold
            # Skip degenerate splits that leave one side empty.
            if len(y[left_indices]) == 0 or len(y[right_indices]) == 0:
                continue
            entropy = compute_entropy(y[left_indices], y[right_indices])
            if entropy < best_entropy:
                best_feature = feature
                best_threshold = threshold
                best_entropy = entropy
    # No usable split (e.g. all rows identical): return a leaf rather than
    # indexing with best_feature=None, which would not select a column.
    if best_feature is None:
        return y
    # Partition on the winning split and recurse with one less depth level.
    left_indices = X[:, best_feature] < best_threshold
    right_indices = X[:, best_feature] >= best_threshold
    left_tree = build_decision_tree(X[left_indices], y[left_indices], max_depth - 1)
    right_tree = build_decision_tree(X[right_indices], y[right_indices], max_depth - 1)
    return DecisionNode(best_feature, best_threshold, left_tree, right_tree)
44.
def predict(tree, X_test):
    """Predict one value per row of X_test by walking the tree.

    Parameters
    ----------
    tree : DecisionNode or leaf array (as returned by build_decision_tree).
    X_test : ndarray of shape (n_test, n_features)

    Returns
    -------
    ndarray of shape (n_test,)
    """
    y_pred = []
    for x in X_test:
        node = tree
        # Descend until we hit a leaf (anything that is not a DecisionNode).
        while isinstance(node, DecisionNode):
            # Use DecisionNode's actual attribute names (left_tree/right_tree);
            # the original read non-existent .left/.right.
            if x[node.feature] < node.threshold:
                node = node.left_tree
            else:
                node = node.right_tree
        # A leaf holds the array of targets that reached it; predict their
        # mean (appending the raw array would produce a ragged result).
        y_pred.append(float(np.mean(node)))
    return np.array(y_pred)
def compute_entropy(y_left, y_right):
    """Impurity score of a candidate split — lower is better.

    NOTE(review): despite the name this is not Shannon entropy; it is the
    negated fraction-weighted log2 of each side's variance. Kept as-is so
    the split ranking used by the tree search is unchanged. Beware that a
    constant-valued side (variance 0) yields -inf from np.log2.

    Parameters
    ----------
    y_left, y_right : array-like of target values on each side of the split.

    Returns
    -------
    float
    """
    # Fraction of samples falling on each side.
    p_left = len(y_left) / (len(y_left) + len(y_right))
    p_right = 1 - p_left
    entropy_left = p_left * np.log2(np.var(y_left))
    entropy_right = p_right * np.log2(np.var(y_right))
    # Debug prints removed: they fired once per candidate split and flooded
    # stdout during tree construction.
    return -(entropy_left + entropy_right)
65.
66.
class DecisionNode:
    """Internal node of a regression tree.

    Attributes:
        feature: int — index of the feature this node splits on.
        threshold: rows with x[feature] < threshold go to the left subtree.
        left_tree, right_tree: DecisionNode children, or leaf arrays of targets.
        left, right: aliases for left_tree/right_tree, kept because other
            code in this file walks the tree via .left/.right.
    """

    def __init__(self, feature, threshold, left_tree, right_tree):
        self.feature = feature
        self.threshold = threshold
        self.left_tree = left_tree
        self.right_tree = right_tree
        # Backward-compatible aliases so both attribute spellings resolve.
        self.left = left_tree
        self.right = right_tree

    def __repr__(self):
        return (f"{type(self).__name__}(feature={self.feature!r}, "
                f"threshold={self.threshold!r}, ...)")
73.
Parsed in 0.061 seconds