You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

134 lines
4.9 KiB

5 months ago
import numpy as np
import matplotlib.pyplot as plt
def generate_dataset(func_type, coeff, n_samples=100, noise_level=0.0):
    """
    Sample a parametric curve on a fixed x grid and add Gaussian noise to it.

    The x grid is n_samples evenly spaced points from -1000 to 1000.

    Parameters:
    func_type (str): One of 'linear', 'quadratic', 'cubic', 'quartic',
        'exponential', 'logarithmic'.
    coeff (list): Coefficients of the function. Polynomials read them lowest
        order first (coeff[0] is the constant term); 'exponential' evaluates
        coeff[0] * exp(coeff[1] * x); 'logarithmic' evaluates
        coeff[0] + coeff[1] * log(|x| + 1).
    n_samples (int): Number of samples to generate.
    noise_level (float): Standard deviation of the zero-mean Gaussian noise
        added to y.

    Returns:
    np.ndarray: Array of shape (n_samples, 2); column 0 holds x, column 1
        holds the noisy y.

    Raises:
    KeyError: If func_type is not one of the supported names.
    """
    grid = np.linspace(-1000, 1000, n_samples)

    # Evaluate only the requested model; terms are summed lowest order first.
    if func_type == 'linear':
        clean = coeff[0] + coeff[1] * grid
    elif func_type == 'quadratic':
        clean = coeff[0] + coeff[1] * grid + coeff[2] * grid**2
    elif func_type == 'cubic':
        clean = coeff[0] + coeff[1] * grid + coeff[2] * grid**2 + coeff[3] * grid**3
    elif func_type == 'quartic':
        clean = (coeff[0] + coeff[1] * grid + coeff[2] * grid**2
                 + coeff[3] * grid**3 + coeff[4] * grid**4)
    elif func_type == 'exponential':
        clean = coeff[0] * np.exp(coeff[1] * grid)
    elif func_type == 'logarithmic':
        # |x| + 1 keeps the argument of log strictly positive (no log(0)).
        clean = coeff[0] + coeff[1] * np.log(np.abs(grid) + 1)
    else:
        raise KeyError(func_type)

    # One noise draw per sample, centred on zero.
    jitter = np.random.normal(0, noise_level, n_samples)

    # Stack x and noisy y as rows, then transpose so each row is one (x, y) pair.
    return np.transpose(np.vstack((grid, clean + jitter)))
def plot_dataset(dataset, func_type, coeff):
    """
    Plot the generated dataset together with the noise-free function.

    Parameters:
    dataset (np.ndarray): Generated dataset; column 0 is read as x and
        column 1 as y.
    func_type (str): Type of function used to generate the dataset
        ('linear', 'quadratic', 'cubic', 'quartic', 'exponential',
        'logarithmic').
    coeff (list): Coefficients of the function.

    Raises:
    ValueError: If func_type is not one of the supported names.
    """
    # Noise-free model for every supported function type.
    functions = {
        'linear': lambda x: coeff[0] + coeff[1] * x,
        'quadratic': lambda x: coeff[0] + coeff[1] * x + coeff[2] * x**2,
        'cubic': lambda x: coeff[0] + coeff[1] * x + coeff[2] * x**2 + coeff[3] * x**3,
        'quartic': lambda x: (coeff[0] + coeff[1] * x + coeff[2] * x**2
                              + coeff[3] * x**3 + coeff[4] * x**4),
        'exponential': lambda x: coeff[0] * np.exp(coeff[1] * x),
        'logarithmic': lambda x: coeff[0] + coeff[1] * np.log(np.abs(x) + 1),
    }

    # Validate before touching matplotlib so an unsupported type fails with a
    # clear message instead of an unbound local after a figure was created.
    if func_type not in functions:
        raise ValueError(
            f"Unsupported func_type {func_type!r}; expected one of {sorted(functions)}"
        )

    x_values = dataset[:, 0]
    y_values = dataset[:, 1]

    # Evaluate the reference curve on sorted x so the line is drawn left to
    # right even when the dataset rows are not ordered by x.
    x_curve = np.sort(x_values)
    y_original = functions[func_type](x_curve)

    # Plotting the dataset
    plt.figure(figsize=(10, 6))
    plt.scatter(x_values, y_values, color='blue', label='Generated Data')
    # Plot the original function without noise for comparison
    plt.plot(x_curve, y_original, color='red', label='Original Function')
    plt.title(f'{func_type.capitalize()} Function Dataset Visualization')
    plt.xlabel('X Values')
    plt.ylabel('Y Values')
    plt.legend()
    plt.grid(True)
    plt.show()
def generate_dataset2(func_type, coeff, x_range=(-1000, 1000), n_samples=100, noise_level=0.0):
    """
    Generate a dataset based on a mathematical function with a specified x range.

    The y values are clipped to the same numeric interval as x_range, both
    before and after the noise is added.

    Parameters:
    func_type (str): Type of function ('linear', 'quadratic', 'cubic',
        'quartic', 'exponential', 'logarithmic').
    coeff (list): Coefficients of the function. Polynomials read them lowest
        order first (coeff[0] is the constant term).
    x_range (tuple): The range of x values (start, stop). A descending range
        is accepted; x is then sampled from start down to stop.
    n_samples (int): Number of samples to generate.
    noise_level (float): Standard deviation of Gaussian noise added to the data.

    Returns:
    np.ndarray: Array of shape (n_samples, 2); column 0 holds x, column 1
        holds the clipped noisy y.

    Raises:
    KeyError: If func_type is not one of the supported names.
    """
    x_start, x_stop = x_range[0], x_range[1]

    # Generate x values within the specified range
    x_values = np.linspace(x_start, x_stop, n_samples)

    # Define the functions
    functions = {
        'linear': lambda x: coeff[0] + coeff[1] * x,
        'quadratic': lambda x: coeff[0] + coeff[1] * x + coeff[2] * x**2,
        'cubic': lambda x: coeff[0] + coeff[1] * x + coeff[2] * x**2 + coeff[3] * x**3,
        'quartic': lambda x: (coeff[0] + coeff[1] * x + coeff[2] * x**2
                              + coeff[3] * x**3 + coeff[4] * x**4),
        'exponential': lambda x: coeff[0] * np.exp(coeff[1] * x),
        'logarithmic': lambda x: coeff[0] + coeff[1] * np.log(np.abs(x) + 1),  # Avoid log(0)
    }

    # Generate y values based on the selected function
    y_values = functions[func_type](x_values)

    # np.clip needs lower <= upper; order the bounds so a descending x_range
    # still clips to the intended interval instead of collapsing to one value.
    lower, upper = min(x_start, x_stop), max(x_start, x_stop)

    # NOTE(review): y is clipped to the x interval, not to a separate y range --
    # this mirrors the original behaviour; confirm it is intended.
    y_values = np.clip(y_values, lower, upper)

    # Add noise, then clip again so the noise cannot push y outside the interval
    noise = np.random.normal(0, noise_level, n_samples)
    y_values_noisy = np.clip(y_values + noise, lower, upper)

    # Combine x and y values
    dataset = np.vstack((x_values, y_values_noisy)).T
    return dataset
# Example usage: draw ten noisy samples of 1 + 2x + 3x^2 and print the raw rows.
dataset = generate_dataset(func_type='quadratic', coeff=[1, 2, 3], n_samples=10, noise_level=10)
print(dataset)

# Visualize the dataset. A fresh sample is drawn here, so the plotted points
# carry different noise from the rows printed above.
dataset = generate_dataset(func_type='quadratic', coeff=[1, 2, 3], n_samples=10, noise_level=10)
plot_dataset(dataset=dataset, func_type='quadratic', coeff=[1, 2, 3])