You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

73 lines
1.9 KiB

# 开源项目贡献者流失预测
## 项目结构
```text
contributor_churn_prediction/
├── data/
│   ├── linux_commits.csv
│   └── rust_commits.csv
├── src/
│   ├── data_preprocessing.py
│   ├── time_series_prediction.py
│   └── model.py
├── requirements.txt
├── main.py
└── README_V2.md
```
## 环境配置
1. 安装 Anaconda 或 Miniconda。
2. 创建虚拟环境:
```bash
conda create -n churn_pred python=3.12
conda activate churn_pred
```
3. 安装依赖:
```bash
pip install -r requirements.txt
```
4. 安装额外的系统依赖:
```bash
sudo apt-get update
sudo apt-get install -y libpq-dev build-essential
```
5. 配置环境变量:
```bash
export PYTHONPATH="${PYTHONPATH}:/path/to/contributor_churn_prediction"
export DATA_DIR="/path/to/data"
export MODEL_CACHE="/path/to/model_cache"
```
6. 下载并安装自定义的模型包:
```bash
git clone https://github.com/custom_models/churn_pred.git
cd churn_pred
pip install -e .
```
## 运行代码
1. 数据预处理:
```bash
python src/data_preprocessing.py --input $DATA_DIR/linux_commits.csv --output $DATA_DIR/processed_linux.pkl
python src/data_preprocessing.py --input $DATA_DIR/rust_commits.csv --output $DATA_DIR/processed_rust.pkl
```
2. 特征工程:
```bash
python src/time_series_prediction.py --input $DATA_DIR/processed_linux.pkl --output $DATA_DIR/features_linux.pkl
python src/time_series_prediction.py --input $DATA_DIR/processed_rust.pkl --output $DATA_DIR/features_rust.pkl
```
3. 模型训练:
```bash
python src/model.py --input $DATA_DIR/features_linux.pkl --model-type rf --output $MODEL_CACHE/model_linux.pkl
python src/model.py --input $DATA_DIR/features_rust.pkl --model-type xgb --output $MODEL_CACHE/model_rust.pkl
```
4. 预测:
```bash
python main.py --linux-model $MODEL_CACHE/model_linux.pkl --rust-model $MODEL_CACHE/model_rust.pkl --output results.json
```