From a2b4b0dd047f7b15b2b69fb7bf41d4e2cb185be7 Mon Sep 17 00:00:00 2001
From: paxflsu4r <198028451@qq.com>
Date: Mon, 20 Jan 2025 17:15:36 +0800
Subject: [PATCH] ADD file via upload

---
 my code.py | 261 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 261 insertions(+)
 create mode 100644 my code.py

diff --git a/my code.py b/my code.py
new file mode 100644
index 0000000..20bc978
--- /dev/null
+++ b/my code.py	
@@ -0,0 +1,261 @@
+import json
+
+def combine_jsonl(file1, file2, output_file):
+    """Concatenate two JSON Lines files into ``output_file``.
+
+    Records from ``file1`` are written first, then those from ``file2``.
+    Blank lines are skipped instead of crashing ``json.loads``. Both inputs
+    are fully parsed before the output is opened, so ``output_file`` may be
+    one of the inputs, and a malformed record aborts before anything is written.
+    """
+    combined_data = []
+    for path in (file1, file2):
+        with open(path, 'r', encoding='utf-8') as src:
+            combined_data.extend(json.loads(line) for line in src if line.strip())
+
+    # ensure_ascii=False keeps non-ASCII text readable instead of \u-escaping it
+    with open(output_file, 'w', encoding='utf-8') as out:
+        for entry in combined_data:
+            out.write(json.dumps(entry, ensure_ascii=False) + '\n')
+
+# Example usage
+combine_jsonl('4.jsonl', 'C:\\Users\\LENOVO\\Documents\\WeChat Files\\wxid_3kv9umjbvkr822\\FileStorage\\File\\2024-10\\3mbpp_test1.jsonl', '5.jsonl')
+
+def combine_jsonl(file1, file2, output_file):
+    """Concatenate two JSON Lines files into ``output_file``.
+
+    Records from ``file1`` are written first, then those from ``file2``.
+    Blank lines are skipped instead of crashing ``json.loads``. Both inputs
+    are fully parsed before the output is opened, so ``output_file`` may be
+    one of the inputs, and a malformed record aborts before anything is written.
+    """
+    combined_data = []
+    for path in (file1, file2):
+        with open(path, 'r', encoding='utf-8') as src:
+            combined_data.extend(json.loads(line) for line in src if line.strip())
+
+    # ensure_ascii=False keeps non-ASCII text readable instead of \u-escaping it
+    with open(output_file, 'w', encoding='utf-8') as out:
+        for entry in combined_data:
+            out.write(json.dumps(entry, ensure_ascii=False) + '\n')
+
+# Example usage (NOTE(review): this definition and call repeat the ones earlier in the file)
+combine_jsonl('4.jsonl', 'C:\\Users\\LENOVO\\Documents\\WeChat Files\\wxid_3kv9umjbvkr822\\FileStorage\\File\\2024-10\\3mbpp_test1.jsonl', '5.jsonl')
+
+
+def combine_jsonl(file1, file2, output_file):
+    """Concatenate two JSON Lines files into ``output_file``.
+
+    Records from ``file1`` are written first, then those from ``file2``.
+    Blank lines are skipped instead of crashing ``json.loads``. Both inputs
+    are fully parsed before the output is opened, so ``output_file`` may be
+    one of the inputs, and a malformed record aborts before anything is written.
+    """
+    combined_data = []
+    for path in (file1, file2):
+        with open(path, 'r', encoding='utf-8') as src:
+            combined_data.extend(json.loads(line) for line in src if line.strip())
+
+    # ensure_ascii=False keeps non-ASCII text readable instead of \u-escaping it
+    with open(output_file, 'w', encoding='utf-8') as out:
+        for entry in combined_data:
+            out.write(json.dumps(entry, ensure_ascii=False) + '\n')
+
+# Example usage (NOTE(review): this definition and call repeat the ones earlier in the file)
+combine_jsonl('4.jsonl', 'C:\\Users\\LENOVO\\Documents\\WeChat Files\\wxid_3kv9umjbvkr822\\FileStorage\\File\\2024-10\\3mbpp_test1.jsonl', '5.jsonl')
+
+import pandas as pd
+
+
+def parquet_to_jsonl(parquet_file, jsonl_file):
+    """Convert a Parquet file to JSON Lines, one record per table row.
+
+    The table is read with the pyarrow engine and each row is serialised with
+    ``Series.to_json``. The output encoding is pinned to UTF-8 so the result
+    does not depend on the platform's default locale encoding.
+    """
+    df = pd.read_parquet(parquet_file, engine='pyarrow')
+    with open(jsonl_file, 'w', encoding='utf-8') as f:
+        for _, row in df.iterrows():  # NOTE: rows arrive as Series, so mixed dtypes may be upcast
+            f.write(row.to_json() + '\n')
+
+# Example usage
+parquet_file = 'E:\\train-00000-of-00001.parquet'
+jsonl_file = 'e1.jsonl'
+parquet_to_jsonl(parquet_file, jsonl_file)
+# NOTE(review): truncated snippet - the lines that open the files and start the loop are missing; "(line)" is presumably the tail of `data = json.loads(line)`.
+(line)
+        if "output" in data:
+            value = data["output"]
+            # Locate the start of the fenced Python block
+            index = value.find("\n```python")
+            if index!= -1:
+                # If the fence is found, keep only the text before it and store it under "input"
+                new_value = value[:index]
+                data["input"] = new_value
+        f_out.write(json.dumps(data) + '\n')
+
+# Names of the file to append (source) and the file that receives it (target)
+source_filename = '3.jsonl'
+target_filename = '1.jsonl'
+
+# 'a+' creates the target if it is missing, allows reading it, and appends every write at the end
+with open(source_filename, 'r', encoding='utf-8') as source_file, open(target_filename, 'a+', encoding='utf-8') as target_file:
+    # Find the last existing line of the target (stays '' when the file is empty)
+    target_file.seek(0)
+    last_line = ''
+    for last_line in target_file:
+        pass
+
+    # Add a separator only if the existing content lacks a final newline; writing one
+    # unconditionally would leave an empty line (an invalid JSONL record) in the file
+    if last_line and not last_line.endswith('\n'):
+        target_file.seek(0, 2)
+        target_file.write('\n')
+    target_file.writelines(source_file)
+
+# [pasted chat-log header, not code] RYin: 01-20 15:08:16
+import json
+
+# Input and output file names
+input_filename = 'code1.jsonl'
+output_filename = 'code2.jsonl'
+
+# Key to remove from every record
+key_to_delete = "system"
+
+# Stream the input one JSON line at a time, drop the key where present,
+# and write each record to the output file as a single line
+with open(input_filename, 'r', encoding='utf-8') as infile:
+    with open(output_filename, 'w', encoding='utf-8') as outfile:
+        for raw_line in infile:
+            record = json.loads(raw_line)
+            if key_to_delete in record:
+                del record[key_to_delete]
+            # Default json.dumps settings: non-ASCII characters are written as \u escapes
+            outfile.write(json.dumps(record) + '\n')
+
+# [pasted chat-log header, not code] RYin: 01-20 15:08:25
+import json
+
+# Paths of the file to read and the file to write
+input_file_path = 'code2.jsonl'
+output_file_path = 'code1.jsonl'
+# Opening fence of a Python code block; it and everything before it is removed
+target_string = '```python\n'
+
+# Rewrite the file record by record
+with open(input_file_path, 'r', encoding='utf-8') as infile, open(output_file_path, 'w', encoding='utf-8') as outfile:
+    for raw_line in infile:
+        # One JSON document per line
+        record = json.loads(raw_line)
+
+        # Records without an "output" field are copied through untouched
+        if 'output' in record:
+            content = record['output']
+            if target_string in content:
+                # Keep only what follows the first occurrence of the marker
+                _, _, remainder = content.partition(target_string)
+                record['output'] = remainder
+
+        # Serialise the (possibly shortened) record back to a single line
+        outfile.write(json.dumps(record) + '\n')
+
+print("处理完成！")
+
+import json
+
+# File paths and marker (identical to the settings of the preceding snippet)
+input_file_path = 'code2.jsonl'
+output_file_path = 'code1.jsonl'
+target_string = '```python\n'
+
+# NOTE(review): this pass repeats the preceding snippet line for line;
+# one of the two copies could be removed.
+with open(input_file_path, 'r', encoding='utf-8') as infile, open(output_file_path, 'w', encoding='utf-8') as outfile:
+    # Each line holds one JSON document
+    for raw_line in infile:
+        record = json.loads(raw_line)
+
+        # Only records that carry an "output" field are candidates for trimming
+        if 'output' in record:
+            text = record['output']
+            if target_string in text:
+                # maxsplit=1 yields exactly two parts: before and after the first marker
+                _, remainder = text.split(target_string, 1)
+                record['output'] = remainder
+
+        # Write the record back as one JSON line
+        outfile.write(json.dumps(record) + '\n')
+
+print("处理完成！")
+
+import json
+
+# Copy the source JSONL to y1.jsonl, rewriting the "input" field of each record on the way
+with open('C:\\Users\\LENOVO\\Downloads\\output17.jsonl', 'r', encoding='utf-8') as infile, open('y1.jsonl', 'w', encoding='utf-8') as outfile:
+    for raw_line in infile:
+        record = json.loads(raw_line)
+        if 'input' in record:
+            # count=1: only the first occurrence of "question" is replaced
+            record['input'] = record['input'].replace("question", "Instruction", 1)
+        # print() appends the newline; ensure_ascii=False keeps non-ASCII characters unescaped
+        print(json.dumps(record, ensure_ascii=False), file=outfile)
+
+import json
+
+# Renumber one field of every record in 5.jsonl and save the result as 6.jsonl.
+# Name of the field whose value is overwritten
+old_key = "task_id"
+# Running record counter; it stays 0 when the input file is empty
+new_value = 0
+
+# Load all lines of the JSON Lines input up front
+with open('5.jsonl', 'r', encoding='utf-8') as file:
+    lines = file.readlines()
+
+# Serialised records, in input order
+updated_lines = []
+
+# The counter advances for every line, including records without the field,
+# so the assigned values follow 1-based line positions and may have gaps
+for new_value, line in enumerate(lines, start=1):
+    # Parse the JSON object on this line
+    data = json.loads(line)
+
+    # Only records that already contain the field are modified
+    if old_key in data:
+        # The counter is stored as a string, not as an integer
+        data[old_key] = str(new_value)
+        # To delete the field instead, use: del data[old_key]
+
+    # Default json.dumps settings: non-ASCII characters are written as \u escapes
+    updated_lines.append(json.dumps(data))
+
+# Records are joined with newlines; no newline is written after the last one
+with open('6.jsonl', 'w', encoding='utf-8') as file:
+    file.write('\n'.join(updated_lines))
+
+print("文件已更新。")
+
+import requests
+from bs4 import BeautifulSoup
+# Browser-like User-Agent sent with the request
+headers = {
+    "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 SLBrowser/9.0.5.8121 SLBChan/111 SLBVPV/64-bit"
+}
+
+# Fetch the dataset viewer page. The timeout keeps the script from hanging forever on a
+# stalled connection, and raise_for_status() stops it from parsing an HTTP error page.
+response = requests.get("https://hf-mirror.com/datasets/openai/openai_humaneval/viewer/openai_humaneval/test?row=0", headers=headers, timeout=30)
+response.raise_for_status()
+html = response.text
+soup = BeautifulSoup(html, "html.parser")
+# Print the text of every <div> that carries the CSS class "line-clamp-2"
+all_content = soup.find_all("div",{"class":"line-clamp-2"})
+for content in all_content:
+    print(content.text)
+
+
+
+