diff --git a/cwrapper/wrapp_runner.py b/cwrapper/wrapp_runner.py new file mode 100644 index 0000000..9da26ea --- /dev/null +++ b/cwrapper/wrapp_runner.py @@ -0,0 +1,1372 @@ +#!/usr/bin/python3 +import subprocess +import re +import os +import sys +import string +import networkx as nx + +from collections import defaultdict + +CLASS_NAME_STR = "public class" +MY_GLOBAL_HELPER_CLASS = "UNKNOWN" +DEFAULT_STRUCT = "struct " + MY_GLOBAL_HELPER_CLASS + "*" +SEPARATOR_STRING = "// Code above this line has been added to remove errors" +PUBLIC_CLASS_STRING = "// This class was public" +C_BASIC_DATA_TYPE = ["int", "short", "long", "char", "float", "double"] + +DEFAULT_TYPE = "void *" + +DUMMY_RETURN_TYPE = MY_GLOBAL_HELPER_CLASS + +LIST_OF_INTERNAL_EXCEPTIONS = ["UncheckedIOException", "ArithmeticException", "ArrayIndexOutOfBoundsException", + "ArrayStoreException", "ClassCastException", "IllegalArgumentException", + "IllegalMonitorStateException", "IllegalStateException", "IllegalThreadStateException", + "IndexOutOfBoundsException", "NegativeArraySizeException", "NullPointerException", + "NumberFormatException", "SecurityException", "UnsupportedOperationException"] +NATIVE_ARRAY_VARIABLES = ["length"] +RESERVED_KEYWORDS = ["super"] +VALID_VARIABLE_CHARS = string.ascii_letters + string.digits + "_" +VALID_CHARS = VALID_VARIABLE_CHARS + "$.()[]" +IDENTIFIERS_FOR_FUNCTION = ["public", "private", "protected"] + + +def compile_file_and_get_output(file_path): + tmp_output_path = file_path[:file_path.rfind('/')] + '/tmp/' + file_path[file_path.rfind('/') + 1:] + # print(tmp_output_path) + cmd = ["clang", file_path, "-emit-llvm", "-S", "-c", "-o", tmp_output_path] + # a = os.system(cmd) + return subprocess.run(cmd, stderr=subprocess.PIPE).stderr.decode('utf-8') + # return subprocess.run(['javac', file_path, '-d', '/tmp/'], stderr=subprocess.PIPE).stderr.decode('utf-8') + + +def get_file_content(file_path): + content = None + if not os.path.exists(file_path): + print("file not exists:", file_path) + with open(file_path, 'r') as f: + content = f.read() + return content + + +def remove_file_if_exists(file_path): + try: + os.remove(file_path) + except OSError: + pass + + +def duplicate_file(original_file, new_file): + if (new_file.rfind('/') != -1): + dir_path = new_file[:new_file.rfind('/')] + subprocess.run(['mkdir', '-p', dir_path], stderr=subprocess.PIPE).stderr.decode('utf-8') + subprocess.run(["cp", original_file, new_file], stderr=subprocess.PIPE).stderr.decode('utf-8') + + +def write_file_content(file_path, file_content): + if (file_path.rfind('/') != -1): + dir_path = file_path[:file_path.rfind('/')] + subprocess.run(['mkdir', '-p', dir_path], stderr=subprocess.PIPE).stderr.decode('utf-8') + with open(file_path, 'w') as f: + f.write(file_content) + + +def get_full_path_of_files_in_folder(folder_path): + l_docs = [] + paths = os.listdir(folder_path) + for i in range(len(paths)): + paths[i] = folder_path + paths[i] + if (os.path.isdir(paths[i])): + l_docs.extend(get_full_path_of_files_in_folder(paths[i] + "/")) + else: + l_docs.append(paths[i]) + return l_docs + + +""" Functions to edit code """ + + +def get_l_of_params(s): + if (s == ""): + return [] + s = s.replace("", "Object") + s = re.sub('Class<[^>]*>', 'Class', s) + l_p = s.split(",") + to_ret = [] + pending = [] + for i in range(len(l_p)): + if ("<" in l_p[i] or ">" in l_p[i]): + pending.append(l_p[i]) + else: + if (pending != []): + to_ret.append(",".join(pending)) + pending = [] + to_ret.append(l_p[i]) + + if (pending != []): + to_ret.append(",".join(pending)) + return to_ret + + +def make_dummy_body(return_type): + if return_type == "int" or return_type == "byte" or return_type == "long": + return "{return 1;}" + elif return_type == "boolean": + return "{return True;}" + elif return_type == "float" or return_type == "double": + return "{return 1.0;}" + elif return_type == "char": + return "{return 'c';}" + else: + return "{return null;}" + + +def make_dummy_method(method_sig): + ret_type = method_sig[0] + + method_name = method_sig[1].split("(")[0] + method_param_types = get_l_of_params(method_sig[1].split("(")[1][:-1]) + method_args = [] + for ctr in range(len(method_param_types)): + method_args.append(method_param_types[ctr] + " o" + str(ctr)) + + method_args = "(" + ", ".join(method_args) + ")" + method_definition = '\tpublic ' + ret_type + ' ' + method_name + method_args + make_dummy_body(ret_type) + '\n' + + return method_definition + + +def make_dummy_variable(var): + if "#ptr" in var[0]:#function ptr + ret_type = re.sub("#ptr", "", var[0]) + return ret_type + " (*" + var[1] + ")();\n" + if (type(var) != type('') and len(var) == 2): + if "[10]" in var[0]: + # return '\t' + var[0].replace("[10]", "") + ' ' + var[1] + "[10]" + ";\n" + return '\t' + var[0].replace("[10]", "*") + ' ' + var[1] + ";\n" + return '\t' + var[0] + ' ' + var[1] + ";\n" + else: + return '\t' + DUMMY_RETURN_TYPE + ' ' + var + ";\n" + + +def make_dummy_constructor(class_name, cons_arg): + cons_param_types = get_l_of_params(cons_arg) + cons_params = [] + for ctr in range(len(cons_param_types)): + cons_params.append(cons_param_types[ctr] + " o" + str(ctr)) + cons_params = "(" + ", ".join(cons_params) + ")" + return '\t' + class_name + cons_params + '{}\n' + + +def make_dummy_definition(ident): + global identifiers + + ident_value = identifiers[ident] + + # return "#define " + ident + " " + ident_type + " " + ident_value + "\n" + return "#define " + ident + " " + ident_value + "\n" + + +def make_dummy_global_vars(var): + global allocate_value + #global global_vars + var_type = global_vars[var] + if "[10]" in global_vars[var]: + var_type = var_type.replace("[10]", "") + if "#constant" in var_type: + var_type = var_type.replace("#constant", "") + # return var_type + " " + var + "[10]" + ";\n" + return var_type + "* " + var + ";\n" + # return var_type + " " + var + "[10]" + ";\n" + return var_type + "* " + var + ";\n" + if "#constant" in var_type: + var_type = var_type.replace("#constant", "") + allocate_value += 1 + return "const " + var_type + " " + var + " = " + str(allocate_value) + ";\n" + return global_vars[var] + " " + var + ";\n" + + +def make_dummy_class(class_name, list_of_variables, list_of_method_signatures, list_of_constructor_args): + global identifiers + + variable_definitions = "" + method_definitions = "" + + identifier_definitions = "" + + for var in list_of_variables: + variable_definitions += make_dummy_variable(var) + + for cons_arg in list_of_constructor_args: + method_definitions += make_dummy_constructor(class_name, cons_arg) + + for method_sig in list_of_method_signatures: + method_definitions += make_dummy_method(method_sig) + + method_definitions = method_definitions.strip() + variable_definitions = variable_definitions.strip() + if class_name not in ptr_tag: + class_code = "typedef struct " + class_name + " {\n\t" + variable_definitions + \ + "\n\t" + method_definitions + "\n}" + class_name + ";\n" + else: + class_code = "struct " + class_name+"_t" + " {\n\t" + variable_definitions + \ + "\n\t" + method_definitions + "\n}" + ";\n" + class_code += "typedef struct " + class_name + "_t*" + " " + class_name + ";\n" + return class_code + + +def make_dummy_exception(class_name, list_of_constructor_args, list_of_method_signatures): + method_definitions = "\tpublic " + class_name + "(String errorMessage) { super(errorMessage); }\n" + for cons_arg in list_of_constructor_args: + method_definitions += make_dummy_constructor(class_name, cons_arg) + + for method_sig in list_of_method_signatures: + method_definitions += make_dummy_method(method_sig) + + class_code = "class " + class_name + " extends Exception{\n" + method_definitions + "}\n" + return class_code + + +def get_existing_class_names(): + global l_code + + contenders = [] + for line_no, line in enumerate(l_code): + if (SEPARATOR_STRING in line): + break + if (line.startswith("public class ")): + contenders.append((line_no, line.split(" ")[2])) + if (line.startswith("final class ")): + contenders.append((line_no, line.split(" ")[2])) + if (line.startswith("class ")): + class_name = line.split(" ")[1] + if "{" in class_name: + class_name = class_name[:class_name.index("{")] + contenders.append((line_no, class_name)) + + if (line.strip().startswith("@")): + l_code[line_no] = "//" + l_code[line_no] + return contenders + + +def get_code_for_new_class(class_name): + l_var = [i[1] if len(i) == 2 else i[1:] for i in d_classes_to_add[class_name] if i[0] == "VAR"] + l_method = [i[1] if len(i) == 2 else i[1:] for i in d_classes_to_add[class_name] if i[0] == "METHOD"] + l_exception = [i[1] for i in d_classes_to_add[class_name] if i[0] == "EXCEPTION"] + l_constructor = [i[1] for i in d_classes_to_add[class_name] if i[0] == "CONSTRUCTOR"] + + if (len(l_exception) != 0): + class_code = make_dummy_exception(class_name, l_constructor, l_method) + else: + class_code = make_dummy_class(class_name, l_var, l_method, l_constructor) + + return class_code.split("\n") + + +def get_code_for_existing_class(class_name): + l_var = [i[1] if len(i) == 2 else i[1:] for i in d_classes_to_add[class_name] if i[0] == "VAR"] + l_method = [i[1] if len(i) == 2 else i[1:] for i in d_classes_to_add[class_name] if i[0] == "METHOD"] + + # print(l_var) + method_definitions = "" + variable_definitions = "" + for method_sig in l_method: + method_definitions += make_dummy_method(method_sig) + for variable_sig in l_var: + variable_definitions += make_dummy_variable(variable_sig) + + class_code = variable_definitions + method_definitions + return class_code + + +def add_members_of_existing_existing_class(): + global l_code + global d_classes_to_add + global existing_class_names + + existing_class_names = get_existing_class_names() + + for line_no, class_name in existing_class_names: + # print(class_name) + class_code = get_code_for_existing_class(class_name).strip().split("\n") + if (class_code != ['']): + l_code = l_code[:line_no + 1] + class_code + l_code[line_no + 1:] + # reset dictionary for existing classes + d_classes_to_add[class_name] = set() + + +def copy_a_graph(ori_graph): + new_graph = nx.DiGraph() + + for pre, nxt in ori_graph.edges(): + new_graph.add_edge(pre, nxt) + + return new_graph + + +def get_new_code_to_add(): + global identifiers + #global i + + new_code_to_add = [] + + identifier_definitions = "" + + # for ident in identifiers.keys(): + # identifier_definitions += make_dummy_definition(ident) + + # if identifier_definitions: + # new_code_to_add += identifier_definitions.split("\n") + for define_line in define_lines: + new_code_to_add += [define_line + "\n"] + + queue = [] + searched = [] + tmp_order_graph = copy_a_graph(struct_order_graph) + + for struct_name in tmp_order_graph.nodes(): + if tmp_order_graph.in_degree(struct_name) == 0: + queue.append(struct_name) + searched.append(struct_name) + + # print(queue) + while queue: + struct_name = queue.pop(0) + if struct_name in d_classes_to_add: + new_code_to_add += get_code_for_new_class(struct_name) + for nxt_name in tmp_order_graph.neighbors(struct_name): + if nxt_name not in searched and tmp_order_graph.in_degree(nxt_name) == 1: + queue.append(nxt_name) + searched.append(nxt_name) + tmp_order_graph.remove_node(struct_name) + + for struct_name in d_classes_to_add.keys(): + if struct_name not in searched: + new_code_to_add += get_code_for_new_class(struct_name) + # print(new_code_to_add) + + for var in global_vars.keys(): + new_code_to_add += [make_dummy_global_vars(var)] + + return new_code_to_add + + +def compress_code(source_code): + lines = source_code.split("\n") + new_lines = [] + line_id = len(lines) - 1 + pre_line = "" + while (line_id >= 0): + current_line = lines[line_id] + if current_line.strip().startswith("."): + pre_line = current_line.strip() + pre_line + else: + current_line = current_line.rstrip() + pre_line + new_lines.insert(0, current_line) + pre_line = "" + line_id -= 1 + new_source_code = "" + for line in new_lines: + new_source_code += line + "\n" + # print(new_source_code) + return new_source_code + + +def get_new_code_line_after_add_data_type(pointer, valid_chars, target_type, code_line, direction): + target_type = "(" + target_type + ")" + "(Object)" + if direction == "right": + ptr = len(pointer) - 1 + + while (code_line[ptr] not in valid_chars): + ptr = ptr + 1 + if (ptr > len(code_line)): + return code_line + return code_line[:ptr] + target_type + code_line[ptr:] + + # direction = "left" + ptr = len(pointer) - 2 + num_brackets = 0 + num_sq_brackets = 0 + + # print(code_line, code_line[ptr + 1]) + if (code_line[:ptr + 1].strip() in ["switch"]): + ptr += 1 + + if (code_line[ptr + 1] not in valid_chars): + # bitPointer = 8 + i; + while (code_line[ptr] == " "): + ptr -= 1 + if (ptr < 0): + return code_line + + while (num_sq_brackets != 0 or num_brackets != 0 or code_line[ptr] in valid_chars): + if (code_line[ptr] == "]"): + num_sq_brackets += 1 + elif (code_line[ptr] == "[" and num_sq_brackets != 0): + num_sq_brackets -= 1 + + elif (code_line[ptr] == ")"): + num_brackets += 1 + elif (code_line[ptr] == "(" and num_brackets != 0): + num_brackets -= 1 + + elif (code_line[ptr] in "[(" and num_brackets == 0 and num_sq_brackets == 0): + break + ptr -= 1 + + if (ptr < 0): + return code_line + + if (code_line[ptr] != " "): + ptr += 1 + + if (code_line[ptr:].strip().startswith("return")): + while (code_line[ptr] == " "): + ptr += 1 + ptr += len("return ") + + if (code_line[ptr - 3:ptr] == "new"): + # code_line = code_line[:ptr-3] + "(" + code_line[ptr-3:-1] + ")" + code_line[-1] + ptr -= 3 + + return code_line[:ptr] + target_type + code_line[ptr:] + + +def compare_args_by_index(sig_src, sig_dst): + l_src = sig_src[1:-1].split(",") + l_dst = sig_dst[1:-1].split(",") + + for i in range(len(l_src)): + if (l_src[i] != l_dst[i]): + return i, l_dst[i] + + +def get_new_code_line_after_add_suitable_method_signature(pointer, method_desc, code_line, possible_matches): + method_name = method_desc.split("(")[0].strip() + method_args = method_desc.strip()[len(method_name):] + + # always apply the match - argument mismatch; ... cannot be converted to ... + match_to_apply = "" + for i in possible_matches: + if (i[1].startswith("(argument mismatch;")): + match_to_apply = i[0][i[0].find("("): i[0].find(")") + 1] + break + + if (match_to_apply == ""): # no match found - make new method/constructor otherwise + class_name = possible_matches[0][0].split(" ")[1].split(".")[0] + if (possible_matches[0][0].startswith("method")): + reverse_method_to_class_mapping[method_desc] = class_name + d_classes_to_add[class_name].add(("METHOD", DUMMY_RETURN_TYPE, method_desc)) + else: + d_classes_to_add[class_name].add(("CONSTRUCTOR", method_desc.split("(")[1][:-1])) + return code_line + + index_to_change, target_type = compare_args_by_index(method_args, match_to_apply) + target_type = "(" + target_type + ")" + "(Object)" + + ptr = len(pointer) - 2 + while (not ( + code_line[ptr:].startswith(method_name) or code_line[ptr:].startswith("this") or code_line[ptr:].startswith( + "super"))): + ptr += 1 + if (ptr > len(code_line)): + return code_line + + num_brackets = num_comma = 0 + + while (not (num_brackets == 1 and num_comma == index_to_change)): + if (code_line[ptr] == ")"): + num_brackets -= 1 + elif (code_line[ptr] == "("): + num_brackets += 1 + if (num_brackets == 1 and code_line[ptr] == ","): + num_comma += 1 + ptr += 1 + + if (ptr > len(code_line)): + return code_line + + code_line = code_line[:ptr] + target_type + code_line[ptr:] + return code_line + + +def ls_does_contain(ls, subs): + for s in ls: + if (subs in s): + return True + return False + + +def b_begins_with_one_of_a(a, b): + for i in a: + if (b.startswith(i)): + return True + return False + + +# adds "throws Throwable" to the first function in class +def make_existing_function_throwable(): + global l_code + + line_number = 0 + while line_number < len(l_code): + if (b_begins_with_one_of_a(IDENTIFIERS_FOR_FUNCTION, l_code[line_number].strip())): + break + line_number += 1 + + if (line_number == len(l_code)): + return + + if ("throws " in l_code[line_number]): + if ("throws Throwable" in l_code[line_number]): + return + l_code[line_number] = l_code[line_number].replace("throws ", "throws Throwable, ") + return + + if (l_code[line_number].strip()[-1] == "{"): + l_code[line_number] = l_code[line_number].rstrip()[:-1] + " throws Throwable {" + + elif (l_code[line_number].strip()[-1] == ")"): + if line_number < len(l_code) and "throws " in l_code[line_number + 1]: + # 12private ColumnVector backSubstitution(ColumnVector y) throws Throwable + # 13 throws MatrixException + l_code[line_number + 1] = l_code[line_number + 1].replace("throws ", "throws Throwable, ") + return + l_code[line_number] = l_code[line_number].rstrip() + " throws Throwable " + + return + + +def search_var(class_name, var_name): + global d_classes_to_add + if class_name in d_classes_to_add: + for item in d_classes_to_add[class_name]: + if var_name == item[2]: + return item + if class_name == "global": + return "VAR", global_vars[var_name], var_name + + +def tokenize_into_list(_str): + #while re.search(r"\[[^\[\]]*?\]", _str): + # _str = re.sub(r"\[[^\[\]]*?\]", "", _str) + if "[" in _str: + _str = _str[:_str.find("[")] + c_token = r'[a-zA-Z_][a-zA-Z_\d]*' + pattern = re.compile(c_token) + tokens = pattern.findall(_str) + return tokens + + +def tokenizer(source_code): + return + + +# def parse_quote(_str): +# def extract_error_tag(source_code_line, tag_line): +def find_next_char(desc, ptr): + real_desc = desc[ptr:] + c_token = r'([a-zA-Z_][a-zA-Z_\d]*)' + if not re.match(c_token, real_desc): + return "" + first_word = re.match(c_token, real_desc).group(1) + # print(first_word) + if len(real_desc) <= len(first_word): + return "" + real_desc = real_desc[len(first_word):] + for _char in real_desc: + if _char and _char != " ": + return _char + + +class Exception: + + def __init__(self, e, tag): + self.l_e = [i for i in e.split('\n')] + self.line_number = int(self.l_e[0].strip()[:self.l_e[0].find(":")]) - 1 + self.exception_desc = self.l_e[0].strip()[self.l_e[0].find("error: ") + 7:] + if '(aka' in self.exception_desc: + self.exception_desc = re.sub(r"\(aka '[^()]*'\) ", "", self.exception_desc) + if (self.can_touch_this()): + if ("error: " in self.l_e[0]): + if tag: + self.analyse_type_missing_exception() + else: + self.analyse_exception() + # print("finish") + + def can_touch_this(self): + return self.line_number not in dont_touch_list + + def add_to_dont_touch_list(self): + global dont_touch_list + dont_touch_list.add(self.line_number) + + def analyse_exception(self): + global l_code + global d_classes_to_add + global reverse_method_to_class_mapping + global exception_list_index + + global alias + global identifiers + global var_mapping + global struct_order_graph + global global_vars + global ptr_tag + + global define_lines + + # print("analysis begin:", self.exception_desc) + # unknown type name 'X' + # Action: creat struct 'X' + # array has incomplete element type 'X' + # incomplete definition of type '' + # variable has incomplete type 'enum comm' + # subscript of pointer to incomplete type 'struct chng' + if self.exception_desc.startswith("unknown type name ") \ + or self.exception_desc.startswith("array has incomplete element type") \ + or self.exception_desc.startswith("incomplete definition of type") \ + or self.exception_desc.startswith("subscript of pointer to incomplete type ") \ + or self.exception_desc.startswith("variable has incomplete type"): + # struct_name = self.exception_desc.split(" ")[-1][1:-1] + struct_name = self.exception_desc.split("'")[1] + if struct_name.startswith("struct "): + struct_name = struct_name[7:] + # print("unknown type name", struct_name) + d_classes_to_add[struct_name] = set() + return + + # invalid application of 'sizeof' to an incomplete type 'struct capaths' + if self.exception_desc.startswith("invalid application of") and "to an incomplete type" in self.exception_desc: + # struct_name = self.exception_desc.split(" ")[-1][1:-1] + struct_name = self.exception_desc.split("'")[3] + if struct_name.startswith("struct "): + struct_name = struct_name[7:] + # print("unknown type name", struct_name) + d_classes_to_add[struct_name] = set() + return + + # must use 'struct' tag to refer to type 'X' + # Action: typedef struct 'X'{}'X'; + if self.exception_desc.startswith("must use 'struct' tag to refer to type "): + # struct_name = self.exception_desc.split(" ")[-1][1:-1] + struct_name = self.exception_desc.split("'")[3] + if struct_name.startswith("struct "): + struct_name = struct_name[7:] + # print("'struct' tag", struct_name) + alias[struct_name] = True + return + + # no member named 'X' in 'Y' + # no member named 'journalOff' in 'struct Pager'; did you mean 'journalHdr'? + # Action: add UNKNown 'X' in 'Y' + # vect->n_vsize = size; + # ~~~~ ^ + if self.exception_desc.startswith("no member named "): + struct_name = self.exception_desc.split("'")[3] + if struct_name.startswith("struct "): + struct_name = struct_name[7:] + member_name = self.exception_desc.split("'")[1] + + # print(member_name, "not in", struct_name) + #check pointer + _, left_error_end = re.search(r"~+", self.l_e[2]).span() + r_begin = self.l_e[2].find("^") + if r_begin > left_error_end: + call_ptr = self.l_e[1][left_error_end + 1:r_begin].strip() + if call_ptr == "->": + new_struct_name = struct_name+"_t" + #d_classes_to_add[new_struct_name].add(('VAR', DEFAULT_STRUCT, member_name)) + ptr_tag[struct_name] = True + d_classes_to_add[struct_name].add(('VAR', DEFAULT_STRUCT, member_name)) + + var_mapping[member_name] = struct_name + return + + # use of undeclared identifier 'X' + if self.exception_desc.startswith("use of undeclared identifier "): + # struct_name = self.exception_desc.split(" ")[-1][1:-1] + identifier_name = self.exception_desc.split("'")[1] + if identifier_name in d_classes_to_add: # exist + return + # print("undeclared identifier", identifier_name) + + ptr = self.l_e[2].find("^") + + nxt_char = find_next_char(self.l_e[1], ptr) + # print(nxt_char) + if nxt_char == "*" or nxt_char.isalpha(): + d_classes_to_add[identifier_name] = set() + return + + if (ptr - 1 >= 0 and self.l_e[1][ptr - 1] == '[') or (ptr - 2 >= 0 and self.l_e[1][ptr - 2] == '['): + global_vars[identifier_name] = "int" + else: + global_vars[identifier_name] = MY_GLOBAL_HELPER_CLASS + return + # var_mapping[identifier_name] = "global" + + # invalid operands to binary expression + # invalid operands to binary expression ('int' and 'bool' (aka 'struct bool')) + # 4327:int accuracy = 100 - (mon->m_timed[MON_TMD_STUN] ? STUN_HIT_REDUCTION : 0); + if self.exception_desc.startswith("invalid operands to binary expression "): + # left_error, right_error = extract_error_tag() + left_error_begin, left_error_end = re.search(r"~+", self.l_e[2]).span() + right_error_begin, right_error_end = re.search(r"~+", self.l_e[2][left_error_end:]).span() + right_error_begin += left_error_end + right_error_end += left_error_end + + # print(self.l_e[2][left_error_end:]) + left_error_code = self.l_e[1][left_error_begin:left_error_end] + right_error_code = self.l_e[1][right_error_begin:right_error_end] + + left_error_type = self.exception_desc.split("'")[1] + right_error_type = self.exception_desc.split("'")[3] + + left_error_tokens = tokenize_into_list(left_error_code) + right_error_tokens = tokenize_into_list(right_error_code) + # print(left_error_tokens) + if MY_GLOBAL_HELPER_CLASS in left_error_type: + # change left type + for token in left_error_tokens[-1::-1]: + # print(token) + if token in var_mapping: + struct_name = var_mapping[token] + item = search_var(struct_name, token) + d_classes_to_add[struct_name].remove(item) + d_classes_to_add[struct_name].add(('VAR', 'int', token)) + # struct_order_graph.add_edge(right_error_type, struct_name) + break + elif token in global_vars: + global_vars[token] = "int" + break + if MY_GLOBAL_HELPER_CLASS in right_error_type: + # change right type + for token in right_error_tokens[-1::-1]: + if token in var_mapping: + struct_name = var_mapping[token] + item = search_var(struct_name, token) + d_classes_to_add[struct_name].remove(item) + d_classes_to_add[struct_name].add(('VAR', 'int', token)) + # struct_order_graph.add_edge(left_error_type, struct_name) + break + elif token in global_vars: + global_vars[token] = "int" + break + if MY_GLOBAL_HELPER_CLASS not in left_error_type and MY_GLOBAL_HELPER_CLASS not in right_error_type: + # add define + if left_error_type in C_BASIC_DATA_TYPE: + define_lines.add("#define " + right_error_type + " " + left_error_type) + elif right_error_type in C_BASIC_DATA_TYPE: + define_lines.add("#define " + left_error_type + " " + right_error_type) + return + + # array subscript is not an integer + # action: + if self.exception_desc.startswith("array subscript is not an integer"): + left_error_begin, left_error_end = re.search(r"~+", self.l_e[2]).span() + + left_error_code = self.l_e[1][left_error_begin:left_error_end] + + left_error_tokens = tokenize_into_list(left_error_code) + + for token in left_error_tokens[-1::-1]: + if token in var_mapping: + struct_name = var_mapping[token] + d_classes_to_add[struct_name].remove(('VAR', 'void*', token)) + d_classes_to_add[struct_name].add(('VAR', "int", token)) + # struct_order_graph.add_edge(right_error_type, struct_name) + break + return + + # error: called object type 'struct UNKNOWN *' is not a function or function pointer + + # operand of type 'void' where arithmetic or pointer type is required + if re.match("operand of type '.*' where arithmetic or pointer type is required", self.exception_desc): + left_error_begin, left_error_end = re.search(r"~+", self.l_e[2]).span() + left_error_begin -= 1 + left_error_code = self.l_e[1][left_error_begin:left_error_end] + + left_error_tokens = tokenize_into_list(left_error_code) + + # print(left_error_tokens) + # print(global_vars) + for token in left_error_tokens[-1::-1]: + if token in var_mapping: + struct_name = var_mapping[token] + item = search_var(struct_name, token) + d_classes_to_add[struct_name].remove(item) + if "[10]" in item[1]: + target_type = "int[10]" + else: + target_type = "int" + d_classes_to_add[struct_name].add(('VAR', target_type, token)) + if "struct " in target_type: + target_type = target_type[7:] + struct_order_graph.add_edge(target_type, struct_name) + break + elif token in global_vars: + if "[10]" in global_vars[token]: + target_type = "int[10]" + else: + target_type = "int" + global_vars[token] = target_type + break + + # member reference base type 'void' is not a structure or union + + # statement requires expression of integer type ('void *' invalid) + + # cannot take the address of an rvalue of type + + # expression is not assignable + + # declaration of anonymous struct must be a definition + + # returning 'int' from a function with incompatible result type 'SQLITE_PRIVATE' + if re.match(r"returning '.*' from a function with incompatible result type '.*'", self.exception_desc): + left_error_begin, left_error_end = re.search(r"~+", self.l_e[2]).span() + + left_error_code = self.l_e[1][left_error_begin - 1:left_error_end] + + left_error_tokens = tokenize_into_list(left_error_code) + + target_type = self.exception_desc.split("'")[3] + + # print(left_error_tokens, target_type) + for token in left_error_tokens[-1::-1]: + if token in var_mapping: + struct_name = var_mapping[token] + item = search_var(struct_name, token) + d_classes_to_add[struct_name].remove(item) + d_classes_to_add[struct_name].add(('VAR', target_type, token)) + if "struct " in target_type: + target_type = target_type[7:] + struct_order_graph.add_edge(target_type, struct_name) + # print(target_type[7:], struct_name) + break + elif token in global_vars: + global_vars[token] = target_type + return + + # member reference type 'UNKNOWN' (aka 'struct UNKNOWN') is not a pointer; + if re.match(r"member reference type .* is not a pointer", self.exception_desc): + reference_type = self.exception_desc.split("'")[1] + if reference_type in d_classes_to_add: + ptr_tag[reference_type] = True + return + left_error_begin, left_error_end = re.search(r"~+", self.l_e[2]).span() + + left_error_code = self.l_e[1][left_error_begin:left_error_end] + + left_error_tokens = tokenize_into_list(left_error_code) + # print(left_error_tokens) + for token in left_error_tokens[-1::-1]: + if token in var_mapping: + struct_name = var_mapping[token] + + item = search_var(struct_name, token) + d_classes_to_add[struct_name].remove(item) + d_classes_to_add[struct_name].add(('VAR', item[1] + "*", token)) + break + elif token in global_vars: + # item = ("VAR", global_vars[token], token) + global_vars[token] = global_vars[token] + "*" + break + return + + # member reference type 'struct UNKNOWN *' is a pointer + if re.match(r"member reference type '.*' is a pointer", self.exception_desc): + left_error_begin, left_error_end = re.search(r"~+", self.l_e[2]).span() + + left_error_code = self.l_e[1][left_error_begin:left_error_end] + + left_error_tokens = tokenize_into_list(left_error_code) + # print(left_error_tokens) + for token in left_error_tokens[-1::-1]: + if token in var_mapping: + struct_name = var_mapping[token] + + item = search_var(struct_name, token) + d_classes_to_add[struct_name].remove(item) + d_classes_to_add[struct_name].add(('VAR', item[1].replace("*", ""), token)) + break + elif token in global_vars: + # item = ("VAR", global_vars[token], token) + global_vars[token] = global_vars[token].replace("*", "") + break + return + + # initializing 'int' with an expression of incompatible type 'UNKNOWN' + if re.match(r"initializing '.*' with an expression of incompatible type '.*'", self.exception_desc): + target_type = self.exception_desc.split("'")[1] + if " " in target_type: + target_type = target_type.split(" ")[-1] + + # origin_type = self.exception_desc.split("'")[-1] + + left_error_begin, left_error_end = re.search(r"~+", self.l_e[2]).span() + + left_error_code = self.l_e[1][left_error_begin:left_error_end] + + left_error_tokens = tokenize_into_list(left_error_code) + + # print(left_error_tokens, target_type) + for token in left_error_tokens[-1::-1]: + + if token in var_mapping: + struct_name = var_mapping[token] + item = search_var(struct_name, token) + d_classes_to_add[struct_name].remove(item) + d_classes_to_add[struct_name].add(('VAR', target_type, token)) + if "struct " in target_type: + target_type = target_type[7:] + struct_order_graph.add_edge(target_type, struct_name) + break + elif token in global_vars: + + global_vars[token] = target_type + break + return + + # print("ok") + + # assigning to 'const char *' from incompatible type 'UNKNOWN' (aka 'struct UNKNOWN') + if re.match(r"assigning to '.*' from incompatible type '.*'", self.exception_desc): + target_type = self.exception_desc.split("'")[1] + origin_type = self.exception_desc.split("'")[3] + + if MY_GLOBAL_HELPER_CLASS in target_type: + # assigning to 'struct UNKNOWN' from incompatible type 'char' + ptr = self.l_e[2].find("^") + error_code = self.l_e[1][:ptr] + target_type = origin_type + left_error_tokens = tokenize_into_list(error_code) + # print(left_error_tokens, target_type) + for token in left_error_tokens[-1::-1]: + if token in var_mapping: + struct_name = var_mapping[token] + item = search_var(struct_name, token) + d_classes_to_add[struct_name].remove(item) + d_classes_to_add[struct_name].add(('VAR', target_type, token)) + if "struct " in target_type: + target_type = target_type[7:] + struct_order_graph.add_edge(target_type, struct_name) + break + elif token in global_vars: + global_vars[token] = target_type + break + return + + left_error_begin, left_error_end = re.search(r"~+", self.l_e[2]).span() + + left_error_code = self.l_e[1][left_error_begin:left_error_end] + + left_error_tokens = tokenize_into_list(left_error_code) + #print(left_error_tokens, target_type) + for token in left_error_tokens[-1::-1]: + if token in var_mapping: + struct_name = var_mapping[token] + item = search_var(struct_name, token) + d_classes_to_add[struct_name].remove(item) + d_classes_to_add[struct_name].add(('VAR', target_type, token)) + if "struct " in target_type: + target_type = target_type[7:] + struct_order_graph.add_edge(target_type, struct_name) + break + elif token in global_vars: + global_vars[token] = target_type + break + return + + # duplicate + if self.exception_desc.startswith("duplicate"): + line_number = 0 + # print(self.line_number) + l_code[line_number] = "//" + l_code[line_number] + + # cannot combine with previous 'type-name' declaration specifier + if self.exception_desc.startswith("cannot combine with previous 'type-name' declaration specifier"): + ptr = self.l_e[2].find("^") + # print(ptr, self.line_number) + # print(l_code) + # print(len(l_code)) + l_code[self.line_number] = l_code[self.line_number][ptr:] + return + + # expected ';' after top level declarator + if self.exception_desc.startswith("expected ';' after top level declarator"): + code_line = l_code[self.line_number] + words = code_line.split(' ') + new_code_line = "" + for word in words[1:]: + new_code_line += word + " " + new_code_line = new_code_line.strip() + l_code[self.line_number] = new_code_line + + # subscripted value is not an array + if self.exception_desc.startswith("subscripted value is not an array, pointer, or vector"): + left_error_begin, left_error_end = re.search(r"~+", self.l_e[2]).span() + + left_error_code = self.l_e[1][left_error_begin:left_error_end] + + left_error_tokens = tokenize_into_list(left_error_code) + + for token in left_error_tokens[-1::-1]: + if token in var_mapping: + struct_name = var_mapping[token] + item = search_var(struct_name, token) + d_classes_to_add[struct_name].remove(item) + target_type = item[1]+"[10]" + d_classes_to_add[struct_name].add(('VAR', target_type, token)) + if "struct " in target_type: + target_type = target_type[7:] + struct_order_graph.add_edge(target_type, struct_name) + break + elif token in global_vars: + target_type = global_vars[token]+"[10]" + global_vars[token] = target_type + break + + # passing 'UNKNOWN' (aka 'struct UNKNOWN') to parameter of incompatible type 'unsigned long' + if re.match("passing '.*' to parameter of incompatible type '.*'", self.exception_desc): + src_type = self.exception_desc.split("'")[1] + target_type = self.exception_desc.split("'")[3] + + left_error_begin, left_error_end = re.search(r"~+", self.l_e[2]).span() + left_error_begin -= 1 + left_error_code = self.l_e[1][left_error_begin:left_error_end] + + left_error_tokens = tokenize_into_list(left_error_code) + + for token in left_error_tokens[-1::-1]: + if token in var_mapping: + struct_name = var_mapping[token] + item = search_var(struct_name, token) + d_classes_to_add[struct_name].remove(item) + + d_classes_to_add[struct_name].add(('VAR', target_type, token)) + if "struct " in target_type: + target_type = target_type[7:] + struct_order_graph.add_edge(target_type, struct_name) + break + elif token in global_vars: + + global_vars[token] = target_type + break + # 1747.c:29:29: error: initializer element is not a compile-time constant + + # arithmetic on a pointer to an incomplete type + if re.match("arithmetic on a pointer to an incomplete type '.*'", self.exception_desc): + left_error_begin, left_error_end = re.search(r"~+", self.l_e[2]).span() + #left_error_begin -= 1 + left_error_code = self.l_e[1][left_error_begin:left_error_end] + + left_error_tokens = tokenize_into_list(left_error_code) + target_type = "int" + for token in left_error_tokens[-1::-1]: + if token in var_mapping: + struct_name = var_mapping[token] + item = search_var(struct_name, token) + d_classes_to_add[struct_name].remove(item) + d_classes_to_add[struct_name].add(('VAR', target_type, token)) + if "struct " in target_type: + target_type = target_type[7:] + struct_order_graph.add_edge(target_type, struct_name) + break + elif token in global_vars: + global_vars[token] = target_type + break + return + + # called object type 'struct UNKNOWN *' is not a function or function pointer + if re.match("called object type '.*' is not a function or function pointer", self.exception_desc): + left_error_begin, left_error_end = re.search(r"~+", self.l_e[2]).span() + # left_error_begin -= 1 + left_error_code = self.l_e[1][left_error_begin:left_error_end] + + left_error_tokens = tokenize_into_list(left_error_code) + #target_type = "int" + for token in left_error_tokens[-1::-1]: + if token in var_mapping: + struct_name = var_mapping[token] + item = search_var(struct_name, token) + d_classes_to_add[struct_name].remove(item) + target_type = item[1] + "#ptr" + d_classes_to_add[struct_name].add(('VAR', target_type, token)) + if "struct " in target_type: + target_type = target_type[7:] + struct_order_graph.add_edge(target_type, struct_name) + break + elif token in global_vars: + target_type = global_vars[token] + "#ptr" + global_vars[token] = target_type + break + return + + # expression is not an integer constant expression + if re.match("expression is not an integer constant expression", self.exception_desc): + left_error_begin, left_error_end = re.search(r"~+", self.l_e[2]).span() + left_error_begin -= 1 + #print(left_error_begin, left_error_end) + left_error_code = self.l_e[1][left_error_begin:left_error_end] + + left_error_tokens = tokenize_into_list(left_error_code) + target_type = "int#constant" + # print(left_error_tokens) + # print(global_vars) + for token in left_error_tokens[-1::-1]: + if token in var_mapping: + struct_name = var_mapping[token] + item = search_var(struct_name, token) + d_classes_to_add[struct_name].remove(item) + # target_type = item[1] + "#ptr" + d_classes_to_add[struct_name].add(('VAR', target_type, token)) + if "struct " in target_type: + target_type = target_type[7:] + struct_order_graph.add_edge(target_type, struct_name) + break + elif token in global_vars: + # target_type = global_vars[token] + "#ptr" + global_vars[token] = target_type + break + return + + # declaration of anonymous struct must be a definition + if re.match("declaration of anonymous struct must be a definition", self.exception_desc): + if "typedef struct" in self.l_e[1]: + struct_type = self.l_e[1].split(" ")[2] + if struct_type in d_classes_to_add: + d_classes_to_add.pop(struct_type) + return + + + #unsolved + + + + def analyse_type_missing_exception(self): + global l_code + global d_classes_to_add + global reverse_method_to_class_mapping + global exception_list_index + + global alias + global identifiers + global var_mapping + global struct_order_graph + global global_vars + + global define_lines + + # print("analysis begin1:", self.exception_desc) + # unknown type name 'X' + # Action: creat struct 'X' + # array has incomplete element type 'X' + # incomplete definition of type + # variable has incomplete type 'enum comm' + # subscript of pointer to incomplete type 'struct chng' + if self.exception_desc.startswith("unknown type name ") \ + or self.exception_desc.startswith("array has incomplete element type") \ + or self.exception_desc.startswith("incomplete definition of type") \ + or self.exception_desc.startswith("subscript of pointer to incomplete type ") \ + or self.exception_desc.startswith("variable has incomplete type"): + # struct_name = self.exception_desc.split(" ")[-1][1:-1] + struct_name = self.exception_desc.split("'")[1] + if struct_name.startswith("struct "): + struct_name = struct_name[7:] + # print("unknown type name", struct_name) + d_classes_to_add[struct_name] = set() + return + + # invalid application of 'sizeof' to an incomplete type 'struct capaths' + if self.exception_desc.startswith("invalid application of") and "to an incomplete type" in self.exception_desc: + # struct_name = self.exception_desc.split(" ")[-1][1:-1] + struct_name = self.exception_desc.split("'")[3] + if struct_name.startswith("struct "): + struct_name = struct_name[7:] + # print("unknown type name", struct_name) + d_classes_to_add[struct_name] = set() + return + + # must use 'struct' tag to refer to type 'X' + # Action: typedef struct 'X'{}'X'; + if self.exception_desc.startswith("must use 'struct' tag to refer to type "): + # struct_name = self.exception_desc.split(" ")[-1][1:-1] + struct_name = self.exception_desc.split("'")[3] + if struct_name.startswith("struct "): + struct_name = struct_name[7:] + # print("'struct' tag", struct_name) + alias[struct_name] = True + return + + # use of undeclared identifier 'X' + if self.exception_desc.startswith("use of undeclared identifier "): + # struct_name = self.exception_desc.split(" ")[-1][1:-1] + identifier_name = self.exception_desc.split("'")[1] + # print("use of undeclared identifier", identifier_name) + if identifier_name in d_classes_to_add: # exist + return + ptr = self.l_e[2].find("^") + + nxt_char = find_next_char(self.l_e[1], ptr) + # print(nxt_char) + if nxt_char == "*" or nxt_char.isalpha(): + d_classes_to_add[identifier_name] = set() + return + # otherwise identifier is global variable + global_vars[identifier_name] = MY_GLOBAL_HELPER_CLASS + + # cannot combine with previous 'type-name' declaration specifier + if self.exception_desc.startswith("cannot combine with previous 'type-name' declaration specifier"): + ptr = self.l_e[2].find("^") + # print(ptr, self.line_number) + # print(l_code) + # print(len(l_code)) + l_code[self.line_number] = l_code[self.line_number][ptr:] + return + + +def preprocess(file_path): + new_content = "#include \n" \ + "#include \n" \ + "#include \n" \ + "#include \n" \ + "#include \n" \ + "#include \n" \ + "#include \n" \ + "#define bool int\n" \ + "#define true 1\n" \ + "#define false 0\n" + # print(file_path) + old_content = get_file_content(file_path).strip() + + old_content = re.sub(r"/\*.*?\*/","",old_content,flags=re.S) + + new_content += old_content + + write_file_content(file_path, new_content) + + +def reset_d_classes(): + global d_classes_to_add + global exception_list_index + d_classes_to_add = defaultdict(set) + d_classes_to_add[DUMMY_RETURN_TYPE] = set() + d_classes_to_add[MY_GLOBAL_HELPER_CLASS] = set() + reverse_method_to_class_mapping = defaultdict() + exception_list_index = 0 + + +def add_tag(file_content): + lines = file_content.split("\n") + tagged_content = "" + for idx in range(len(lines)): + line = lines[idx] + tagged_content += str(idx + 1) + line + "\n" + return tagged_content + + +def handle_file(src_file_path, output_file_path, times_to_try_compile): + global l_code + global dont_touch_list + # reset all data dict + reset_d_classes() + + file_path = output_file_path + + duplicate_file(src_file_path, file_path) + + preprocess(file_path) + + l_code = get_file_content(file_path).split("\n") + + # make_existing_function_throwable() + + i = 0 + fw = open("info1.txt", 'a') + fw1 = open("info2.txt", 'a') + + tag = True + + while (i < times_to_try_compile): + # l_code = get_file_content(file_path).split("\n") #add + + + s = compile_file_and_get_output(file_path) + # fw.write(add_tag(get_file_content(file_path)) + "\n") + # fw.write(s + "\n") + # print(s) + # if i == 8: + # fw1.write(add_tag(get_file_content(file_path)) + "\n") + # fw1.write(s + "\n") + l_errors = [l.strip() for l in s.split(file_path + ":")][1:] + l_errors = [i for i in l_errors if (" error: " in i and " warning: " not in i)] + + num_errors = len(l_errors) + + if (num_errors == 0): + return True + # print("in") + dont_touch_list = set() + + for j in range(len(l_errors)): + Exception(l_errors[j], tag) + # print("analysis end!") + if i == 1: + tag = False # exe tag + + generated_code = get_new_code_to_add() + + if SEPARATOR_STRING in l_code: + l_code = l_code[l_code.index(SEPARATOR_STRING) + 1:] + + new_code = "\n".join(generated_code + [SEPARATOR_STRING] + l_code) + + write_file_content(file_path, new_code) + + l_code = get_file_content(file_path).split("\n") + + i += 1 + remove_file_if_exists(output_file_path) + return False + + +if __name__ == '__main__': + + l_code = [] + dont_touch_list = set() + d_classes_to_add = defaultdict() + reverse_method_to_class_mapping = defaultdict() + alias = defaultdict() + identifiers = defaultdict() + exception_list_index = 0 + var_mapping = defaultdict() + struct_order_graph = nx.DiGraph() + global_vars = defaultdict() + ptr_tag = defaultdict() + allocate_value = 0 + + define_lines = set() + + if (len(sys.argv) != 4): + print("Usage:", sys.argv[0], "input_file", "output_file", "compile_tries") + exit() + + inp_file_path = sys.argv[1] + output_file_path = sys.argv[2] + times_to_try_compile = int(sys.argv[3]) + + result = False + remove_file_if_exists(output_file_path) + # runtime_error + try: + result = handle_file(inp_file_path, output_file_path, times_to_try_compile) + except: + print("Some error occured.") + remove_file_if_exists(output_file_path) + # if not result: + # ff = open("record.txt", 'a') + # ff.write(inp_file_path + "\n") + print(result) \ No newline at end of file