@dataclass
class MyDataCollatorForPreTraining:
    """Collate tokenized examples into a batch, optionally applying BERT-style MLM masking.

    Attributes:
        tokenizer: Tokenizer used to look up special-token ids (``token_to_id``)
            and the vocabulary size (``get_vocab_size``).
        mlm: When True, ``__call__`` masks ``input_ids`` and adds ``labels``.
        mlm_probability: Per-token probability of being selected for masking.
        pad_to_multiple_of: Forwarded to the padding helpers.

    Raises:
        ValueError: On construction, if ``mlm`` is True and the tokenizer has
            no ``[MASK]`` token.
    """

    # Forward-reference string: the annotation is documentation only.
    tokenizer: "tokenizers.Tokenizer"
    mlm: bool = True
    mlm_probability: float = 0.15
    pad_to_multiple_of: Optional[int] = None

    # Tokens that are never selected for masking when the caller does not
    # supply a precomputed special-tokens mask (not a dataclass field).
    _SPECIAL_TOKENS = ("[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]")

    def __post_init__(self):
        if self.mlm and self.tokenizer.token_to_id("[MASK]") is None:
            raise ValueError(
                "This tokenizer does not have a mask token which is necessary for masked language modeling. "
                "You should pass `mlm=False` to train on causal language modeling instead."
            )

    def __call__(
        self,
        examples: List[Union[List[int], torch.Tensor, Dict[str, torch.Tensor]]],
    ) -> Dict[str, torch.Tensor]:
        """Build a batch from ``examples``.

        Dict-like examples go through ``pad``; plain lists / tensors of ids go
        through ``_collate_batch`` and yield only ``input_ids``.

        With ``mlm=True`` the ``input_ids`` are masked and ``labels`` added.
        With ``mlm=False`` a leading axis of size 1 is squeezed away from
        ``input_ids`` (and ``token_type_ids`` when present).
        """
        if isinstance(examples[0], (dict, BatchEncoding)):
            batch = pad(
                encoded_inputs=examples,
                return_tensors="pt",
                pad_to_multiple_of=self.pad_to_multiple_of,
            )
        else:
            batch = {
                "input_ids": _collate_batch(
                    examples, self.tokenizer, pad_to_multiple_of=self.pad_to_multiple_of
                )
            }

        # If a special-tokens mask was precomputed, it must not reach the model.
        special_tokens_mask = batch.pop("special_tokens_mask", None)
        if self.mlm:
            batch["input_ids"], batch["labels"] = self.mask_tokens(
                batch["input_ids"], special_tokens_mask=special_tokens_mask
            )
        else:
            # List / tensor examples only produce "input_ids", so squeeze
            # "token_type_ids" only when it actually exists.
            for key in ("input_ids", "token_type_ids"):
                if key in batch:
                    batch[key] = torch.squeeze(batch[key], dim=0)
        return batch

    def _build_special_tokens_mask(self, token_ids: torch.Tensor) -> torch.Tensor:
        """Return a boolean mask that is True wherever ``token_ids`` holds a special token."""
        lookup = getattr(self.tokenizer, "get_special_tokens_mask", None)
        if callable(lookup):
            # transformers-style tokenizers expose this helper directly.
            rows = [
                lookup(row, already_has_special_tokens=True)
                for row in token_ids.tolist()
            ]
            return torch.tensor(rows, dtype=torch.bool)

        # `tokenizers.Tokenizer` has no such helper: derive the mask from the
        # ids of the conventional special tokens that exist in the vocabulary.
        mask = torch.zeros_like(token_ids, dtype=torch.bool)
        for token in self._SPECIAL_TOKENS:
            token_id = self.tokenizer.token_to_id(token)
            if token_id is not None:
                mask |= token_ids == token_id
        return mask

    def mask_tokens(
        self, inputs: torch.Tensor, special_tokens_mask: Optional[torch.Tensor] = None
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Prepare masked tokens inputs/labels for masked language modeling: 80% MASK, 10% random, 10% original.

        ``inputs`` is modified in place and also returned. ``labels`` holds the
        original id at every selected position and -100 everywhere else.
        """
        labels = inputs.clone()
        # Sample tokens for MLM training with probability `self.mlm_probability`.
        probability_matrix = torch.full(labels.shape, self.mlm_probability)
        if special_tokens_mask is None:
            special_tokens_mask = self._build_special_tokens_mask(labels)
        else:
            special_tokens_mask = special_tokens_mask.bool()

        probability_matrix.masked_fill_(special_tokens_mask, value=0.0)
        masked_indices = torch.bernoulli(probability_matrix).bool()
        labels[~masked_indices] = -100  # loss is only computed on masked tokens

        # 80% of the time, replace the selected token with [MASK].
        indices_replaced = (
            torch.bernoulli(torch.full(labels.shape, 0.8)).bool() & masked_indices
        )
        inputs[indices_replaced] = self.tokenizer.token_to_id("[MASK]")

        # 10% of the time (half of the remaining 20%), use a random token.
        indices_random = (
            torch.bernoulli(torch.full(labels.shape, 0.5)).bool()
            & masked_indices
            & ~indices_replaced
        )
        random_words = torch.randint(
            self.tokenizer.get_vocab_size(), labels.shape, dtype=torch.long
        )
        inputs[indices_random] = random_words[indices_random]

        # The remaining 10% of selected tokens are left unchanged.
        return inputs, labels
def pad(
    encoded_inputs: Union[
        BatchEncoding,
        List[BatchEncoding],
        Dict[str, EncodedInput],
        Dict[str, List[EncodedInput]],
        List[Dict[str, EncodedInput]],
    ],
    padding=True,
    max_length: Optional[int] = None,
    pad_to_multiple_of: Optional[int] = None,
    return_attention_mask: Optional[bool] = None,
    return_tensors=None,
    verbose: bool = True,
) -> BatchEncoding:
    """
    Merge one or several encoded inputs into a single ``BatchEncoding``.

    Despite its name, this function does not add padding tokens: it has no
    access to a pad id. Inputs are expected to be padded already (e.g. by a
    tokenizer with fixed-length padding enabled); ragged sequences will fail
    when ``BatchEncoding`` converts them to tensors.

    Args:
        encoded_inputs: Either a mapping with at least an ``"input_ids"`` key,
            or a list/tuple of such mappings (the form a PyTorch DataLoader
            hands to a collate function). Values may be Python lists, NumPy
            arrays or PyTorch tensors; arrays and tensors are converted to
            Python objects first (any device information is lost).
        padding: Accepted for signature compatibility; currently ignored.
        max_length: Accepted for signature compatibility; currently ignored.
        pad_to_multiple_of: Accepted for signature compatibility; currently
            ignored.
        return_attention_mask: Only consulted when ``input_ids`` is empty, in
            which case an empty ``"attention_mask"`` entry is added.
        return_tensors: Tensor type handed to ``BatchEncoding`` (``"pt"`` or
            ``"np"``). When None and the inputs are tensors/arrays, the
            matching type is used.
        verbose: Accepted for signature compatibility; currently ignored.

    Returns:
        A ``BatchEncoding`` holding the merged inputs. When ``input_ids`` is
        empty the (possibly converted) ``encoded_inputs`` mapping is returned
        as is.

    Raises:
        ValueError: If the elements are of an unsupported type, or if the
            entries do not all share the same batch size.
    """
    # A list of dicts (one per example) is turned into a dict of lists so the
    # function can be used as a DataLoader collate_fn.
    if isinstance(encoded_inputs, (list, tuple)) and isinstance(
        encoded_inputs[0], (dict, BatchEncoding)
    ):
        encoded_inputs = {
            key: [example[key] for example in encoded_inputs]
            for key in encoded_inputs[0].keys()
        }

    required_input = encoded_inputs["input_ids"]

    # Use len() rather than truthiness: tensors and arrays with more than one
    # element raise when evaluated as a boolean.
    if required_input is None or len(required_input) == 0:
        if return_attention_mask:
            encoded_inputs["attention_mask"] = []
        return encoded_inputs

    # Find a representative element to detect tensor/array inputs. Leading
    # sequences may be empty, so take the first item of the first non-empty
    # one; if every sequence is empty, `first_element` stays an empty list.
    first_element = required_input[0]
    if isinstance(first_element, (list, tuple)):
        for sequence in required_input:
            if len(sequence) != 0:
                first_element = sequence[0]
                break

    if not isinstance(first_element, (int, list, tuple)):
        if isinstance(first_element, torch.Tensor):
            return_tensors = "pt" if return_tensors is None else return_tensors
        elif isinstance(first_element, np.ndarray):
            return_tensors = "np" if return_tensors is None else return_tensors
        else:
            raise ValueError(
                f"type of {first_element} unknown: {type(first_element)}. "
                f"Should be one of a python, numpy, pytorch or tensorflow object."
            )

        # Cast tensors/arrays to Python objects; they are rebuilt below.
        for key, value in encoded_inputs.items():
            encoded_inputs[key] = to_py_obj(value)

    required_input = encoded_inputs["input_ids"]
    # A single (un-batched) example: nothing to merge.
    if len(required_input) > 0 and not isinstance(required_input[0], (list, tuple)):
        return BatchEncoding(encoded_inputs, tensor_type=return_tensors)

    batch_size = len(required_input)
    # Explicit raise instead of `assert`, which is stripped under `python -O`.
    if any(len(values) != batch_size for values in encoded_inputs.values()):
        raise ValueError(
            "Some items in the output dictionary have a different batch size than others."
        )

    # Copy the outer lists so the result does not alias the caller's lists.
    batch_outputs = {key: list(values) for key, values in encoded_inputs.items()}
    return BatchEncoding(batch_outputs, tensor_type=return_tensors)
+ if not isinstance(first_element, (int, list, tuple)): + if isinstance(first_element, torch.Tensor): + return_tensors = "pt" if return_tensors is None else return_tensors + elif isinstance(first_element, np.ndarray): + return_tensors = "np" if return_tensors is None else return_tensors + else: + raise ValueError( + f"type of {first_element} unknown: {type(first_element)}. " + f"Should be one of a python, numpy, pytorch or tensorflow object." + ) + + for key, value in encoded_inputs.items(): + encoded_inputs[key] = to_py_obj(value) + + required_input = encoded_inputs["input_ids"] + if required_input and not isinstance(required_input[0], (list, tuple)): + return BatchEncoding(encoded_inputs, tensor_type=return_tensors) + + batch_size = len(required_input) + assert all( + len(v) == batch_size for v in encoded_inputs.values() + ), "Some items in the output dictionary have a different batch size than others." + + batch_outputs = {} + for i in range(batch_size): + inputs = dict((k, v[i]) for k, v in encoded_inputs.items()) + for key, value in inputs.items(): + if key not in batch_outputs: + batch_outputs[key] = [] + batch_outputs[key].append(value) + + return BatchEncoding(batch_outputs, tensor_type=return_tensors) + + +def _collate_batch(examples, tokenizer, pad_to_multiple_of: Optional[int] = None): + """Collate `examples` into a batch, using the information in `tokenizer` for padding if necessary.""" + # Tensorize if necessary. + if isinstance(examples[0], (list, tuple)): + examples = [torch.tensor(e, dtype=torch.long) for e in examples] + + # Check if padding is necessary. + length_of_first = examples[0].size(0) + are_tensors_same_length = all(x.size(0) == length_of_first for x in examples) + if are_tensors_same_length and ( + pad_to_multiple_of is None or length_of_first % pad_to_multiple_of == 0 + ): + return torch.stack(examples, dim=0) + + # If yes, check if we have a `pad_token`. 
+ if tokenizer._pad_token is None: + raise ValueError( + "You are attempting to pad samples but the tokenizer you are using" + f" ({tokenizer.__class__.__name__}) does not have a pad token." + ) + + # Creating the full tensor and filling it with our data. + max_length = max(x.size(0) for x in examples) + if pad_to_multiple_of is not None and (max_length % pad_to_multiple_of != 0): + max_length = ((max_length // pad_to_multiple_of) + 1) * pad_to_multiple_of + result = examples[0].new_full([len(examples), max_length], tokenizer.pad_token_id) + for i, example in enumerate(examples): + if tokenizer.padding_side == "right": + result[i, : example.shape[0]] = example + else: + result[i, -example.shape[0] :] = example + return result + + +def to_py_obj(obj): + if isinstance(obj, torch.Tensor): + return obj.detach().cpu().tolist() + elif isinstance(obj, np.ndarray): + return obj.tolist() + else: + return obj diff --git a/bert/obtain_inst_vec.py b/bert/obtain_inst_vec.py new file mode 100644 index 0000000..7f3af0e --- /dev/null +++ b/bert/obtain_inst_vec.py @@ -0,0 +1,111 @@ +import os +import numpy as np +import tokenizers +import torch + +from transformers import ( + BatchEncoding, + BertConfig, + BertForPreTraining +) + +from .my_data_collator import MyDataCollatorForPreTraining +model_file = os.path.join("./bert/pytorch_model.bin") +tokenizer_file = os.path.join("./bert/tokenizer-inst.all.json") +config_file = os.path.join('./bert/bert.json') + +# from my_data_collator import MyDataCollatorForPreTraining +# model_file = os.path.join("./pytorch_model.bin") +# tokenizer_file = os.path.join("./tokenizer-inst.all.json") +# config_file = os.path.join('./bert.json') + + +def load_model(): + config = BertConfig.from_json_file(config_file) + model = BertForPreTraining(config) + state_dict = torch.load(model_file) + model.load_state_dict(state_dict) + model.eval() + + tokenizer = tokenizers.Tokenizer.from_file(tokenizer_file) + tokenizer.enable_padding( + 
def load_model():
    """Load the pretrained BERT model and the instruction tokenizer from disk.

    Reads the module-level ``config_file``, ``model_file`` and
    ``tokenizer_file`` paths.

    Returns:
        ``(model, tokenizer)``: a ``BertForPreTraining`` in eval mode and a
        ``tokenizers.Tokenizer`` that pads and truncates every encoding to
        exactly 50 tokens.
    """
    config = BertConfig.from_json_file(config_file)
    model = BertForPreTraining(config)
    # NOTE(security): torch.load unpickles the file; only load trusted
    # checkpoints. map_location keeps loading working on CPU-only hosts even
    # if the checkpoint was saved from a GPU.
    state_dict = torch.load(model_file, map_location="cpu")
    model.load_state_dict(state_dict)
    model.eval()

    tokenizer = tokenizers.Tokenizer.from_file(tokenizer_file)
    # Matches "max_position_embeddings": 50 in bert.json.
    max_tokens = 50
    tokenizer.enable_padding(
        pad_id=tokenizer.token_to_id("[PAD]"), pad_token="[PAD]", length=max_tokens
    )
    # Without truncation an instruction longer than `max_tokens` would yield
    # ragged rows and exceed the model's position embeddings.
    tokenizer.enable_truncation(max_length=max_tokens)
    return model, tokenizer


def process_input(inst, tokenizer):
    """Tokenize ``inst`` and build the model input plus a real-token mask.

    Args:
        inst: A single instruction string or a list of instruction strings.
            A single string is replicated 8 times to form a batch; callers
            read row 0 of the result.
        tokenizer: Tokenizer providing ``encode_batch``.

    Returns:
        ``(model_input, length_mask)``. ``model_input`` is the collator output
        (``input_ids`` and ``token_type_ids``). ``length_mask`` is a NumPy
        array equal to ``1 - special_tokens_mask``.
    """
    if isinstance(inst, str):
        # Build a batch out of the single instruction.
        inst = [inst] * 8

    encodings = tokenizer.encode_batch(inst)
    encoded_input = {
        "input_ids": [encoding.ids for encoding in encodings],
        "token_type_ids": [encoding.type_ids for encoding in encodings],
        "special_tokens_mask": [
            encoding.special_tokens_mask for encoding in encodings
        ],
    }

    # Use `np` rather than `pt` here; the collator converts to torch later.
    batch_output = BatchEncoding(
        encoded_input, tensor_type="np", prepend_batch_axis=False,
    )

    # NOTE: relies on "special_tokens_mask"; only meaningful when each input
    # consists of a single instruction.
    length_mask = 1 - batch_output["special_tokens_mask"]

    # NOTE(review): no attention_mask is produced, so the model also attends
    # to [PAD] positions - confirm this matches how the model was trained.
    data_collator = MyDataCollatorForPreTraining(tokenizer=tokenizer, mlm=False)
    model_input = data_collator([batch_output])

    return model_input, length_mask
# Loaded (model, tokenizer) pair, cached so repeated calls do not re-read the
# checkpoint and tokenizer files from disk.
_MODEL_CACHE = None


def _get_model_and_tokenizer():
    """Return the cached ``(model, tokenizer)`` pair, loading it on first use."""
    global _MODEL_CACHE
    if _MODEL_CACHE is None:
        _MODEL_CACHE = load_model()
    return _MODEL_CACHE


def generate_inst_vec(inst, method="mean"):
    """Embed one instruction or a list of instructions with the BERT model.

    Args:
        inst: A single instruction string, or a non-empty list of strings.
        method: Pooling over the last hidden layer:
            ``"cls"`` takes the first token's vector; ``"mean"`` / ``"max"``
            pool over the sequence axis after zeroing special-token positions.

    Returns:
        A tensor of shape (hidden,) for a string input, or (batch, hidden)
        for a list input.

    Raises:
        ValueError: If ``method`` is unknown or ``inst`` is an empty list.
    """
    if method not in ("cls", "mean", "max"):
        raise ValueError(
            f"Unknown pooling method {method!r}; expected 'cls', 'mean' or 'max'."
        )
    if isinstance(inst, (list, tuple)) and len(inst) == 0:
        raise ValueError("`inst` must contain at least one instruction.")

    model, tokenizer = _get_model_and_tokenizer()

    model_input, length_mask = process_input(inst, tokenizer)
    length_mask = torch.from_numpy(length_mask).to(model_input["input_ids"].device)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        output = model(**model_input, output_hidden_states=True)

    # `hidden_states` is a tuple of per-layer tensors: select the last layer
    # first, then index into the tensor.
    last_hidden = output.hidden_states[-1]
    # A string input was replicated into a batch; row 0 is the answer.
    single = isinstance(inst, str)

    if method == "cls":
        return last_hidden[0][0] if single else last_hidden[:, 0, :]

    masked = last_hidden * torch.unsqueeze(length_mask, dim=-1)
    if method == "mean":
        # NOTE(review): this averages over the full padded length (masked
        # positions contribute zeros) rather than over the real tokens only.
        # Kept as is so existing embeddings stay reproducible.
        return torch.mean(masked[0], dim=0) if single else torch.mean(masked, dim=1)

    # torch.max with `dim` returns (values, indices); callers want the values.
    pooled = torch.max(masked[0], dim=0) if single else torch.max(masked, dim=1)
    return pooled.values


def bb2vec(inst):
    """Return one embedding for a basic block as a list of floats.

    For a list of instructions the per-instruction "mean" vectors are averaged;
    for a single instruction string its vector is returned directly.
    """
    vectors = generate_inst_vec(inst, method="mean").detach().numpy()
    if vectors.ndim == 1:
        # Single instruction: already one vector, nothing to average.
        return list(vectors)
    return list(np.mean(vectors, axis=0))


if __name__ == "__main__":
    temp = bb2vec(['adc byte [ ebp - 0x74 ] cl', 'mov dh 0x79', 'adc eax 1'])
    print(list(temp))
"SpecialToken": { + "id": "[SEP]", + "type_id": 0 + } + } + ], + "pair": [ + { + "SpecialToken": { + "id": "[CLS]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "A", + "type_id": 0 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "B", + "type_id": 1 + } + }, + { + "SpecialToken": { + "id": "[SEP]", + "type_id": 1 + } + } + ], + "special_tokens": { + "[CLS]": { + "id": "[CLS]", + "ids": [ + 1 + ], + "tokens": [ + "[CLS]" + ] + }, + "[SEP]": { + "id": "[SEP]", + "ids": [ + 2 + ], + "tokens": [ + "[SEP]" + ] + } + } + }, + "decoder": null, + "model": { + "type": "WordLevel", + "vocab": { + "[UNK]": 0, + "[CLS]": 1, + "[SEP]": 2, + "[PAD]": 3, + "[MASK]": 4, + "[": 5, + "]": 6, + "mov": 7, + "+": 8, + "qword": 9, + "rsp": 10, + "rax": 11, + "dword": 12, + "eax": 13, + "rcx": 14, + ".": 15, + "cmp": 16, + "rdx": 17, + "lea": 18, + "call": 19, + "fcn": 20, + "jmp": 21, + "ecx": 22, + "byte": 23, + "rbx": 24, + "ebp": 25, + "-": 26, + "je": 27, + "jne": 28, + "edx": 29, + "esi": 30, + "8": 31, + "rdi": 32, + "rsi": 33, + "test": 34, + "add": 35, + "push": 36, + "ebx": 37, + "0": 38, + "edi": 39, + "0x10": 40, + "1": 41, + "al": 42, + "rbp": 43, + "sub": 44, + "4": 45, + "r8": 46, + "xor": 47, + "0x20": 48, + "esp": 49, + "0x18": 50, + "0x28": 51, + "movzx": 52, + "0x30": 53, + "and": 54, + "r9": 55, + "2": 56, + "*": 57, + "0x40": 58, + "xmm0": 59, + "jbe": 60, + "word": 61, + "0xc": 62, + "0x38": 63, + "nop": 64, + "jae": 65, + "cl": 66, + "jb": 67, + "0x48": 68, + "r10": 69, + "inc": 70, + "0x50": 71, + "xmmword": 72, + "ja": 73, + "r8d": 74, + "dx": 75, + "or": 76, + "pop": 77, + "r11": 78, + "r14": 79, + "ret": 80, + ":": 81, + "0x58": 82, + "r12": 83, + "0x14": 84, + "0x60": 85, + "sym": 86, + "0x70": 87, + "imp": 88, + "dl": 89, + "r15": 90, + "bl": 91, + "str": 92, + "0x68": 93, + "imul": 94, + "0x80": 95, + "r13": 96, + "0x78": 97, + "shr": 98, + "movups": 99, + "r9d": 100, + "jl": 101, + "3": 102, + 
"0x1c": 103, + "00450a10": 104, + "0x005b0900": 105, + "shl": 106, + "jge": 107, + "dec": 108, + "0x88": 109, + "0x90": 110, + "ax": 111, + "004528a0": 112, + "jle": 113, + ")": 114, + "gs": 115, + "xmm1": 116, + "KERNEL32": 117, + "0xa0": 118, + "ah": 119, + "0x98": 120, + "0xffffffff": 121, + "0x24": 122, + "r0": 123, + "r1": 124, + "5": 125, + "dh": 126, + "7": 127, + "0x2c": 128, + "0xa8": 129, + "movabs": 130, + "ch": 131, + "r14d": 132, + "jg": 133, + "(": 134, + "outsb": 135, + "sil": 136, + "r10d": 137, + "sar": 138, + "r12d": 139, + "0xb0": 140, + "movsxd": 141, + "::": 142, + "0xc0": 143, + "0xb8": 144, + "0x34": 145, + "jns": 146, + "r15d": 147, + "movdqu": 148, + "0xd0": 149, + "xorps": 150, + "6": 151, + "movsd": 152, + "r8b": 153, + "r3": 154, + "r2": 155, + "l": 156, + "outsd": 157, + "0xa": 158, + "0x3c": 159, + "bt": 160, + "9": 161, + "r11d": 162, + "xmm2": 163, + "js": 164, + "0x3f": 165, + "jo": 166, + "cx": 167, + "sbb": 168, + "0xffffffffffffffff": 169, + "0x44": 170, + "0xc8": 171, + "0x100": 172, + "bh": 173, + "0xd8": 174, + "dil": 175, + "r13d": 176, + "0xf0": 177, + "0xf": 178, + "0042c5c0": 179, + "0x64": 180, + "0x1f": 181, + "r4": 182, + "es": 183, + "@(": 184, + "neg": 185, + "fs": 186, + "0x5c": 187, + "lock": 188, + "sete": 189, + "insb": 190, + "0x4c": 191, + "adc": 192, + "sp": 193, + "@": 194, + "ud2": 195, + "jnp": 196, + "0xe8": 197, + "0xd": 198, + "0x19": 199, + "0x17": 200, + "bx": 201, + "si": 202, + "0xe0": 203, + "xchg": 204, + "movsx": 205, + "r6": 206, + "0x54": 207, + "insd": 208, + "0x2e": 209, + "0x65": 210, + "0xf8": 211, + "r5": 212, + "xmm3": 213, + "0x6c": 214, + "0x61": 215, + "r9b": 216, + "0x74": 217, + "0x41": 218, + "0xb": 219, + "r7": 220, + "0x108": 221, + "di": 222, + "xmm6": 223, + "0x11": 224, + "0x16": 225, + "0042c650": 226, + "msvcrt": 227, + "0x118": 228, + "0xe": 229, + "0x110": 230, + "0x2f": 231, + "b": 232, + "0x6f": 233, + "0x29": 234, + "0x12": 235, + "0x22": 236, + "case": 237, + "0042cf60": 
238, + "xmm4": 239, + "0x27": 240, + "rol": 241, + "0x138": 242, + "0x6e": 243, + "movdqa": 244, + "0x2d": 245, + "ldr": 246, + "cmovne": 247, + "0x200": 248, + "0x130": 249, + "not": 250, + "0xfffffffe": 251, + "r11b": 252, + "r15b": 253, + "0x120": 254, + "r14b": 255, + "0xff": 256, + "jno": 257, + "setne": 258, + "0x7c": 259, + "0x69": 260, + "xadd": 261, + "USER32": 262, + "jp": 263, + "0x13": 264, + "ymmword": 265, + "/": 266, + "0x128": 267, + "xmm5": 268, + "0x140": 269, + "r12b": 270, + "0x150": 271, + "int3": 272, + "0x1000": 273, + "movaps": 274, + "0x63": 275, + "mul": 276, + "cmove": 277, + "r10b": 278, + "..": 279, + "s": 280, + "0x39": 281, + "bf": 282, + "0xfffffffffffffffe": 283, + "0x15": 284, + "0x004ba480": 285, + "0x49": 286, + "0x2a": 287, + "0x47": 288, + "0x67": 289, + "xmm7": 290, + "0x21": 291, + "riz": 292, + "0x148": 293, + "0x2b": 294, + "0x320": 295, + "ror": 296, + "0x7f": 297, + "0x7fffffff": 298, + "0x73": 299, + "0x1a": 300, + "bp": 301, + "pxor": 302, + "cs": 303, + "0x400": 304, + "fld": 305, + "0x32": 306, + "0x31": 307, + "0x66": 308, + "ip": 309, + "0x158": 310, + "vtable": 311, + "0x75": 312, + "0x3d": 313, + "0x4a": 314, + "cmpxchg": 315, + "00409a40": 316, + "0x170": 317, + "st": 318, + "section": 319, + "0x1d8": 320, + "0xffff": 321, + "bpl": 322, + "0x160": 323, + "0x43": 324, + "vmovdqu": 325, + "w": 326, + "0x26": 327, + "fstp": 328, + "r13b": 329, + "div": 330, + "bra": 331, + "00409850": 332, + "0x72": 333, + "beq": 334, + "0x004fab60": 335, + "setb": 336, + "0042cc90": 337, + "dll_memset": 338, + "0x3a": 339, + "bts": 340, + "default": 341, + "0x57": 342, + "fp": 343, + "jsr": 344, + "0x1d": 345, + "0x45": 346, + "0x1e": 347, + "aesenc": 348, + "dll_GetLastError": 349, + "0x8000": 350, + "0x2000": 351, + "rep": 352, + "zero": 353, + "pushfd": 354, + "0x190": 355, + "t8": 356, + "0x005a0630": 357, + "std": 358, + "0x8c": 359, + "a0": 360, + "0x62": 361, + "0x3e": 362, + "eq": 363, + "0x104": 364, + "cmovg": 365, + 
"0x79": 366, + "0x4e": 367, + "0xb1": 368, + "0x1e8": 369, + "0x1b0": 370, + "mulsd": 371, + "0x1e0": 372, + "movss": 373, + "0x76": 374, + "ucomisd": 375, + "xmm8": 376, + "r8w": 377, + "0x84": 378, + "0x46": 379, + "0x80000000": 380, + "0x94": 381, + "0x77": 382, + "0x800": 383, + "0042cd90": 384, + "0x1a0": 385, + "0x5a": 386, + "lr": 387, + "cvtsi2sd": 388, + "cdq": 389, + "addsd": 390, + "0x198": 391, + "ymm0": 392, + "0xf4": 393, + "0x1a8": 394, + "0x10000": 395, + "0x1c8": 396, + "0x1b": 397, + "0x168": 398, + "0x1c0": 399, + "0x280": 400, + "ds": 401, + "ss": 402, + "0x310": 403, + "sb": 404, + "0x37": 405, + "0x1f8": 406, + "0xfffffffffffffade": 407, + "v1": 408, + "bne": 409, + "0x240": 410, + "0xfffffffd": 411, + "tst": 412, + "cmova": 413, + "MSVCRT": 414, + "004539c0": 415, + "0x250": 416, + "0x178": 417, + "0x1f0": 418, + "xmm9": 419, + "0x59": 420, + "0x25": 421, + "kernel32": 422, + "ra": 423, + "0x180": 424, + "v0": 425, + "0x300": 426, + "xmm10": 427, + "movq": 428, + "0x1b8": 429, + "0x230": 430, + "0x100000": 431, + "2x_": 432, + "0xb4": 433, + "0042ce00": 434, + "0x290": 435, + "gp": 436, + "pushal": 437, + "k1": 438, + "r12w": 439, + "0x210": 440, + "0x270": 441, + "0x51": 442, + "0x1320": 443, + "data": 444, + "0x004fc320": 445, + "a3": 446, + "0x248": 447, + "t0": 448, + "0x7a": 449, + "at": 450, + "0x2a0": 451, + "0x4f": 452, + "@-": 453, + "0x23": 454, + "0x71": 455, + "c": 456, + "ADVAPI32": 457, + "0x3b": 458, + "cmovae": 459, + "0x36": 460, + "0x258": 461, + "xmm11": 462, + "t9": 463, + "0xa4": 464, + "s0": 465, + "0x20000": 466, + "0x1d0": 467, + "0xfffff": 468, + "0x56": 469, + "0xfffd": 470, + "004508c0": 471, + "0x278": 472, + "cxx": 473, + "0x6d": 474, + "0x220": 475, + "0x00597158": 476, + "r9w": 477, + "0042a560": 478, + "0x208": 479, + "leave": 480, + "14001b640": 481, + "0x1000000": 482, + "vdsutil": 483, + "0x00000400": 484, + "0xe4": 485, + "0x55": 486, + "t7": 487, + "0x188": 488, + "ymm3": 489, + "0xc4": 490, + "r10w": 491, 
+ "0x9c": 492, + "0x2c0": 493, + "s3": 494, + "sl": 495, + "movd": 496, + "t3": 497, + "0x00597150": 498, + "0x7d": 499, + "0x35": 500, + "0x5b": 501, + "0x2f0": 502, + "r15w": 503, + "0x005b075c": 504, + "lsl": 505, + "0x01": 506, + "0x288": 507, + "0x3e8": 508, + "cmovb": 509, + "0x42": 510, + "k0": 511, + "f": 512, + "0x005974d0": 513, + "out": 514, + "in": 515, + "0x4000": 516, + "0x5f": 517, + "ymm1": 518, + "a1": 519, + "0x81": 520, + "r14w": 521, + "stosd": 522, + "idiv": 523, + "a2": 524, + "0x298": 525, + "0x7b": 526, + "api_ms_win_downlevel_kernel32_l1_1_0": 527, + "s7": 528, + "0xfd": 529, + "0xac": 530, + "0x3a8": 531, + "0x005b34c0": 532, + "addiu": 533, + "0x33": 534, + "0x1ff": 535, + "seta": 536, + "0042bbb0": 537, + "0x140000000": 538, + "0x00596978": 539, + "0x00": 540, + "divsd": 541, + "0x2f8": 542, + "stc": 543, + "0x3e5": 544, + "cvttsd2si": 545, + "s1": 546, + "outsw": 547, + "0x7fff": 548, + "0x228": 549, + "t4": 550, + "clc": 551, + "cmovl": 552, + "addi": 553, + "0x2c8": 554, + "0xf9": 555, + "0x0059a1b0": 556, + "0x218": 557, + "0x2b8": 558, + "dll_CloseHandle": 559, + "aav": 560, + "s4": 561, + "xmm12": 562, + "0x458": 563, + "0x5d": 564, + "0043c670": 565, + "MFC42": 566, + "dll_void___cdecl_operator_delete_void____ptr64_": 567, + "0x260": 568, + "0x1ffff": 569, + "0x005b0760": 570, + "0xfffffffc": 571, + "0x5e": 572, + "0xfffffffffffffff8": 573, + "GDI32": 574, + "0x4b": 575, + "t1": 576, + "ymm2": 577, + "004451b0": 578, + "subsd": 579, + "btr": 580, + "0x2d0": 581, + "0x328": 582, + "0x6a": 583, + "0x4d": 584, + "xmm15": 585, + "0x40e7a8": 586, + "0xdc": 587, + "0xfffffff0": 588, + "lb": 589, + "0042cf10": 590, + "shld": 591, + "lfence": 592, + "0x238": 593, + "strb": 594, + "0xfc": 595, + "0xFF": 596, + "00482607": 597, + "0x12c": 598, + "0x53": 599, + "0x101": 600, + "s2": 601, + "psrldq": 602, + "0x40000000": 603, + "0x8007000e": 604, + "0x52": 605, + "t2": 606, + "0xbc": 607, + "wait": 608, + "cld": 609, + "0x7e": 610, + "0xcc": 
611, + "extu": 612, + "0x49e4cc": 613, + "j": 614, + "hi": 615, + "r0r15": 616, + "0x00599eb8": 617, + "ymm4": 618, + "cmc": 619, + "jal": 620, + "pcmpeqb": 621, + "0x7fffffffffffffff": 622, + "0x2a8": 623, + "ljmp": 624, + "0xbf": 625, + "scasb": 626, + "0x268": 627, + "popal": 628, + "0x102": 629, + "0xe08": 630, + "0x800000000000": 631, + "{": 632, + "}": 633, + "14001242c": 634, + "dll_GetProcAddress": 635, + "arpl": 636, + "t6": 637, + "0xe00": 638, + "0042a6a0": 639, + "api_ms_win_core_errorhandling_l1_1_0": 640, + "movsb": 641, + "0x00597528": 642, + "0x10c": 643, + "bsf": 644, + "s5": 645, + "setl": 646, + "bsr": 647, + "0x460": 648, + "enter": 649, + "0x004c46c0": 650, + "0042a040": 651, + "CVdsService": 652, + "0x2b0": 653, + "t5": 654, + "stosb": 655, + "vzeroupper": 656, + "cmovle": 657, + "andi": 658, + "0x00584940": 659, + "movapd": 660, + "sltiu": 661, + "0x83": 662, + "lsr": 663, + "s6": 664, + "0x406f7d": 665, + "boost": 666, + "0x3b9aca00": 667, + "xmm13": 668, + "0x308": 669, + "rcr": 670, + "0x00597510": 671, + "0xd800": 672, + "retf": 673, + "beql": 674, + "0xfffffffffffffff0": 675, + "0x44d13d": 676, + "0x1400f5e38": 677, + "ori": 678, + "0x2d8": 679, + "slti": 680, + "setg": 681, + "0x478": 682, + "user32": 683, + "0xfffffffffffffffc": 684, + "0xe10": 685, + "ll": 686, + "0x2e0": 687, + "0x140040010": 688, + "0xec": 689, + "0xe5": 690, + "0x418": 691, + "0x489634": 692, + "xmm14": 693, + "0x7ff": 694, + "0x3a0": 695, + "00453730": 696, + "004544f0": 697, + "14000e6d8": 698, + "OLEAUT32": 699, + "0x14002a3b0": 700, + "int": 701, + "vmovntdq": 702, + "0047ddc0": 703, + "pmovmskb": 704, + "vmovdqa": 705, + "r11w": 706, + "0x86": 707, + "0x00597828": 708, + "CVssHardwareProviderWrapper": 709, + "lw": 710, + "0x4b0": 711, + "0x04": 712, + "0x514": 713, + "sw": 714, + "0xfde9": 715, + "bswap": 716, + "lbu": 717, + "0x49089b": 718, + "0xd4": 719, + "0x49d664": 720, + "0043c560": 721, + "dll_GetProcessHeap": 722, + "fadd": 723, + "lh": 724, + 
"loopne": 725, + "0xfe": 726, + "ldrb": 727, + "r13w": 728, + "dll_CoTaskMemFree": 729, + "0x3f8": 730, + "0x00597514": 731, + "0x468": 732, + "setbe": 733, + "0x40294a": 734, + "0x00590758": 735, + "dll_public": 736, + "pslldq": 737, + "0x004ba300": 738, + "0x237": 739, + "0040d420": 740, + "0xe9": 741, + "00407b90": 742, + "0xfffffffb": 743, + "0x10ffff": 744, + "0040bad0": 745, + "sh": 746, + "140001e97": 747, + "00411650": 748, + "movlpd": 749, + "0x005b0904": 750, + "0x103": 751, + "0x14009e838": 752, + "cvtss2sd": 753, + "0x6b": 754, + "0xd2": 755, + "lwl": 756, + "0x00000000": 757, + "exception_detail": 758, + "fmul": 759, + "0xe06d7363": 760, + "0x3ff": 761, + "0x1270": 762, + "dll_HeapAlloc": 763, + "0x124": 764, + "dll_LeaveCriticalSection": 765, + "setae": 766, + "dll_EnterCriticalSection": 767, + "0x00597520": 768, + "...": 769, + "0xffffffffffffffe0": 770, + "api_ms_win_core_com_l1_1_0": 771, + "0x005b0758": 772, + "orr": 773, + "fild": 774, + "0x80070057": 775, + "0x0059782c": 776, + "lodsb": 777, + "cpp": 778, + "0040c4e0": 779, + "0x005b1e28": 780, + "0042c820": 781, + "0x00587100": 782, + "0x40000": 783, + "jalx": 784, + "0x00590750": 785, + "0x5e4": 786, + "0x00597518": 787, + "0x80004005": 788, + "00402500": 789, + "_": 790, + "0x2e687361": 791, + "dll_free": 792, + "0x8000000000000000": 793, + "0x005b11a0": 794, + "0x114": 795, + "0x3b8": 796, + "0x401489": 797, + "0x400000": 798, + "0x410": 799, + "shrd": 800, + "rcl": 801, + "0x005b0750": 802, + "0x490476": 803, + "0x005b0774": 804, + "0xa3d70a3d70a3d70b": 805, + "pand": 806, + "0xfffffff8": 807, + "bnel": 808, + "0x4d0": 809, + "0042c940": 810, + "0xcccccccccccccccd": 811, + "bound": 812, + "0x408": 813, + "004400a0": 814, + "140006ed0": 815, + "0x1fff": 816, + "0040207f": 817, + "0042a850": 818, + "0xfff": 819, + "cmovs": 820, + "0xde8": 821, + "a6": 822, + "strh": 823, + "0x00584760": 824, + "ole32": 825, + "xori": 826, + "0040d400": 827, + "0x3d0": 828, + "0x5e0": 829, + "4x__": 830, + 
"0041ccec": 831, + "0xfff0": 832, + "0x005970d8": 833, + "vpxor": 834, + "0x11c": 835, + "0x636e7566": 836, + "sc": 837, + "xword": 838, + "14000a750": 839, + "14000e160": 840, + "lwr": 841, + "0x3c0": 842, + "0x005977a0": 843, + "00461fe0": 844, + "entry0": 845, + "0x1d4": 846, + "1400aafcc": 847, + "0x00596eb8": 848, + "VCRUNTIME140": 849, + "0x25c": 850, + "vpaddq": 851, + "1800020f0": 852, + "004592e0": 853, + "dll_HeapFree": 854, + "00409b30": 855, + "0x00599ea0": 856, + "0x3c8": 857, + "0x1258": 858, + "0x005975b8": 859, + "hlt": 860, + "rdata": 861, + "text": 862, + "0x02": 863, + "ldc1": 864, + "0x00596ec8": 865, + "lwc1": 866, + "0x318": 867, + "0xdc00": 868, + "0x29676564": 869, + "0x3d8": 870, + "0xb3": 871, + "lwc2": 872, + "0040c3b0": 873, + "0x340": 874, + "0x2338": 875, + "DLL_CString": 876, + "0xdf": 877, + "0x20c": 878, + "0x430": 879, + "0x80070000": 880, + "pshufd": 881, + "65": 882, + "0x00597578": 883, + "0x134": 884, + "0x154": 885, + "0x005978b0": 886, + "0x2000000": 887, + "0047e010": 888, + "dll": 889, + "0x005970d0": 890, + "sal": 891, + "00497b10": 892, + "swr": 893, + "0x005b1e20": 894, + "0x15c": 895, + "0x324": 896, + "004613a0": 897, + "dll_memcpy": 898, + "0xffffffe0": 899, + "fldz": 900, + "0x005b0820": 901, + "0x005b0be3": 902, + "0x174": 903, + "0x14c": 904, + "140002a78": 905, + "0x21c": 906, + "0xdf8": 907, + "0049de90": 908, + "0x00596eb0": 909, + "ymm5": 910, + "aaa": 911, + "lhu": 912, + "_n": 913, + "0xdddd": 914, + "0x00597130": 915, + "fmov": 916, + "vpmovmskb": 917, + "0042f350": 918, + "0x004c99a0": 919, + "0x005b34d0": 920, + "0x8888888888888889": 921, + "ldc2": 922, + "0x2710": 923, + "0x82": 924, + "0xdf0": 925, + "0x1cc": 926, + "0x350": 927, + "0x13c": 928, + "cmpsb": 929, + "0x6c797473": 930, + "0xa1": 931, + "0x330": 932, + "0x1328": 933, + "0x274": 934, + "0x49b1da": 935, + "140002a04": 936, + "0049a9d0": 937, + "vpcmpeqb": 938, + "0x1000000000000": 939, + "sdc1": 940, + "sahf": 941, + "sti": 942, + "0x3a4": 943, 
+ "0x448": 944, + "0x510": 945, + "0x244": 946, + "0x00597120": 947, + "por": 948, + "0x00596ec0": 949, + "move": 950, + "0042d232": 951, + "swc1": 952, + "swc2": 953, + "0x4f0": 954, + "0xffffff80": 955, + "0x005b0905": 956, + "0x6a0": 957, + "sts": 958, + "swl": 959, + "dll_RegCloseKey": 960, + "00470060": 961, + "0x4e0": 962, + "0xfffffff7": 963, + "004504e0": 964, + "0x00596950": 965, + "0x338": 966, + "140015880": 967, + "0x00597138": 968, + "0x440": 969, + "0x005855e0": 970, + "api_ms_win_core_synch_l1_1_0": 971, + "0x005b0ce0": 972, + "0x00599f58": 973, + "0x144": 974, + "0x3a79616c": 975, + "0x80000": 976, + "0xb5": 977, + "api_ms_win_crt_heap_l1_1_0": 978, + "00452100": 979, + "0x30333a74": 980, + "0x2e8": 981, + "fst": 982, + "0x7fffffffffff": 983, + "dll_WriteFile": 984, + "00424120": 985, + "0x8b": 986, + "0x204": 987, + "0x4668e0": 988, + "0xfffffffffffffffd": 989, + "004521f0": 990, + "0x140142444": 991, + "vmulsd": 992, + "0x4550": 993, + "cache": 994, + "0x5e8": 995, + "0x989680": 996, + "1400158a0": 997, + "0xa9": 998, + "0xffffffffffffe000": 999, + "sdc2": 1000, + "0xe000": 1001, + "cmovns": 1002, + "0042b180": 1003, + "0047d0c0": 1004, + "00410cb0": 1005, + "0x005975e0": 1006, + "0x388": 1007, + "0x4000000": 1008, + "repne": 1009, + "00488b50": 1010, + "gdi32": 1011, + "0xea": 1012, + "0x234": 1013, + "0x381": 1014, + "0x407db8": 1015, + "fnstsw": 1016, + "0x214": 1017, + "140004a10": 1018, + "0x005b0888": 1019, + "0x488": 1020, + "0x004f8eb8": 1021, + "0x00597348": 1022, + "0x005975b0": 1023, + "loope": 1024, + "cmpsd": 1025, + "cdqe": 1026, + "cpuid": 1027, + "0x004e053e": 1028, + "0x005973e8": 1029, + "0x41880c": 1030, + "0x407f10": 1031, + "0x49d678": 1032, + "bnd": 1033, + "lds": 1034, + "0x4c0": 1035, + "0x005978b8": 1036, + "0x28646c69": 1037, + "0x00596f18": 1038, + "0x254": 1039, + "0x43bba9": 1040, + "0x180009298": 1041, + "pref": 1042, + "00443f53": 1043, + "0x344": 1044, + "0x004fab00": 1045, + "ymm6": 1046, + "00440440": 1047, + 
"0x408504": 1048, + "0x480": 1049, + "004436b0": 1050, + "14000d860": 1051, + "14001b330": 1052, + "0x105": 1053, + "0x4952ed": 1054, + "00489187": 1055, + "0042d8d0": 1056, + "0x202c": 1057, + "004465e0": 1058, + "004610c0": 1059, + "0x005968b0": 1060, + "0xcccc": 1061, + "0xfffffffffffffff4": 1062, + "0x4fd95c": 1063, + "0x404": 1064, + "aas": 1065, + "0x550": 1066, + "0x164": 1067, + "int1": 1068, + "0042a0a0": 1069, + "cli": 1070, + "00428080": 1071, + "00449070": 1072, + "0x005966a8": 1073, + "0x4a4fe2": 1074, + "0044b8d0": 1075, + "0x470": 1076, + "dll_FreeLibrary": 1077, + "scasd": 1078, + "0040ab90": 1079, + "00497980": 1080, + "0xff0": 1081, + "0xfffffffffffffff5": 1082, + "14000b400": 1083, + "0x19930520": 1084, + "0x00597350": 1085, + "0x1330": 1086, + "dll_MultiByteToWideChar": 1087, + "paddd": 1088, + "0x40d338": 1089, + "0x00596fb8": 1090, + "0x4726f0": 1091, + "movntps": 1092, + "0x570": 1093, + "00497e70": 1094, + "vpand": 1095, + "ymm12": 1096, + "1400ba280": 1097, + "00403b50": 1098, + "00426ab0": 1099, + "0x438": 1100, + "les": 1101, + "0x16c": 1102, + "0x450": 1103, + "0x800000": 1104, + "0x1370": 1105, + "0x1f4": 1106, + "0x3fffffff": 1107, + "0x47c": 1108, + "0x15180": 1109, + "lahf": 1110, + "0x004ba3c0": 1111, + "cwde": 1112, + "r0r0": 1113, + "0x004c14e0": 1114, + "1400122dc": 1115, + "00431590": 1116, + "0041dd10": 1117, + "0x4375c3": 1118, + "0xb6": 1119, + "0xc00": 1120, + "0x616d696e": 1121, + "00452230": 1122, + "0x2ea27ffb": 1123, + "004366b0": 1124, + "0x0058fec8": 1125, + "0x878": 1126, + "0x9a": 1127, + "0x00597858": 1128, + "0x005b1dd0": 1129, + "0x6e6f6622": 1130, + "0x005b1db8": 1131, + "api_ms_win_crt_runtime_l1_1_0": 1132, + "0x4a6128": 1133, + "0xc0000000": 1134, + "0xd04ae83d": 1135, + "00421320": 1136, + "0x00596f10": 1137, + "0x5a4d": 1138, + "00407d50": 1139, + "00410bc1": 1140, + "0x005974e8": 1141, + "0x005b0724": 1142, + "0x3b0": 1143, + "004242c0": 1144, + "0x00599ebc": 1145, + "0xe0ff5cb4": 1146, + "0040ac40": 1147, 
+ "0x005975c8": 1148, + "dll_SetLastError": 1149, + "0x004ba400": 1150, + "popfd": 1151, + "0x005b34b0": 1152, + "14000dec0": 1153, + "0x004fb880": 1154, + "0x005974d8": 1155, + "0x005b34c8": 1156, + "0x3f0": 1157, + "0x4a3246": 1158, + "0x632e5444": 1159, + "0x186a0": 1160, + "0x3fff": 1161, + "0xaaaaaaaaaaaaaaab": 1162, + "0x398": 1163, + "140003684": 1164, + "lodsd": 1165, + "004029f6": 1166, + "0x16bc": 1167, + "0042da30": 1168, + "0xe1": 1169, + "0048263a": 1170, + "0x1228": 1171, + "0x7925028c": 1172, + "14001b724": 1173, + "0x23c": 1174, + "0x00582104": 1175, + "004405c0": 1176, + "dll_DeleteCriticalSection": 1177, + "14000163b": 1178, + "0x303a30303a37302d": 1179, + "dll_WideCharToMultiByte": 1180, + "0x005977f8": 1181, + "0x005b0860": 1182, + "0x3a1a0": 1183, + "vpmuludq": 1184, + "004020ca": 1185, + "pl": 1186, + "00436890": 1187, + "0x44921d": 1188, + "140011e68": 1189, + "0043aaa0": 1190, + "0x00597558": 1191, + "0x005973c0": 1192, + "0x005974e0": 1193, + "0x005b07a0": 1194, + "api_ms_win_core_registry_l1_1_0": 1195, + "0x41bdb5": 1196, + "loop": 1197, + "rsb": 1198, + "0x004de777": 1199, + "0x358": 1200, + "0x5d0": 1201, + "00411b60": 1202, + "0x00597560": 1203, + "0x880": 1204, + "ymm11": 1205, + "004403c0": 1206, + "0x4983f8": 1207, + "0x20000000": 1208, + "r0r2": 1209, + "api_ms_win_core_winrt_string_l1_1_0": 1210, + "dll_malloc": 1211, + "ymm13": 1212, + "0040af40": 1213, + "0x005973d0": 1214, + "0044d0c0": 1215, + "daa": 1216, + "0x418852": 1217, + "gt": 1218, + "0x005a0678": 1219, + "0xdfff": 1220, + "0044aae0": 1221, + "0x487bea": 1222, + "dll_Sleep": 1223, + "0x1248": 1224, + "0x2077656e": 1225, + "0x49e06f": 1226, + "0x004b9b80": 1227, + "0x898": 1228, + "00402f8a": 1229, + "0x005b34b8": 1230, + "0x6a8": 1231, + "00454410": 1232, + "0x004d5320": 1233, + "0x111": 1234, + "0x00596ff8": 1235, + "0x224": 1236, + "0041fe40": 1237, + "0x005975d8": 1238, + "WS2_32": 1239, + "aam": 1240, + "0x00597508": 1241, + "0x6b626577": 1242, + "0047e960": 1243, 
+ "0x00596698": 1244, + "0x380": 1245, + "0xfb": 1246, + "0xffff0000": 1247, + "_0x_x": 1248, + "0x00596858": 1249, + "DLL_GetLastError": 1250, + "cmovbe": 1251, + "0x690": 1252, + "fcomp": 1253, + "ymm10": 1254, + "0x004cb9a0": 1255, + "0x888": 1256, + "prefetchnta": 1257, + "d0": 1258, + "fdiv": 1259, + "0x2pc": 1260, + "0x6b0": 1261, + "0x8a8": 1262, + "CVdsVolume": 1263, + "0x401410": 1264, + "0x08": 1265, + "0x46d886": 1266, + "0x19930522": 1267, + "0x24c": 1268, + "00421550": 1269, + "0x005973d8": 1270, + "0x870": 1271, + "0043a7d0": 1272, + "1400adbe0": 1273, + "exts": 1274, + "0x00597530": 1275, + "0x4e75": 1276, + "0x746f622d": 1277, + "140099904": 1278, + "00484920": 1279, + "0043d6f0": 1280, + "0x005975bc": 1281, + "0x00597ea0": 1282, + "0x636f6c62": 1283, + "dll__errno": 1284, + "0x004d85e0": 1285, + "0x005b0704": 1286, + "0x449d76": 1287, + "0x7ffff8": 1288, + "0x00597868": 1289, + "0x119": 1290, + "0x40e3d8": 1291, + "0xfffffff4": 1292, + "2x__": 1293, + "fistp": 1294, + "004067d0": 1295, + "0x401118": 1296, + "shlr8": 1297, + "0x005974f8": 1298, + "0x005b0be2": 1299, + "00470b80": 1300, + "0x005978d0": 1301, + "0x1004": 1302, + "8x__": 1303, + "0x005b0bee": 1304, + "0x005b15a0": 1305, + "0x03": 1306, + "0x21000": 1307, + "0x40da16": 1308, + "0x45c": 1309, + "0x4a0": 1310, + "0x6000": 1311, + "0xffffffd0": 1312, + "0x005968d0": 1313, + "0x005973f0": 1314, + "0x1001": 1315, + "0x68676965": 1316, + "0x85": 1317, + "0042c6c0": 1318, + "004712f0": 1319, + "0x0059a3c8": 1320, + "0x005b0764": 1321, + "0x1000193": 1322, + "0x1040": 1323, + "0x19930521": 1324, + "0x420": 1325, + "0042c870": 1326, + "0x00000401": 1327, + "0043a090": 1328, + "00445a00": 1329, + "0x00599f40": 1330, + "0xfbe8": 1331, + "api_ms_win_core_file_l1_1_0": 1332, + "0x00597850": 1333, + "0x19c": 1334, + "0x44847e": 1335, + "0x1220": 1336, + "0x4015cb": 1337, + "dll_SetUnhandledExceptionFilter": 1338, + "xlatb": 1339, + "0x437773": 1340, + "0x442b79": 1341, + "00404a14": 1342, + 
"0x407c2d": 1343, + "0x447001": 1344, + "0x46f8e0": 1345, + "0x6f636564": 1346, + "00459270": 1347, + "0x005b0bea": 1348, + "0x00597250": 1349, + "0x004e08b1": 1350, + "0x10000000": 1351, + "0x1278": 1352, + "api_ms_win_crt_stdio_l1_1_0": 1353, + "004037c0": 1354, + "00449ea0": 1355, + "0x26c": 1356, + "0xffffffef": 1357, + "0x3b303a74": 1358, + "0x432819": 1359, + "0x49bc94": 1360, + "0x67696c61": 1361, + "0047a7e0": 1362, + "0x370": 1363, + "0x87": 1364, + "0040c410": 1365, + "0x00593fe0": 1366, + "0x20b": 1367, + "14001244c": 1368, + "1400d4cec": 1369, + "0x140025b0c": 1370, + "0x439af2": 1371, + "0xfa0": 1372, + "dll_LoadLibraryA": 1373, + "0x890": 1374, + "0x005b1e18": 1375, + "0x0059c700": 1376, + "0x439a91": 1377, + "dll_VdsTraceEx": 1378, + "0x0058fec0": 1379, + "0x00596ff0": 1380, + "0x4r15": 1381, + "0x500": 1382, + "00450840": 1383, + "0x004faa80": 1384, + "0x005967a8": 1385, + "ucomiss": 1386, + "0xf1": 1387, + "0x43b231": 1388, + "0x49eb38": 1389, + "0x4a0379": 1390, + "client": 1391, + "stmxcsr": 1392, + "0049b4a0": 1393, + "1400140f0": 1394, + "0x232": 1395, + "0x49c3c2": 1396, + "14000f940": 1397, + "char_traits_char__": 1398, + "pshufb": 1399, + "0x11d": 1400, + "1400b96e4": 1401, + "CVdsDiskLun": 1402, + "00408e70": 1403, + "0xc9": 1404, + "0x0059c760": 1405, + "0x1260": 1406, + "0x402145": 1407, + "0x428": 1408, + "char_traits_char___class_std": 1409, + "00434d00": 1410, + "0x140042610": 1411, + "0x750": 1412, + "dll_GetCurrentThreadId": 1413, + "game": 1414, + "install": 1415, + "0x140025a50": 1416, + "0x47b982": 1417, + "0x6f2d6e69": 1418, + "api_ms_win_crt_string_l1_1_0": 1419, + "004317b0": 1420, + "0x140025c4a": 1421, + "0x4a8": 1422, + "fsub": 1423, + "0x1388": 1424, + "0x40218d": 1425, + "0x8a": 1426, + "aad": 1427, + "0x47b9b3": 1428, + "0x005b0838": 1429, + "0x558": 1430, + "exe": 1431, + "0x00596fb0": 1432, + "004400d0": 1433, + "0x1450": 1434, + "0x740": 1435, + "0xef": 1436, + "24": 1437, + "0x4b8": 1438, + "0x22c": 1439, + "0x490": 
1440, + "004722d0": 1441, + "0x005968a0": 1442, + "0x6e6f6974": 1443, + "0xc000008e": 1444, + "0042c8c0": 1445, + "0xaaaaaaab": 1446, + "fldcw": 1447, + "0x004e08a0": 1448, + "spc": 1449, + "00432f20": 1450, + "0x005b0ce8": 1451, + "0x20019": 1452, + "dll_VdsTraceW": 1453, + "0040cbc0": 1454, + "0x10b": 1455, + "0xca": 1456, + "0x005974c8": 1457, + "0x005977f0": 1458, + "vmovsd": 1459, + "00412280": 1460, + "0044ac50": 1461, + "0x004ba500": 1462, + "0x4a3a4e": 1463, + "0044aed0": 1464, + "00494db0": 1465, + "dll_SysStringLen": 1466, + "0040e870": 1467, + "0x8f": 1468, + "vaddsd": 1469, + "00402080": 1470, + "0041e310": 1471, + "jecxz": 1472, + "00426a20": 1473, + "0048205b": 1474, + "0x16b8": 1475, + "0x472c42": 1476, + "0x489e01": 1477, + "0x4e8": 1478, + "0xfffffffffff00000": 1479, + "0xFC": 1480, + "1400c50b8": 1481, + "DLL_GetProcAddress": 1482, + "pr": 1483, + "0x00596828": 1484, + "00429900": 1485, + "subs": 1486, + "0x441846": 1487, + "0x4c8": 1488, + "0xc0000091": 1489, + "0002c880": 1490, + "00433e50": 1491, + "0x331": 1492, + "0xfffffff9": 1493, + "0x004e526e": 1494, + "0x4641a4": 1495, + "00446c30": 1496, + "00468360": 1497, + "0x005b0bef": 1498, + "0x41dbe8": 1499, + "00402f90": 1500, + "00446190": 1501, + "0049b0e0": 1502, + "0x2d656e69": 1503, + "SHLWAPI": 1504, + "dll_strlen": 1505, + "0x43b772": 1506, + "0x6f6c622d": 1507, + "0x9f": 1508, + "movne": 1509, + "0x004e0ea0": 1510, + "DLL_CloseHandle": 1511, + "mac": 1512, + "0x004c1360": 1513, + "fiadd": 1514, + "00445f60": 1515, + "0x005978a8": 1516, + "0x348": 1517, + "0x41e065": 1518, + "00438dd0": 1519, + "0x407026": 1520, + "0x69666f72706d656d": 1521, + "0xad": 1522, + "WININET": 1523, + "andpd": 1524, + "0041de90": 1525, + "0x4029b7": 1526, + "0x45c11f": 1527, + "0x48989e": 1528, + "fr0": 1529, + "004309e0": 1530, + "00440d30": 1531, + "0x004e13b6": 1532, + "0x00596958": 1533, + "0x00597830": 1534, + "0x404828": 1535, + "1000767c0": 1536, + "00446390": 1537, + "0x004e7858": 1538, + "0x005977a8": 
1539, + "0x489a9b": 1540, + "0x4395d8": 1541, + "0x4d8": 1542, + "004215b0": 1543, + "0x005a0680": 1544, + "0x49c3a3": 1545, + "0x40b2e3": 1546, + "0x47d53f": 1547, + "00436ad0": 1548, + "0x00597900": 1549, + "0x30313a74": 1550, + "0x8040": 1551, + "api_ms_win_core_heap_l1_1_0": 1552, + "dll_void___cdecl_operator_delete_void__": 1553, + "00411e80": 1554, + "0045a5b0": 1555, + "0x8d": 1556, + "punpckldq": 1557, + "0x00597160": 1558, + "0x00597170": 1559, + "0x112": 1560, + "cqo": 1561, + "dll_GetTickCount": 1562, + "0044d300": 1563, + "0047e800": 1564, + "0x004f8ef0": 1565, + "0x005968b8": 1566, + "0x005a0620": 1567, + "0x43b573": 1568, + "0x49b8b1": 1569, + "0x4a2582": 1570, + "0043452a": 1571, + "0049b180": 1572, + "0x43974f": 1573, + "00467d10": 1574, + "0x00596830": 1575, + "0x107": 1576, + "0x2e656d69746e7572": 1577, + "UPX1": 1578, + "ldm": 1579, + "0x005821e8": 1580, + "0x00597820": 1581, + "0x434": 1582, + "r0r1": 1583, + "00446050": 1584, + "0047ace0": 1585, + "0x004ba4c0": 1586, + "0x00596890": 1587, + "0x00597840": 1588, + "0x005b0718": 1589, + "0x005b0be7": 1590, + "0x140051cd0": 1591, + "0xb31a546d": 1592, + "0x00597890": 1593, + "0x402244": 1594, + "0x4395f3": 1595, + "0x47d538": 1596, + "0x528": 1597, + "dll_GetModuleHandleA": 1598, + "0x00597000": 1599, + "0x00599ee0": 1600, + "0x495475": 1601, + "api_ms_win_security_base_l1_1_0": 1602, + "0x00596f58": 1603, + "0x005b07b0": 1604, + "0x005b0be6": 1605, + "0x42c406": 1606, + "0x4916f0": 1607, + "0x80004003": 1608, + "0xbd4ad792": 1609, + "api_ms_win_core_libraryloader_l1_2_0": 1610, + "sfence": 1611, + "00403b00": 1612, + "00416bc8": 1613, + "004377d0": 1614, + "0x005b0808": 1615, + "0x47eddc": 1616, + "0x4a4652": 1617, + "0x8000ffff": 1618, + "dll_GetCurrentProcess": 1619, + "00406314": 1620, + "0045a010": 1621, + "004795f0": 1622, + "0x00596840": 1623, + "0x2723a30d96da1399": 1624, + "psrlq": 1625, + "004301c0": 1626, + "0047d220": 1627, + "0x00596e38": 1628, + "0x00596f98": 1629, + "0x005b0be0": 
1630, + "0x005b2e40": 1631, + "0x14009c028": 1632, + "dll_CreateFileW": 1633, + "fld1": 1634, + "0x005978e0": 1635, + "0x005b07e0": 1636, + "0xc0000409": 1637, + "dll_GetModuleHandleW": 1638, + "paddq": 1639, + "00449a20": 1640, + "0044b1d0": 1641, + "0044c160": 1642, + "0045da00": 1643, + "0x004e7848": 1644, + "0x314": 1645, + "0x548": 1646, + "0x8b0": 1647, + "0xc998": 1648, + "14000d874": 1649, + "00449c90": 1650, + "0x005968e0": 1651, + "00405600": 1652, + "00409410": 1653, + "0x005821f0": 1654, + "0x005b0880": 1655, + "ble": 1656, + "cmovge": 1657, + "comisd": 1658, + "00442f70": 1659, + "00443d40": 1660, + "0x459afb": 1661, + "0x4904ff": 1662, + "0x80000001": 1663, + "0x49113a": 1664, + "0x696e": 1665, + "0xbbad4102": 1666, + "0045f6c0": 1667, + "1400021b2": 1668, + "fnstcw": 1669, + "0x00597860": 1670, + "0x005b0bed": 1671, + "0x1400f0e70": 1672, + "0x498651": 1673, + "0x4a02c4": 1674, + "0x580": 1675, + "0x6b8": 1676, + "00474c10": 1677, + "0x00596878": 1678, + "0x414": 1679, + "0x4599bb": 1680, + "0x5b4": 1681, + "0x963f9bff": 1682, + "fcom": 1683, + "0x005b0f80": 1684, + "0x44231b": 1685, + "0x4709a5": 1686, + "0x47b719": 1687, + "0x74c": 1688, + "0xb7": 1689, + "0xea38ec9079f01541": 1690, + "0x10000000001": 1691, + "0x3000": 1692, + "00409bc0": 1693, + "0x47bbcc": 1694, + "140013c4c": 1695, + "das": 1696, + "0x005b09c0": 1697, + "dll_WaitForSingleObject": 1698, + "dll_lstrlenA": 1699, + "MSVCR100": 1700, + "adcx": 1701, + "filesystem": 1702, + "0045fbf0": 1703, + "0x005966b0": 1704, + "0x21040": 1705, + "0xc994": 1706, + "00404068": 1707, + "00443be0": 1708, + "0x360": 1709, + "0xb0c23ed3": 1710, + "salc": 1711, + "0x00597b80": 1712, + "0x07": 1713, + "0x140002ef9": 1714, + "0x06": 1715, + "blt": 1716, + "0043a710": 1717, + "0x16d": 1718, + "0x20a": 1719, + "0x461134": 1720, + "0040d9a0": 1721, + "0046c740": 1722, + "0x004c15a0": 1723, + "0x47bd4a": 1724, + "0x6c612d6c": 1725, + "fpul": 1726, + "0044b9c0": 1727, + "0x004b9c00": 1728, + "0x40c4ab": 1729, 
+ "0x97": 1730, + "dll_CreateFileA": 1731, + "0x005b0d28": 1732, + "0x860": 1733, + "0xffffffc0": 1734, + "004430f0": 1735, + "0x412c68": 1736, + "cvtsd2ss": 1737, + "004040a0": 1738, + "00423890": 1739, + "004590e0": 1740, + "0048e3a0": 1741, + "0x140012f92": 1742, + "004218e0": 1743, + "0x00597260": 1744, + "0x1218": 1745, + "0x7ff00000": 1746, + "0x99": 1747, + "0045f1f0": 1748, + "0048c5f0": 1749, + "0x005966c0": 1750, + "0x005b07a8": 1751, + "0x3000000": 1752, + "0x40702c": 1753, + "00407c90": 1754, + "00431060": 1755, + "0049b860": 1756, + "0x80000003": 1757, + "dll_GetEnhMetaFileA": 1758, + "0x402a3a": 1759, + "0x41094f": 1760, + "0047d130": 1761, + "0x41ddf8": 1762, + "0x4ec0": 1763, + "0042ea40": 1764, + "0x004c2b40": 1765, + "0x409930": 1766, + "0x41b695": 1767, + "0xff00": 1768, + "00408f80": 1769, + "0x42f607": 1770, + "0x8e": 1771, + "0x93": 1772, + "0xfffffff5": 1773, + "0x200000": 1774, + "0x213": 1775, + "0x4055e9": 1776, + "h": 1777, + "0041e8f0": 1778, + "0042edf0": 1779, + "004826df": 1780, + "004972b0": 1781, + "0x868": 1782, + "1400c20a0": 1783, + "fxch": 1784, + "ldc": 1785, + "stosw": 1786, + "0040be70": 1787, + "0041b1b0": 1788, + "00479ad0": 1789, + "0x00597110": 1790, + "0x416c14": 1791, + "dll_GetModuleFileNameA": 1792, + "into": 1793, + "00403f20": 1794, + "0040c6f0": 1795, + "0040df50": 1796, + "0043a030": 1797, + "0x426c2d": 1798, + "exception": 1799, + "00405590": 1800, + "00420c20": 1801, + "00486300": 1802, + "0x004d8340": 1803, + "0x6574": 1804, + "0045a070": 1805, + "00498cb0": 1806, + "0x106": 1807, + "0xffffffc9": 1808, + "fsubr": 1809, + "repz": 1810, + "0043cf20": 1811, + "0x209": 1812, + "api_ms_win_core_sysinfo_l1_1_0": 1813, + "0040add0": 1814, + "0x004e0a9e": 1815, + "0x27c": 1816, + "__1": 1817, + "lcall": 1818, + "00452080": 1819, + "0046c580": 1820, + "0x004c1720": 1821, + "0xf4240": 1822, + "0002c960": 1823, + "0041b490": 1824, + "004679c0": 1825, + "0047a1c0": 1826, + "0x216": 1827, + "0x2868": 1828, + "0xfde8": 1829, 
+ "00412490": 1830, + "004693f0": 1831, + "0x00597340": 1832, + "0x3b313a74": 1833, + "api_ms_win_core_handle_l1_1_0": 1834, + "00438660": 1835, + "0046f770": 1836, + "0x004d2140": 1837, + "0x1280": 1838, + "0x43c5e7": 1839, + "0x47bd60": 1840, + "0x0A": 1841, + "0x488054": 1842, + "0xc22e450672894ab7": 1843, + "0x005859e0": 1844, + "0x00597420": 1845, + "0x264": 1846, + "0xfffffffffffffff6": 1847, + "vpsrlq": 1848, + "00401bc0": 1849, + "004721a0": 1850, + "0x00597258": 1851, + "0x109": 1852, + "0x45992c": 1853, + "00405a0c": 1854, + "0x40c": 1855, + "0xfff8": 1856, + "00411bc0": 1857, + "0x41d0e7": 1858, + "0xe18": 1859, + "14002db00": 1860, + "ymm14": 1861, + "004409f0": 1862, + "0046ffc0": 1863, + "0x447e09": 1864, + "0x49d005": 1865, + "0x49d012": 1866, + "0x858": 1867, + "0xc0000093": 1868, + "aris": 1869, + "0x004c1420": 1870, + "0x17c": 1871, + "0x8000000": 1872, + "140001d61": 1873, + "0x004c16c0": 1874, + "0x005969c0": 1875, + "MSVBVM60": 1876, + "shll8": 1877, + "0x004dea17": 1878, + "0x4104bf": 1879, + "0x76c": 1880, + "0xc000008f": 1881, + "dll_GetEnhMetaFileW": 1882, + "0x194": 1883, + "004313f0": 1884, + "00438730": 1885, + "0x004de762": 1886, + "0x14009e848": 1887, + "0x29303032": 1888, + "0x45c0bc": 1889, + "0xb9": 1890, + "0x47b6e8": 1891, + "0x47c999": 1892, + "0xf7": 1893, + "004012b0": 1894, + "004439f0": 1895, + "0xF8": 1896, + "0xb2": 1897, + "movt": 1898, + "00461480": 1899, + "0x004c7740": 1900, + "SHELL32": 1901, + "dll_SendMessageA": 1902, + "0x004c1600": 1903, + "0x390": 1904, + "0x67696568": 1905, + "0x7ffffffe": 1906, + "1400085a4": 1907, + "00411be0": 1908, + "0x459a6c": 1909, + "0x49b511": 1910, + "0x656c7275": 1911, + "140007004": 1912, + "DLL_VirtualAlloc": 1913, + "fisttp": 1914, + "00487590": 1915, + "004a38f0": 1916, + "0x0059c688": 1917, + "0xcccccccd": 1918, + "dll_GetCurrentProcessId": 1919, + "punpcklbw": 1920, + "00411aa0": 1921, + "00471a90": 1922, + "004a25b0": 1923, + "0x140046a00": 1924, + "0x1400ac010": 1925, + 
"0x30000": 1926, + "0x508": 1927, + "0xffffffffffffff80": 1928, + "00436b40": 1929, + "0x005970f0": 1930, + "0x1003": 1931, + "0x41c": 1932, + "0x004deb66": 1933, + "0x1fffffff": 1934, + "0x36312e30": 1935, + "movnti": 1936, + "0040d6d0": 1937, + "004177c0": 1938, + "004985c0": 1939, + "0x6f0": 1940, + "CppMicroServices4": 1941, + "0042a4a0": 1942, + "0x004cebe0": 1943, + "0x005b2e50": 1944, + "palignr": 1945, + "0040ea80": 1946, + "0042a100": 1947, + "0x005978c0": 1948, + "0x78657427": 1949, + "moveq": 1950, + "0x004c17e0": 1951, + "0x005967e0": 1952, + "0xffffffffffffffc0": 1953, + "dll_EncodePointer": 1954, + "pextrw": 1955, + "0042a8a0": 1956, + "0x46aa23": 1957, + "0x4f8": 1958, + "0x748": 1959, + "r0r8": 1960, + "0x4385be": 1961, + "0x441dad": 1962, + "0x7ff0": 1963, + "0x7ffff800000": 1964, + "movea": 1965, + "00425e70": 1966, + "0042b580": 1967, + "0x100000004": 1968, + "0x140042a10": 1969, + "0x4012c4": 1970, + "0x412a42": 1971, + "0x89": 1972, + "14": 1973, + "140002d29": 1974, + "cmn": 1975, + "00429aa0": 1976, + "00438490": 1977, + "0x004de768": 1978, + "0x00596848": 1979, + "0x00596ae0": 1980, + "0x005b09d0": 1981, + "0x2820": 1982, + "0x40445e": 1983, + "0x404a64": 1984, + "0x449dea": 1985, + "0x491e69": 1986, + "0x4f4": 1987, + "14000bd60": 1988, + "00415d90": 1989, + "00426fa0": 1990, + "004597b0": 1991, + "0047e140": 1992, + "0048f370": 1993, + "0x19a": 1994, + "0x43cb4b": 1995, + "dll_SetFilePointer": 1996, + "00415900": 1997, + "0045a380": 1998, + "0x005905c0": 1999 + }, + "unk_token": "[UNK]" + } +} \ No newline at end of file diff --git a/exe2json.py b/exe2json.py index 9765717..8ca3042 100644 --- a/exe2json.py +++ b/exe2json.py @@ -1,11 +1,36 @@ -import os import r2pipe -import re import hashlib -import log_utils +from my_utils import * import json +# 基础块抽取 +from bert.obtain_inst_vec import bb2vec +from tqdm import tqdm +import numpy as np +import os +# 禁用分词器多线程 +os.environ["TOKENIZERS_PARALLELISM"] = "false" +ret_trap_opcode_family = ["ret", 
"hlt", "int3", "ud2"] + + +def extract_opcode(disasm_text): + """ + 从反汇编文本中提取操作码和操作数 + 正则表达式用于匹配操作码和操作数,考虑到操作数可能包含空格和逗号 + 将操作码与操作数转化为bert模型输入 + """ + op_list = disasm_text.split(' ') + res = [] + for item in op_list: + item = item.strip().replace(',', '') + if '[' in item: + res.append('[') + res.append(item.replace('[', '').replace(']', '')) + if ']' in item: + res.append(']') + return ' '.join(res) + def calc_sha256(file_path): with open(file_path, 'rb') as f: bytes = f.read() @@ -13,57 +38,88 @@ def calc_sha256(file_path): sha256 = sha256obj.hexdigest() return sha256 -def extract_opcode(disasm_text): - """ - 从反汇编文本中提取操作码和操作数 - 正则表达式用于匹配操作码和操作数,考虑到操作数可能包含空格和逗号 - """ - match = re.search(r"^\s*(\S+)(?:\s+(.*))?$", disasm_text) - if match: - opcode = match.group(1) - # operands_str = match.group(2) if match.group(2) is not None else "" - # split_pattern = re.compile(r",(?![^\[]*\])") # 用于切分操作数的正则表达式 - # operands = split_pattern.split(operands_str) - # return opcode, [op.strip() for op in operands if op.strip()] - return opcode - return "" -def get_graph_cfg_r2pipe(r2pipe_open): +def get_graph_cfg_r2pipe(r2pipe_open, file_path): # CFG提取 acfg_item = [] try: # 获取函数列表 function_list = r2pipe_open.cmdj("aflj") - - for function in function_list: - # 局部函数内的特征提取 node_list = [] edge_list = [] temp_edge_list = [] - block_list = r2pipe_open.cmdj("afbj @" + str(function['offset'])) - block_number = len(block_list) block_feature_list = [] - for block in block_list: - node_list.append(block["addr"]) + # 基本快块列表 + block_list = r2pipe_open.cmdj("afbj @" + str(function['offset'])) + # 获取基本块数量 + block_number = len(block_list) + + for block in block_list: + # 基础块内的语句 + block_addr = block["addr"] + block_Statement = [] + + node_list.append(block["addr"]) # 获取基本块的反汇编指令 disasm = r2pipe_open.cmdj("pdj " + str(block["ninstr"]) + " @" + str(block["addr"])) if disasm: - for op in disasm: - if op["type"] == "invalid": - continue - # TODO :这里需要处理指令的特征提取 - block_feature = '' - 
block_feature_list.append(block_feature) + for op_index, op in enumerate(disasm): + # 提取操作码并转换为bert模型输入格式 + block_Statement.append(extract_opcode(op["disasm"])) + # 处理跳转码并构建cfg + if 'jump' in op: + if op['jump'] == 0: + if op_index != len(disasm) - 1: + node_list.append(disasm[op_index + 1]['offset']) - # 处理跳转指令 - if "jump" in op and op["jump"] != 0: - temp_edge_list.append([block["addr"], op["jump"]]) + elif op['type'] == 'jmp': + temp_edge_list.append([block["addr"], op['jump']]) + if op_index != len(disasm) - 1: + node_list.append(disasm[op_index + 1]['offset']) + + elif op['type'] == 'cjmp': + temp_edge_list.append([block["addr"], op['jump']]) + if op_index == len(disasm) - 1: + temp_edge_list.append([block_addr, op['jump']]) + + else: + temp_edge_list.append([block_addr, disasm[op_index + 1]["offset"]]) + node_list.append(disasm[op_index + 1]["offset"]) + elif op['type'] == 'call': + temp_edge_list.append([block_addr, op["jump"]]) + temp_edge_list.append([op["jump"], block_addr]) + if op_index == len(disasm) - 1: + temp_edge_list.append([block_addr, op["offset"] + op["size"]]) + else: + logger.warning( + f"二进制可执行文件解析警告,跳转指令识别出错,指令{op}") + + # 操作码不存在跳转指令 + else: + if op_index != len(disasm) - 1: + # 当前指令不是基础块的最后一条指令 + if op in ret_trap_opcode_family and op["type"] in ["ret", "trap"]: + node_list.append(disasm[op_index + 1]["offset"]) + + else: + # 当前指令是基础块的最后一条指令 + if op not in ret_trap_opcode_family or op["type"] not in ["ret", "trap"]: + temp_edge_list.append([block_addr, op["offset"] + op["size"]]) + + # bert模型转化特征 + block_feature_list = bb2vec(block_Statement) + # block_feature_list = [] + # 过滤不存在的边 for temp_edge in temp_edge_list: - if temp_edge[1] in node_list: + if temp_edge[0] in node_list and temp_edge[1] in node_list: edge_list.append(temp_edge) + # 单独错误信息日志 + if block_number == 0 or len(block_feature_list) == 0: + logger.warning(f"二进制可执行文件解析出错,出错文件:{file_path},出错函数地址:{function['offset']},基础块个数{block_number},基础块特征{block_feature_list}") + # cfg构建 acfg 
= { 'block_number': block_number, 'block_edges': [[d[0] for d in edge_list], [d[1] for d in edge_list]], @@ -74,45 +130,16 @@ def get_graph_cfg_r2pipe(r2pipe_open): except Exception as e: return False, e, None - # for block in block_list: - # node_list.append(block["addr"]) - # - # # 获取基本块的反汇编指令 - # disasm = r2pipe_open.cmdj("pdj " + str(block["ninstr"]) + " @" + str(block["addr"])) - # node_info = [] - # if disasm: - # for op in disasm: - # if op["type"] == "invalid": - # continue - # opcode, operands = extract_opcode(op["disasm"]) - # # 处理跳转指令 - # if "jump" in op and op["jump"] != 0: - # temp_edge_list.append([block["addr"], op["jump"]]) - # node_info.append([op["offset"], op["bytes"], opcode, op["jump"]]) - # else: - # node_info.append([op["offset"], op["bytes"], opcode, None]) - # node_info_list.append(node_info) - # 完成 CFG 构建后, 检查并清理不存在的出边 - - - # 获取排序后元素的原始索引 - # sorted_indices = [i for i, v in sorted(enumerate(node_list), key=lambda x: x[1])] - # # 根据这些索引重新排列 - # node_list = [node_list[i] for i in sorted_indices] - # node_info_list = [node_info_list[i] for i in sorted_indices] - # - # return True, "二进制可执行文件解析成功", node_list, edge_list, node_info_list - # except Exception as e: - # return False, e, None, None, None def get_graph_fcg_r2pipe(r2pipe_open): # FCG提取 try: - function_list = r2pipe_open.cmdj("aflj") node_list = [] func_name_list = [] edge_list = [] temp_edge_list = [] + + function_list = r2pipe_open.cmdj("aflj") function_num = len(function_list) for function in function_list: @@ -121,13 +148,11 @@ def get_graph_fcg_r2pipe(r2pipe_open): pdf = r2pipe_open.cmdj('pdfj') if pdf is None: continue - node_bytes = "" node_opcode = "" for op in pdf["ops"]: if op["type"] == "invalid": continue - node_bytes += op["bytes"] opcode = extract_opcode(op["disasm"]) node_opcode += opcode + " " @@ -141,13 +166,14 @@ def get_graph_fcg_r2pipe(r2pipe_open): for temp_edge in temp_edge_list: if temp_edge[1] in node_list: edge_list.append(temp_edge) - sub_function_name_list = 
('fcn.', 'loc.', 'main', 'entry') - func_name_list = [func_name for func_name in func_name_list if not func_name.startswith(sub_function_name_list)] + sub_function_name_list = ('sym.','sub','imp') + func_name_list = [func_name for func_name in func_name_list if func_name.startswith(sub_function_name_list)] return True, "二进制可执行文件解析成功", function_num, edge_list, func_name_list except Exception as e: return False, e, None, None, None def get_r2pipe(file_path): + # 初始化r2pipe try: r2 = r2pipe.open(file_path, flags=['-2']) r2.cmd("aaa") @@ -157,16 +183,21 @@ def get_r2pipe(file_path): return None def init_logging(): - log_file = "./out/exe2json.log" - logging = log_utils.setup_logger('exe2json', log_file) - return logging + # 初始化日志 + log_file = "./log/exe2json.log" + return setup_logger('exe2json', log_file) -def exe_to_json(file_path, output_path): - logging = init_logging() +def exe_to_json(file_path): + output_path = "./out/json/malware" + + + # 获取r2pipe并解析文件 解析完即释放r2 r2 = get_r2pipe(file_path) fcg_Operation_flag, fcg_Operation_message, function_num, function_fcg_edge_list, function_names = get_graph_fcg_r2pipe(r2) - cfg_Operation_flag, cfg_Operation_message, cfg_item = get_graph_cfg_r2pipe(r2) + cfg_Operation_flag, cfg_Operation_message, cfg_item = get_graph_cfg_r2pipe(r2,file_path) + r2.quit() + # 文件json构建 file_fingerprint = calc_sha256(file_path) if fcg_Operation_flag and cfg_Operation_flag: json_obj = { @@ -178,19 +209,24 @@ def exe_to_json(file_path, output_path): 'function_names': function_names } else: - logging.error(f"二进制可执行文件解析失败 文件地址{file_path}") + logger.error(f"二进制可执行文件解析失败 文件名{file_path}") if not fcg_Operation_flag: - logging.error(f"fcg错误:{fcg_Operation_message}") + logger.error(f"fcg错误:{fcg_Operation_message}") if not cfg_Operation_flag: - logging.error(f"cfg错误:{cfg_Operation_message}") + logger.error(f"cfg错误:{cfg_Operation_message}") return False - r2.quit() - result = json.dumps(json_obj,ensure_ascii=False) + # json写入 + result = 
json.dumps(json_obj,ensure_ascii=False, default=lambda x: float(x) if isinstance(x, np.float32) else x) + os.makedirs(output_path, exist_ok=True) with open(os.path.join(output_path, file_fingerprint + '.jsonl'), 'w') as out: out.write(result) out.close() return True if __name__ == '__main__': - test_file_path = '/mnt/d/bishe/exe2json/sample/VirusShare_0a3b625380161cf92c4bb10135326bb5' - exe_to_json(test_file_path, './out/json') + logger = init_logging() + sample_file_path = "/mnt/d/bishe/dataset/sample_malware" + sample_file_list = os.listdir(sample_file_path) + multi_thread(exe_to_json, [os.path.join(sample_file_path, file_name) for file_name in sample_file_list]) + # test_file_path = '/mnt/d/bishe/exe2json/sample/VirusShare_0a3b625380161cf92c4bb10135326bb5' + # exe_to_json(test_file_path) diff --git a/my_utils.py b/my_utils.py new file mode 100644 index 0000000..1a9dead --- /dev/null +++ b/my_utils.py @@ -0,0 +1,65 @@ +import logging +import os + +""" +日志工具 + +使用方法: + logger = setup_logger(日志记录器的实例名字, 日志文件目录) +""" +def setup_logger(name, log_file, level=logging.INFO): + """Function setup as many loggers as you want""" + if not os.path.exists(os.path.dirname(log_file)): + os.makedirs(os.path.dirname(log_file)) + + formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') + + handler = logging.FileHandler(log_file) + handler.setFormatter(formatter) + + # 控制台是否输出日志信息 + # stream_handler = logging.StreamHandler() + # stream_handler.setFormatter(formatter) + + logger = logging.getLogger(name) + logger.setLevel(level) + logger.addHandler(handler) + # 控制台 + # logger.addHandler(stream_handler) + + # 刷新原有log文件 + + if os.path.exists(log_file): + open(log_file, 'w').close() + + return logger + + +""" +多线程工具 +""" +THREAD_FULL = os.cpu_count() +THREAD_HALF = int(os.cpu_count() / 2) +def multi_thread(func, args, thread_num=THREAD_FULL): + """ + 多线程执行函数 + :param func: 函数 + :param args: list函数参数 + :param thread_num: 线程数 + :return: + """ + import concurrent.futures + 
from tqdm import tqdm + logger = setup_logger('multi_thread', './multi_thread.log') + result = [] + with concurrent.futures.ThreadPoolExecutor(max_workers=thread_num) as executor: + futures_to_args = { + executor.submit(func, arg): arg for arg in args + } + for future in tqdm(concurrent.futures.as_completed(futures_to_args), total=len(args)): + try: + result.append(future.result()) + except Exception as exc: + logger.error('%r generated an exception: %s' % (futures_to_args[future], exc)) + return result +