DroidEvolver/feature_extraction.py

70 lines
1.8 KiB
Python
Raw Permalink Normal View History

2019-06-13 18:31:42 +08:00
'''
Extract detection feature for each app according to included Android API
input: smali files of an app stored under /app_name/
output: detection features for the app stored in app_name.feature
'''
import os
import sys
import string
import pickle as pkl
import argparse
import glob
import operator
def extract_feature(filedir):
feature = []
for dirpath, dirnames, filenames in os.walk(filedir):
for filename in [f for f in filenames if f.endswith ('.smali')]:
fn = os.path.join(dirpath, filename) # each smali file
lines = open(fn,'r').readlines()
lines = [line.strip() for line in lines]
for line in lines:
# get all class names in invoke
try:
start = line.index(', ') + len(', ')
end = line.index(';', start)
classes = line[start:end]
except ValueError:
classes = ''
# get invoking method name
try:
start = line.index(';->') + len(';->')
end = line.index('(', start)
methods = line[start:end]
except ValueError:
methods = ''
objects = classes.split('/')
a = len(objects)
current_class = classes[:-(len(objects[a-1])+1)]
if current_class in packages: # android api
fe = classes + ':' + methods
feature.append(fe)
with open(filedir + '.feature', 'wb') as result:
pkl.dump(feature, result)
def main():
family = ['android','google','java','javax', 'xml','apache', 'junit','json', 'dom']
# correspond to the android.*, com.google.*, java.*, javax.*, org.xml.*, org.apache.*, junit.*, org.json, and org.w3c.dom.* packages
global packages
packages = open('android_package.name','r').readlines()
packages = [package.strip() for package in packages] # packages correspond to family
print 'official package number:', len(packages)
names = ['--list of app names ----']
for app_name in names:
extract_feature(app_name)
if __name__ == "__main__":
main()