Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions find_infected_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from installed_softwares import RegistryConnection
from installed_softwares import InstalledSoftware


class FindFiles:
    """Look up the on-disk directory of an installed program.

    The lookup is driven by the registry fields collected through
    ``InstalledSoftware.dump_software_lst_to_json``.
    """

    # Sentinel returned when no usable directory can be determined.
    NO_PATH = 'NO PATH'

    def __init__(self):
        self.reg_conn = RegistryConnection()
        self.inst_sftw = InstalledSoftware()

    def dir_file_list(self):
        """Return the registry fields used to locate each program.

        The call passes ``dump=False``, so the collected list is returned
        instead of being written to ``name_dir.json``. The result holds one
        inner list per requested field, in this order: ``DisplayName``,
        ``InstallLocation``, ``InstallSource``, ``UninstallString``.
        """
        return self.inst_sftw.dump_software_lst_to_json(
            ['DisplayName', 'InstallLocation', 'InstallSource', 'UninstallString'],
            'name_dir.json', False)

    def sftw_name_to_dir(self, sftw_name):
        """Return a directory for ``sftw_name``, or ``'NO PATH'``.

        ``sftw_name`` must be the ``DisplayName`` exactly as stored in the
        registry. The first non-empty value of ``InstallLocation`` and
        ``InstallSource`` is returned unchanged. Otherwise the directory
        part of ``UninstallString`` is used (everything before the last
        backslash, with double quotes removed).

        ``'NO PATH'`` is returned when the name is unknown or when none of
        the fields yields a directory.
        """
        rows = self.dir_file_list()
        if not rows:
            return self.NO_PATH

        # NOTE(review): assumes entries at the same position in every row
        # describe the same program -- confirm against InstalledSoftware.
        try:
            index = rows[0].index(sftw_name)
        except ValueError:
            # Unknown program: report "no path" instead of crashing the
            # caller, which applies this lookup to every matched name.
            return self.NO_PATH

        # InstallLocation and InstallSource already hold directories.
        for field_row in rows[1:3]:
            value = field_row[index]
            if value:
                return value

        # UninstallString points at an executable: keep only its directory.
        uninstall = rows[3][index]
        if uninstall:
            directory = uninstall.rpartition('\\')[0].replace('"', '')
            if directory:
                return directory
        return self.NO_PATH





13 changes: 7 additions & 6 deletions installed_softwares.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,15 @@ def remove_empty_list_items(self, lst):
len_cols = len_cols - 1
col -= 1

def dump_software_lst_to_json(self, requested_fields_lst):
def dump_software_lst_to_json(self, requested_fields_lst, file_name = 'registry_data.json', dump = True):
final_lst = []
for field in requested_fields_lst:
self.requested_data_field = field
final_lst.append(self.get_installed_software())
self.remove_empty_list_items(final_lst)

df = pd.DataFrame(data=final_lst)
df = df.rename(index={df.index[i]: requested_fields_lst[i] for i in range(len(requested_fields_lst))})
df.to_json("registry_data.json")

if dump:
df = pd.DataFrame(data=final_lst)
df = df.rename(index={df.index[i]: requested_fields_lst[i] for i in range(len(requested_fields_lst))})
df.to_json(file_name)
else:
return final_lst
18 changes: 9 additions & 9 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,20 @@


def execute():
print('Initializing the scan & matching process...')
print("Downloading CVE data...")
DownloadDb()
print("Downloading CPE data...")
download_db.download_file()
download_db.unzip_file('official-cpe-dictionary_v2.3.xml.zip', directory_to_extract=None)
# print('Initializing the scan & matching process...')
# print("Downloading CVE data...")
# DownloadDb()
# print("Downloading CPE data...")
# download_db.download_file()
# download_db.unzip_file('official-cpe-dictionary_v2.3.xml.zip', directory_to_extract=None)

print('Getting installed softwares...')
i_s = InstalledSoftware()
i_s.dump_software_lst_to_json(["Publisher", 'DisplayVersion', 'DisplayName'])

print('Parsing the CPE data...')
b = CpeXmlParser('official-cpe-dictionary_v2.3.xml')
b.csv_creator('official-cpe-dictionary_v2.3.xml')
# print('Parsing the CPE data...')
# b = CpeXmlParser('official-cpe-dictionary_v2.3.xml')
# b.csv_creator('official-cpe-dictionary_v2.3.xml')

c = MatcherCveCpe()
res_json = c.match_cve_cpe()
Expand Down
10 changes: 7 additions & 3 deletions matching_cve_cpe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pandas as pd
from tqdm import tqdm
import json
from find_infected_files import FindFiles


class MatcherCveCpe:
Expand All @@ -17,22 +18,25 @@ def __init__(self):
cpe_sw_fitter = sEngine.CpeSwFitter("parsed_xml.csv", "cosin")
self.cpe_data_dict = cpe_sw_fitter.fit_all(1)
print('Engine finished and CPE-Installed softwares results dumped!')
self.find_inf_files = FindFiles()

def match_cve_cpe(self):
# Initialize data frame items
sftw_names = list(self.cpe_data_dict['registry_sw'].values())
cpe_23_names = list(self.cpe_data_dict['cpe_23_names'].values())
sim_score = list(self.cpe_data_dict['sim_score'].values())
asso_cve = []
df = pd.DataFrame([sftw_names, cpe_23_names, sim_score, asso_cve]).transpose()
df.columns = ['sftw_name', 'cpe_23', 'sim_score', 'asso_cve']
sftw_dirs = []
df = pd.DataFrame([sftw_names, cpe_23_names, sim_score, asso_cve, sftw_dirs]).transpose()
df.columns = ['sftw_name', 'cpe_23', 'sim_score', 'asso_cve', 'sftw_dirs']

# Matching process
cve_gen = self.cve_funcs.get_all_cpe23_uri()
_dict = {}
for cpe_23, cve_id in tqdm(cve_gen, desc="Matching CPE-CVE"):
_dict[cpe_23] = _dict.get(cpe_23, []) + [cve_id]
df['asso_cve'] = df['cpe_23'].apply(lambda x: _dict[x] if x in _dict else [])
df['sftw_dirs'] = df['sftw_name'].apply(lambda x: self.find_inf_files.sftw_name_to_dir(x))
json_res = self.organize_df_make_json(df)
df.to_csv('result.csv')
return json_res
Expand All @@ -42,7 +46,7 @@ def organize_df_make_json(self, df):
df = df.drop(df[df.sim_score < 0.5].index)
df = df[df['asso_cve'].map(lambda d: len(d)) > 0]
for index, row in df.iterrows():
final_res[row['sftw_name']] = row['asso_cve']
final_res[row['sftw_name']] = [row['asso_cve'], row['sftw_dirs']]
with open('json_final_res.json', 'w') as jf:
json.dump(final_res, jf)
return json.dumps(final_res, indent=4, sort_keys=True)
Expand Down
5 changes: 3 additions & 2 deletions searchEngine.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,12 @@ def fit_all(self, num_to_retrieve):
for col in tqdm(self.registry_data):
query = self.registry_data[col].str.cat(sep=' ', na_rep='')
relevant_docs = self.searcher(query, num_to_retrieve)
sftw_name = self.registry_data[col]['DisplayName']
for i in range(len(relevant_docs)):
if relevant_docs.empty:
final_res.append([query, None, None, 0])
final_res.append([sftw_name, None, None, 0])
else:
final_res.append([query, relevant_docs["cpe_23_names"].iloc[i], relevant_docs["titles"].iloc[i],
final_res.append([sftw_name, relevant_docs["cpe_23_names"].iloc[i], relevant_docs["titles"].iloc[i],
relevant_docs["sim_score"].iloc[i]])
final_res = pd.DataFrame(final_res)
final_res.columns = ["registry_sw", "cpe_23_names", "titles", "sim_score"]
Expand Down