Shayaomer · Shayaomer · Feb 8, 2022 · Feb 8, 2022 · Feb 16, 2022 · Feb 18, 2022
diff --git a/find_infected_files.py b/find_infected_files.py
@@ -0,0 +1,32 @@
+from installed_softwares import RegistryConnection
+from installed_softwares import InstalledSoftware
+
+
+class FindFiles:
+    """Map a software DisplayName to a directory using InstalledSoftware's field lists."""
+
+    def __init__(self):
+        self.reg_conn = RegistryConnection()
+        self.inst_sftw = InstalledSoftware()
+        self._dir_lists = None  # filled on the first dir_file_list() call, then reused
+
+    def dir_file_list(self):
+        """Return the DisplayName/InstallLocation/InstallSource/UninstallString lists (read once)."""
+        if self._dir_lists is None:  # sftw_name_to_dir runs once per program; avoid re-querying
+            self._dir_lists = self.inst_sftw.dump_software_lst_to_json(
+                ['DisplayName', 'InstallLocation', 'InstallSource', 'UninstallString'],
+                'name_dir.json', False)
+        return self._dir_lists
+
+    def sftw_name_to_dir(self, sftw_name):  # Enter DisplayName as in the registry
+        """Return the first non-empty path recorded for sftw_name, else 'NO PATH'."""
+        lst = self.dir_file_list()
+        if sftw_name not in lst[0]:  # list.index() would raise ValueError for unknown names
+            return 'NO PATH'
+        index = lst[0].index(sftw_name)
+        for j in (1, 2):  # InstallLocation first, then InstallSource
+            if lst[j][index]:
+                return lst[j][index]
+        # UninstallString names a file: drop the quotes and everything after the last '\\'
+        uninstall_dir = (lst[3][index] or '').replace('"', '').rpartition('\\')[0]
+        return uninstall_dir or 'NO PATH'
diff --git a/installed_softwares.py b/installed_softwares.py
@@ -86,14 +86,15 @@ def remove_empty_list_items(self, lst):
                     len_cols = len_cols - 1
                     col -= 1
 
-    def dump_software_lst_to_json(self, requested_fields_lst):
+    def dump_software_lst_to_json(self, requested_fields_lst, file_name='registry_data.json', dump=True):
+        """Gather get_installed_software() output per field; optionally dump it to file_name as JSON."""
         final_lst = []
         for field in requested_fields_lst:
             self.requested_data_field = field
             final_lst.append(self.get_installed_software())
         self.remove_empty_list_items(final_lst)
-
-        df = pd.DataFrame(data=final_lst)
-        df = df.rename(index={df.index[i]: requested_fields_lst[i] for i in range(len(requested_fields_lst))})
-        df.to_json("registry_data.json")
-
+        if dump:
+            df = pd.DataFrame(data=final_lst)  # one row per requested field, labelled by field name
+            df.rename(index=dict(zip(df.index, requested_fields_lst))).to_json(file_name)
+        # Return the collected lists in both modes so the result type does not depend on `dump`.
+        return final_lst
diff --git a/main.py b/main.py
@@ -8,20 +8,20 @@
 
 
 def execute():
-    print('Initializing the scan & matching process...')
-    print("Downloading CVE data...")
-    DownloadDb()
-    print("Downloading CPE data...")
-    download_db.download_file()
-    download_db.unzip_file('official-cpe-dictionary_v2.3.xml.zip', directory_to_extract=None)
+    # print('Initializing the scan & matching process...')
+    # print("Downloading CVE data...")
+    # DownloadDb()
+    # print("Downloading CPE data...")
+    # download_db.download_file()
+    # download_db.unzip_file('official-cpe-dictionary_v2.3.xml.zip', directory_to_extract=None)
 
     print('Getting installed softwares...')
     i_s = InstalledSoftware()
     i_s.dump_software_lst_to_json(["Publisher", 'DisplayVersion', 'DisplayName'])
 
-    print('Parsing the CPE data...')
-    b = CpeXmlParser('official-cpe-dictionary_v2.3.xml')
-    b.csv_creator('official-cpe-dictionary_v2.3.xml')
+    # print('Parsing the CPE data...')
+    # b = CpeXmlParser('official-cpe-dictionary_v2.3.xml')
+    # b.csv_creator('official-cpe-dictionary_v2.3.xml')
 
     c = MatcherCveCpe()
     res_json = c.match_cve_cpe()

diff --git a/matching_cve_cpe.py b/matching_cve_cpe.py
@@ -3,6 +3,7 @@
 import pandas as pd
 from tqdm import tqdm
 import json
+from find_infected_files import FindFiles
 
 
 class MatcherCveCpe:
@@ -17,22 +18,25 @@ def __init__(self):
         cpe_sw_fitter = sEngine.CpeSwFitter("parsed_xml.csv", "cosin")
         self.cpe_data_dict = cpe_sw_fitter.fit_all(1)
         print('Engine finished and CPE-Installed softwares results dumped!')
+        self.find_inf_files = FindFiles()
 
     def match_cve_cpe(self):
         # Initialize data frame items
         sftw_names = list(self.cpe_data_dict['registry_sw'].values())
         cpe_23_names = list(self.cpe_data_dict['cpe_23_names'].values())
         sim_score = list(self.cpe_data_dict['sim_score'].values())
         asso_cve = []
-        df = pd.DataFrame([sftw_names, cpe_23_names, sim_score, asso_cve]).transpose()
-        df.columns = ['sftw_name', 'cpe_23', 'sim_score', 'asso_cve']
+        sftw_dirs = []  # empty placeholder for the fifth column; overwritten after matching below
+        df = pd.DataFrame([sftw_names, cpe_23_names, sim_score, asso_cve, sftw_dirs]).transpose()  # one list per column
+        df.columns = ['sftw_name', 'cpe_23', 'sim_score', 'asso_cve', 'sftw_dirs']  # same order as the lists above
 
         # Matching process
         cve_gen = self.cve_funcs.get_all_cpe23_uri()
         _dict = {}
         for cpe_23, cve_id in tqdm(cve_gen, desc="Matching CPE-CVE"):
             _dict[cpe_23] = _dict.get(cpe_23, []) + [cve_id]
         df['asso_cve'] = df['cpe_23'].apply(lambda x: _dict[x] if x in _dict else [])
+        df['sftw_dirs'] = df['sftw_name'].apply(lambda x: self.find_inf_files.sftw_name_to_dir(x))  # one FindFiles lookup per row
         json_res = self.organize_df_make_json(df)
         df.to_csv('result.csv')
         return json_res
@@ -42,7 +46,7 @@ def organize_df_make_json(self, df):
         df = df.drop(df[df.sim_score < 0.5].index)
         df = df[df['asso_cve'].map(lambda d: len(d)) > 0]
         for index, row in df.iterrows():
-            final_res[row['sftw_name']] = row['asso_cve']
+            final_res[row['sftw_name']] = [row['asso_cve'], row['sftw_dirs']]
         with open('json_final_res.json', 'w') as jf:
             json.dump(final_res, jf)
         return json.dumps(final_res, indent=4, sort_keys=True)

diff --git a/searchEngine.py b/searchEngine.py
@@ -107,11 +107,12 @@ def fit_all(self, num_to_retrieve):
         for col in tqdm(self.registry_data):
             query = self.registry_data[col].str.cat(sep=' ', na_rep='')
             relevant_docs = self.searcher(query, num_to_retrieve)
+            sftw_name = self.registry_data[col]['DisplayName']
             for i in range(len(relevant_docs)):
                 if relevant_docs.empty:
-                    final_res.append([query, None, None, 0])
+                    final_res.append([sftw_name, None, None, 0])
                 else:
-                    final_res.append([query, relevant_docs["cpe_23_names"].iloc[i], relevant_docs["titles"].iloc[i],
+                    final_res.append([sftw_name, relevant_docs["cpe_23_names"].iloc[i], relevant_docs["titles"].iloc[i],
                                     relevant_docs["sim_score"].iloc[i]])
         final_res = pd.DataFrame(final_res)
         final_res.columns = ["registry_sw", "cpe_23_names", "titles", "sim_score"]