-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathxml_generator.py
More file actions
94 lines (67 loc) · 3.21 KB
/
xml_generator.py
File metadata and controls
94 lines (67 loc) · 3.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
from random import choice, randint
from common import *
import os
# Number of <Bacterium> entries generated when this file is run as a script
# (passed to main() by the __main__ guard at the bottom of the file).
NUMBER_OF_ENTRIES = 1
# Directory that main() creates if missing and writes data.xml into.
OUTPUT_DIR = "data/xml"
def generate_bacterium(id_) -> str:
    """Build the XML text of one randomly populated <Bacterium> element.

    The scalar fields are picked with ``choice`` from the option lists
    MORPHOLOGIES, YEARS, METABOLISM, MOVEMENT, OXY_DEMAND and GRAM (not
    defined in this file; presumably supplied by ``from common import *``).
    Taxonomy, diseases (zero to three of them) and the genome come from the
    sibling generator functions.

    Args:
        id_: Identifier written into <idBacterium> and forwarded to
            generate_genome().

    Returns:
        The element as a string. The closing </Bacterium> tag is not
        followed by a newline.
    """

    def element(tag, content, multiline=False):
        # Container elements put a newline right after the opening tag;
        # leaf elements keep opening tag, text and closing tag on one line.
        opening = "<" + tag + ">" + ("\n" if multiline else "")
        return opening + content + "</" + tag + ">\n"

    # Built in document order, which also fixes the order of the random draws.
    children = [
        element("idBacterium", str(id_)),
        element("Morphology", choice(MORPHOLOGIES)),
        element("Year", str(choice(YEARS))),
        element("MetabolismType", choice(METABOLISM)),
        element("MovementType", choice(MOVEMENT)),
        element("OxygenDemand", choice(OXY_DEMAND)),
        element("GramStain", choice(GRAM)),
        element("Taxonomy", generate_taxonomy()),
    ]

    disease_count = randint(0, 3)
    disease_entries = []
    for _ in range(disease_count):
        disease_entries.append(
            element("Disease", generate_disease(), multiline=True))
    children.append(
        element("Diseases", "".join(disease_entries), multiline=True))

    children.append(element("Genome", generate_genome(id_), multiline=True))

    return "<Bacterium>\n" + "".join(children) + "</Bacterium>"
def generate_taxonomy() -> str:
    """Return the child elements of a <Taxonomy> node with random values.

    Phylum, class, order, family and genus are drawn with ``choice`` from
    PHYLUM, CLASS, ORDER, FAMILY and GENUS, so they may repeat between
    calls. The species is *popped* from TAXONOM_SPECIES, so every species
    name is handed out at most once and that list shrinks by one per call.

    Returns:
        Six newline-terminated XML elements concatenated in the order
        Phylum, Class, Order, Family, Genus, Specie.

    Raises:
        IndexError: If TAXONOM_SPECIES has no unused species left.
    """
    if not TAXONOM_SPECIES:
        raise IndexError(
            "TAXONOM_SPECIES is exhausted: no unused species left")

    phylum = "<Phylum>" + choice(PHYLUM) + "</Phylum>\n"
    class_ = "<Class>" + choice(CLASS) + "</Class>\n"
    order = "<Order>" + choice(ORDER) + "</Order>\n"
    family = "<Family>" + choice(FAMILY) + "</Family>\n"
    genus = "<Genus>" + choice(GENUS) + "</Genus>\n"

    # randint() includes BOTH endpoints, so the last valid index is len - 1.
    # Using len(...) as the upper bound occasionally selected an index one
    # past the end and made pop() raise "pop index out of range".
    species_index = randint(0, len(TAXONOM_SPECIES) - 1)
    specie = "<Specie>" + TAXONOM_SPECIES.pop(species_index) + "</Specie>\n"

    return phylum + class_ + order + family + genus + specie
def generate_disease() -> str:
    """Return the child elements of one <Disease> node with random values.

    The name comes from NAME_DISEASE with apostrophes removed. Between one
    and eight symptoms are drawn (with repetition) from SYMPTOMS, stripped
    of semicolons and apostrophes, and joined with ", ". The cure flag comes
    from HAS_CURE and the description is a fixed placeholder sentence.

    Returns:
        Four newline-terminated XML elements: NameDisease, Symptoms,
        HasCure and Description.
    """
    disease_name = choice(NAME_DISEASE).replace("'", "")

    # One pass that deletes both ';' and "'" from a symptom string.
    unwanted = str.maketrans("", "", ";'")
    symptom_total = randint(1, 8)
    cleaned_symptoms = []
    for _ in range(symptom_total):
        cleaned_symptoms.append(choice(SYMPTOMS).translate(unwanted))
    symptom_text = ", ".join(cleaned_symptoms)

    cure_flag = choice(HAS_CURE)

    return (
        "<NameDisease>" + disease_name + "</NameDisease>\n"
        + "<Symptoms>" + symptom_text + "</Symptoms>\n"
        + "<HasCure>" + cure_flag + "</HasCure>\n"
        + "<Description>Some description about the disease...</Description>\n"
    )
def generate_genome(id_: int) -> str:
    """Return the child elements of a <Genome> node.

    Args:
        id_: Value written into <idGenome>.

    Returns:
        The <idGenome> and <Category> elements followed by a <Genes>
        container holding one or two <Gene> entries. Genes are numbered from
        0 within each genome. The opening <Genes> tag is not followed by a
        newline.
    """
    pieces = ["<idGenome>" + str(id_) + "</idGenome>\n"]
    pieces.append("<Category>" + choice(CATEGORY) + "</Category>\n")

    pieces.append("<Genes>")
    gene_count = randint(1, 2)
    for gene_index in range(gene_count):
        pieces.append("<Gene>\n" + generate_gen(gene_index) + "</Gene>\n")
    pieces.append("</Genes>\n")

    return "".join(pieces)
def generate_gen(id_: int) -> str:
    """Return the child elements of a <Gene> node with a random sequence.

    Args:
        id_: Value written into <idGen>.

    Returns:
        An <idGen> element followed by a <Sequence> element containing
        between 20 and 100 characters drawn from A, C, G and T, each
        element terminated by a newline.
    """
    nucleotides = ["A", "C", "G", "T"]
    # Draw the length first, then one base per position.
    length = randint(20, 100)
    sequence = "".join(choice(nucleotides) for _ in range(length))
    return f"<idGen>{id_!s}</idGen>\n<Sequence>{sequence}</Sequence>\n"
def main(requested_items):
    """Write ``requested_items`` random bacteria to ``OUTPUT_DIR/data.xml``.

    The output directory is created when missing and an existing data.xml
    is overwritten. The document consists of an XML declaration followed by
    a single <Bacteria> root wrapping the generated <Bacterium> elements.

    Args:
        requested_items: Number of <Bacterium> elements to generate; their
            ids run from 0 to ``requested_items - 1``.
    """
    # exist_ok avoids the check-then-create race of testing os.path.exists
    # before calling makedirs.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # The context manager flushes and closes the file even if generation
    # raises part-way through; the handle was previously never closed.
    output_path = os.path.join(OUTPUT_DIR, "data.xml")
    with open(output_path, "w", encoding="utf-8") as bacterium_xml:
        # The version value must be quoted for the declaration to be
        # well-formed; the encoding matches how the file is opened.
        bacterium_xml.write('<?xml version="1.0" encoding="UTF-8"?>\n\n')
        bacterium_xml.write("<Bacteria>")
        for i in range(requested_items):
            bacterium_xml.write(generate_bacterium(i))
        bacterium_xml.write("</Bacteria>")
# Script entry point: generate the default number of entries when this file
# is executed directly rather than imported.
if __name__ == "__main__":
    main(requested_items=NUMBER_OF_ENTRIES)