-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbiohack.py
More file actions
64 lines (52 loc) · 2.59 KB
/
biohack.py
File metadata and controls
64 lines (52 loc) · 2.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import argparse
import sys
import pandas as pd
from defs import etl_query
from defs import etl_fetch
from defs import etl_convert
from defs import etl_gliner # Add import for etl_gliner function
def _build_parser():
    """Create the command-line parser with one sub-command per operation mode.

    Returns:
        argparse.ArgumentParser: parser that stores the chosen sub-command
        name in ``args.mode`` (``None`` when no sub-command was given).
    """
    parser = argparse.ArgumentParser(description="Multi-mode data processing program")
    subparsers = parser.add_subparsers(dest="mode", help="Operation mode")

    # Query mode parser
    query_parser = subparsers.add_parser("query", help="Query mode operations")
    query_parser.add_argument("--source", required=True, help="Source file/location")
    query_parser.add_argument("--sink", required=True, help="Output destination")
    query_parser.add_argument("--query", required=True, help="SPARQL query file")
    query_parser.add_argument("--table", required=True, help="LanceDB Table name")

    # Fetch mode parser
    fetch_parser = subparsers.add_parser("fetch", help="fetch resources mode operations")
    fetch_parser.add_argument("--source", required=True, help="Source file/location")

    # Convert mode parser
    convert_parser = subparsers.add_parser("convert", help="Convert HTML or PDF to Markdown")
    convert_parser.add_argument("-url", help="URL of HTML or PDF document to convert")
    convert_parser.add_argument("-local", help="Path to local HTML or PDF file to convert")
    convert_parser.add_argument("-output", required=True, help="Output file name for the markdown")

    # Gliner mode parser
    gliner_parser = subparsers.add_parser("gliner", help="Process data with etl_gliner")
    gliner_parser.add_argument("input_string", help="String to process with etl_gliner")

    return parser


def main(argv=None):
    """Parse the command line and run the selected operation mode.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            argparse reads ``sys.argv[1:]``, so calling ``main()`` with no
            arguments behaves as a normal script entry point.

    Raises:
        SystemExit: with code 1 when no mode is given (after printing help),
            or with code 2 when argparse rejects the arguments, including a
            ``convert`` invocation that supplies neither ``-url`` nor
            ``-local``.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    if args.mode is None:
        parser.print_help()
        sys.exit(1)

    # Dispatch directly on the mode: each handler takes a different argument
    # set, so a shared lookup table would not remove the per-mode branching.
    if args.mode == "query":
        etl_query.query_mode(args.source, args.sink, args.query, args.table)
    elif args.mode == "fetch":
        etl_fetch.fetch_resources(args.source)
    elif args.mode == "convert":
        # Neither input given: fail here with a usage error instead of
        # handing two None values to convert_document.
        if args.url is None and args.local is None:
            parser.error("convert: one of -url or -local is required")
        etl_convert.convert_document(
            url=args.url,
            local_file=args.local,
            output_file=args.output,
        )
    elif args.mode == "gliner":
        # Tabulate the etl_gliner results before printing them.
        results = etl_gliner.process(args.input_string)
        print(pd.DataFrame(results))
# Run the command-line interface only when this file is executed as a script;
# importing the module does not trigger main().
if __name__ == "__main__":
    main()