Skip to content

Commit 245ada2

Browse files
♻️ 💥 change raw_http attribute in responses to be actual json strings
1 parent 5c17f1d commit 245ada2

14 files changed

Lines changed: 180 additions & 120 deletions

File tree

bin/v1/parser.rb

Lines changed: 22 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,11 @@ def init_product_parser
4242
V1_PRODUCTS.each do |doc_key, doc_value|
4343
v1_product_parser[doc_key] = OptionParser.new do |options_parser|
4444
options_parser.on('-w', '--all-words', 'Include words in response') { |v| @options[:all_words] = v }
45-
options_parser.on('-c', '--cut-pages', 'Cut document pages') { |v| @options[:cut_pages] = v }
4645
options_parser.on('-k [KEY]', '--key [KEY]', 'API key for the endpoint') { |v| @options[:api_key] = v }
47-
options_parser.on('-f', '--full', 'Print the full data') { @options[:print_full] = true }
46+
options_parser.on('-o FORMAT', '--output-format FORMAT', ['raw', 'full', 'summary'],
47+
'Format of the output (raw, full, summary). Default: summary') do |format|
48+
@options[:output_format] = format
49+
end
4850
options_parser.on('-F', '--fix-pdf', 'Repair PDF') { @options[:repair_pdf] = true }
4951

5052
if doc_key != 'universal'
@@ -69,23 +71,27 @@ def send(product_command, endpoint_name, options)
6971
doc_class = V1_PRODUCTS[product_command][:doc_class]
7072
input_source = setup_input_source(mindee_client, options)
7173
custom_endpoint = setup_endpoint(mindee_client, product_command, endpoint_name, options)
72-
page_options = setup_page_options(options)
7374
options[:parse_async] = !V1_PRODUCTS[product_command][:sync] if options[:parse_async].nil?
7475

7576
mindee_client.parse(
7677
input_source,
7778
doc_class,
7879
options: { endpoint: custom_endpoint,
79-
options: Mindee::ParseOptions.new(
80-
params: { page_options: page_options }
81-
),
8280
enqueue: options[:parse_async] }
8381
)
8482
end
8583

84+
# Prints a parsing result to standard output in the requested format.
#
# * +raw+ pretty-prints the raw HTTP payload as JSON;
# * +full+ prints the whole document;
# * anything else (including +nil+) prints only the prediction summary.
#
# @param result [#raw_http, #document] API response to display.
# @param output_format [Symbol, String, nil] One of raw, full or summary.
# @return [void]
def print_result(result, output_format)
  case output_format&.to_sym
  when :raw
    payload = result.raw_http
    begin
      # raw_http is expected to be a JSON string; an already-parsed structure is generated directly.
      payload = JSON.parse(payload) if payload.is_a?(String)
      puts JSON.pretty_generate(payload)
    rescue JSON::ParserError
      # Not valid JSON: print the payload untouched instead of crashing the CLI.
      puts payload
    end
  when :full
    puts result.document
  else
    puts result.document.inference.prediction
  end
end
91+
8692
# Runs the product command read from the CLI arguments and prints its result.
# @return [void]
8793
def execute
88-
options = {}
94+
@options = { output_format: :summary }
8995
product_command = @arguments.shift
9096

9197
abort(@options_parser.help) unless V1_PRODUCTS.include?(product_command)
@@ -97,23 +103,27 @@ def execute
97103
abort(@product_parser[product_command].help)
98104
end
99105
endpoint_name = @arguments[0]
100-
options[:file_path] = @arguments[1]
106+
@options[:file_path] = @arguments[1]
101107
else
102108
if @arguments.empty?
103109
warn 'file missing'
104110
abort(@product_parser[product_command].help)
105111
end
106112
endpoint_name = nil
107-
options[:file_path] = @arguments[0]
113+
@options[:file_path] = @arguments[0]
108114
end
109115

110-
result = send(product_command, endpoint_name, options)
111-
112-
puts options[:print_full] ? result.document : result.document.inference.prediction
116+
result = send(product_command, endpoint_name, @options)
117+
print_result(result, output_format)
113118
end
114119

115120
private
116121

122+
# Output format requested on the command line.
#
# Falls back to +:summary+ when no format was stored, or when the options
# hash has not been assigned yet.
# @return [Symbol]
def output_format
  requested = @options && @options[:output_format]
  requested&.to_sym || :summary
end
126+
117127
# @param mindee_client [Mindee::V1::Client]
118128
# @param options [Hash]
119129
# @return [Hash]
@@ -139,22 +149,5 @@ def setup_endpoint(mindee_client, product_command, endpoint_name, options)
139149
version: options[:endpoint_version] || '1'
140150
)
141151
end
142-
143-
# @param options [Hash]
144-
# @return [Hash]
145-
def setup_page_options(options)
146-
if options[:cut_pages].nil? || !options[:cut_pages].is_a?(Integer) ||
147-
options[:cut_pages].negative?
148-
nil
149-
else
150-
151-
{ params: {
152-
page_indexes: (0..options[:cut_pages].to_i).to_a,
153-
operation: :KEEP_ONLY,
154-
on_min_pages: 0,
155-
} }
156-
157-
end
158-
end
159152
end
160153
end

bin/v2/parser.rb

Lines changed: 55 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -28,41 +28,39 @@ def initialize(arguments)
2828
@search_parser = init_search_parser
2929
end
3030

31-
# Summarize the result of the command.
31+
# Summarize and print the result of the command.
3232
# @param command [String]
33-
# @return [String]
34-
def summarize_result(command)
33+
def print_result(command)
3534
if command == 'search-models'
3635
@search_parser.parse!(@arguments)
37-
@result = search(@options)
38-
summarized_result = @options[:print_full] ? @result.to_s : @result.models.to_s
36+
result = search(@options)
37+
summarized_result = output_format == :full ? result.to_s : result.models.to_s
3938
else
4039
@product_parser[command].parse!(@arguments)
4140
@options[:file_path] = @arguments.shift
4241
if @options[:file_path].nil?
4342
warn 'file missing'
4443
abort(@product_parser[command].help)
4544
end
46-
@result = send(command, @options)
47-
summarized_result = @options[:print_full] ? @result.inference.to_s : @result.inference.result.to_s
45+
result = send(command, @options)
46+
summarized_result = output_format == :full ? result.inference.to_s : result.inference.result.to_s
47+
end
48+
49+
if output_format == :raw
50+
puts JSON.pretty_generate(raw_payload(result.raw_http))
51+
else
52+
puts summarized_result
4853
end
49-
summarized_result
5054
end
5155

5256
# Executes the command.
5357
# @return [void]
5458
def execute
55-
@options = {}
59+
@options = { output_format: :summary }
5660
command = @arguments.shift
5761

5862
validate_command!(command)
59-
summarized_result = summarize_result(command)
60-
61-
if @options[:raw_json]
62-
puts JSON.pretty_generate(@result.raw_http)
63-
else
64-
puts summarized_result
65-
end
63+
print_result(command)
6664
rescue OptionParser::InvalidOption, OptionParser::MissingArgument => e
6765
if command == 'search-models'
6866
abort("#{e.message}\n\n#{@search_parser.help}")
@@ -101,24 +99,24 @@ def init_search_parser
10199
end
102100

103101
def setup_specific_options(options_parser, doc_value)
104-
options_parser.on('-r', '--rag', 'Enable RAG') { @options[:rag] = true } if doc_value[:rag]
105-
if doc_value[:raw_text]
102+
options_parser.on('-r', '--rag', 'Enable RAG') { @options[:rag] = true } if doc_value.key?(:rag)
103+
if doc_value.key?(:raw_text)
106104
options_parser.on('-R', '--raw-text', 'Enable Raw Text retrieval') do
107105
@options[:raw_text] = true
108106
end
109107
end
110-
if doc_value[:confidence]
108+
if doc_value.key?(:confidence)
111109
options_parser.on('-c', '--confidence', 'Enable confidence scores') do
112110
@options[:confidence] = true
113111
end
114112
end
115-
options_parser.on('-p', '--polygon', 'Enable polygons') { @options[:polygon] = true } if doc_value[:polygon]
116-
if doc_value[:text_context]
113+
options_parser.on('-p', '--polygon', 'Enable polygons') { @options[:polygon] = true } if doc_value.key?(:polygon)
114+
if doc_value.key?(:text_context)
117115
options_parser.on('-t [TEXT CONTEXT]', '--text-context [TEXT CONTEXT]', 'Add Text Context') do |v|
118116
@options[:text_context] = v
119117
end
120118
end
121-
return unless doc_value[:data_schema]
119+
return unless doc_value.key?(:data_schema)
122120

123121
options_parser.on('-d [DATA SCHEMA]', '--data-schema [DATA SCHEMA]', 'Add Data Schema') do |v|
124122
@options[:data_schema] = v
@@ -129,8 +127,31 @@ def setup_specific_options(options_parser, doc_value)
129127
# @param options_parser [OptionParser]
130128
def init_common_options(options_parser)
131129
options_parser.on('-k [KEY]', '--key [KEY]', 'API key for the endpoint') { |v| @options[:api_key] = v }
132-
options_parser.on('-f', '--full', 'Print the full data') { @options[:print_full] = true }
133-
options_parser.on('-j', '--raw-json', 'Print the full raw jason data') { @options[:raw_json] = true }
130+
options_parser.on('-o FORMAT', '--output-format FORMAT', ['raw', 'full', 'summary'],
131+
'Format of the output (raw, full, summary). Default: summary') do |format|
132+
@options[:output_format] = format
133+
end
134+
end
135+
136+
# Output format requested on the command line.
#
# Falls back to +:summary+ when no format was stored, or when the options
# hash has not been assigned yet.
# @return [Symbol]
def output_format
  requested = @options && @options[:output_format]
  requested&.to_sym || :summary
end
140+
141+
# Normalizes a raw payload into a structure that can be pretty-printed.
#
# Accepts either a JSON string or an already-parsed object. A string is decoded
# up to +max_layers+ times, so a payload that was JSON-encoded twice is still
# unwrapped with the default setting. Decoding stops as soon as the value is no
# longer a String or is not valid JSON; the last successfully decoded value is
# returned in that case.
#
# @param payload [String, Hash, Array] Raw payload to normalize.
# @param max_layers [Integer] Maximum number of JSON encoding layers to remove.
# @return [Hash, Array, String] The decoded payload; other JSON scalars are
#   possible if the payload encodes one.
def raw_payload(payload, max_layers: 2)
  decoded = payload
  max_layers.times do
    break unless decoded.is_a?(String)

    decoded = JSON.parse(decoded)
  rescue JSON::ParserError
    # Keep whatever was decoded so far; a plain (non-JSON) string is returned unchanged.
    break
  end
  decoded
end
135156

136157
# @return [Hash]
@@ -144,19 +165,21 @@ def init_product_parser
144165
@options[:alias] = v
145166
end
146167
init_common_options(options_parser)
147-
options_parser.on('-C PAGES', '--cut-pages PAGES', 'Cut document pages') { |v| @options[:cut_pages] = v }
148-
options_parser.on('-F', '--fix-pdf', 'Repair PDF') { @options[:repair_pdf] = true }
168+
options_parser.on('-F', '--fix-pdf', 'Attempt to repair PDF before enqueueing') do
169+
@options[:repair_pdf] = true
170+
end
149171
setup_specific_options(options_parser, product_values)
150172
end
151173
end
152174
v2_product_parser
153175
end
154176

155-
# @param options [Hash] General options.
156-
# @return page_options [Hash] Page options.
157-
def setup_product_params(options, page_options)
158-
params = { model_id: options[:model_id] }
159-
params[:options] = Mindee::ParseOptions.new(params: page_options) unless page_options.nil?
177+
# Builds the parameters sent along with a product request.
#
# Always carries the model ID, then forwards every CLI option whose key is
# declared in the product's entry of V2_PRODUCTS.
#
# @param product_command [String] Product whose declared options are forwarded.
#   Defaults to 'extraction'; an unknown product forwards no extra option.
# @return [Hash]
def setup_product_params(product_command = 'extraction')
  declared = V2_PRODUCTS.fetch(product_command, {})
  forwarded = @options.select { |key, _value| declared.key?(key) }
  { model_id: @options[:model_id] }.merge(forwarded)
end
162185

@@ -167,8 +190,7 @@ def send(product_command, options)
167190
mindee_client = Mindee::ClientV2.new(api_key: options[:api_key])
168191
response_class = V2_PRODUCTS[product_command][:response_class]
169192
input_source = setup_input_source(options)
170-
page_options = setup_page_options(options)
171-
params = setup_product_params(options, page_options)
193+
params = setup_product_params
172194

173195
mindee_client.enqueue_and_get_result(
174196
response_class,

bin/v2/products.rb

Lines changed: 6 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -8,51 +8,27 @@
88
'classification' => {
99
description: 'Classification Utility',
1010
response_class: Mindee::V2::Product::Classification::Classification,
11-
rag: false,
12-
polygon: false,
13-
confidence: false,
14-
raw_text: false,
15-
text_context: false,
16-
data_schema: false,
1711
},
1812
'extraction' => {
1913
description: 'Extraction Inference',
2014
response_class: Mindee::V2::Product::Extraction::Extraction,
21-
rag: false,
22-
polygon: false,
23-
confidence: false,
24-
raw_text: false,
25-
text_context: false,
26-
data_schema: false,
15+
rag: true,
16+
polygon: true,
17+
confidence: true,
18+
raw_text: true,
19+
text_context: true,
20+
data_schema: true,
2721
},
2822
'crop' => {
2923
description: 'Crop Utility',
3024
response_class: Mindee::V2::Product::Crop::Crop,
31-
rag: false,
32-
polygon: false,
33-
confidence: false,
34-
raw_text: false,
35-
text_context: false,
36-
data_schema: false,
3725
},
3826
'ocr' => {
3927
description: 'OCR Utility',
4028
response_class: Mindee::V2::Product::Ocr::Ocr,
41-
rag: false,
42-
polygon: false,
43-
confidence: false,
44-
raw_text: true,
45-
text_context: false,
46-
data_schema: false,
4729
},
4830
'split' => {
4931
description: 'Split Utility',
5032
response_class: Mindee::V2::Product::Split::Split,
51-
rag: false,
52-
polygon: false,
53-
confidence: false,
54-
raw_text: false,
55-
text_context: false,
56-
data_schema: false,
5733
},
5834
}.freeze

lib/mindee/client.rb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ def parse_sync(input_source, product_class, endpoint, options)
162162
options
163163
)
164164

165-
Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http.to_s)
165+
Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http)
166166
end
167167

168168
# Enqueue a document for async parsing
@@ -200,7 +200,7 @@ def enqueue(input_source, product_class, endpoint: nil, options: {})
200200
input_source,
201201
opts
202202
)
203-
Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http.to_json)
203+
Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http)
204204
end
205205

206206
# Parses a queued document
@@ -215,7 +215,7 @@ def parse_queued(job_id, product_class, endpoint: nil)
215215
endpoint = initialize_endpoint(product_class) if endpoint.nil?
216216
logger.debug("Fetching queued document as '#{endpoint.url_root}'")
217217
prediction, raw_http = endpoint.parse_async(job_id)
218-
Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http.to_json)
218+
Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http)
219219
end
220220

221221
# Enqueue a document for async parsing and automatically try to retrieve it

lib/mindee/parsing/common/api_response.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,10 @@ class ApiResponse
3939

4040
# @param product_class [Mindee::Inference]
4141
# @param http_response [Hash]
42-
# @param raw_http [Hash]
42+
# @param raw_http [String]
4343
def initialize(product_class, http_response, raw_http)
4444
logger.debug('Handling API response')
45-
@raw_http = raw_http.to_s
45+
@raw_http = raw_http
4646
raise Errors::MindeeAPIError, 'Invalid response format.' unless http_response.key?('api_request')
4747

4848
@api_request = Mindee::Parsing::Common::ApiRequest.new(http_response['api_request'])

lib/mindee/v2/parsing/common_response.rb

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
11
# frozen_string_literal: true
22

3+
require 'json'
4+
35
module Mindee
46
module V2
57
module Parsing
68
# Base class for inference and job responses on the V2 API.
79
class CommonResponse
8-
# @return [Hash]
10+
# @return [String]
911
attr_reader :raw_http
1012

1113
# @param http_response [Hash]
1214
def initialize(http_response)
13-
@raw_http = http_response
15+
@raw_http = JSON.generate(http_response)
1416
end
1517
end
1618
end

sig/mindee/parsing/common/api_response.rbs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,12 @@ module Mindee
1515

1616

1717
class ApiResponse
18+
attr_reader document: Parsing::Common::Document?
19+
attr_reader job: Parsing::Common::Job?
20+
attr_reader api_request: Parsing::Common::ApiRequest
21+
attr_reader raw_http: String
22+
1823
def logger: () -> Logger
19-
def document: -> Parsing::Common::Document?
20-
def job: -> Parsing::Common::Job?
21-
def api_request: -> Parsing::Common::ApiRequest?
22-
def raw_http: -> String
2324
def initialize: (singleton(Parsing::Common::Inference), Hash[String | Symbol, untyped] | Net::HTTPResponse, String) -> void
2425
end
2526
end

0 commit comments

Comments
 (0)