-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathchallenge1.rb
More file actions
137 lines (114 loc) · 4.79 KB
/
challenge1.rb
File metadata and controls
137 lines (114 loc) · 4.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# challenge1.rb
#
# The following command lines invoke the functionality quickly.
# If your data is in the default zips.json file located in the current directory, run:
# ruby -r './challenge1.rb' -e 'puts Challenge1.perform.to_s'
# If your JSON input file is located elsewhere, you can specify its file spec. E.g., running from the parent directory:
# ruby -r './gg_challenge1/challenge1.rb' -e "puts Challenge1.perform('./gg_challenge1/zips.json').to_s"
# If your data comes from another source, convert it to an array of zip code infos <myarr>,
# start interactive Ruby (irb), and then:
# irb> load '/path/to/this/file/challenge1.rb'
# irb> c = Challenge1.new
# irb> c.source = myarr
# irb> c.process_source_data
require 'json'
require 'ostruct'
class Challenge1
  # Computes three per-state reports from an array of US zip code records:
  #   feature1 - states with a total population above 10 million
  #   feature2 - average city population by state
  #   feature3 - largest and smallest cities by state
  #
  # Sample element of the array of US zip code information:
  #   {
  #     "_id": "10280",
  #     "city": "NEW YORK",
  #     "state": "NY",
  #     "pop": 5574,
  #     "loc": [
  #       -74.016323,
  #       40.710537
  #     ]
  #   }
  #
  # Only the "state", "city" and "pop" entries are read. Records that share a
  # state and a city name are summed into a single city population.

  DEFAULT_SRC_FILE_SPEC = 'zips.json'.freeze
  LARGE_STATE_MIN_POPULATION = 10_000_000
  STATE = 'state'.freeze
  CITY = 'city'.freeze
  POP = 'pop'.freeze

  # Ruby array containing information for individual zip codes (see the sample element above).
  attr_accessor :source

  # Top level interface for the default data source, a JSON file like zips.json.
  #
  # src_file_spec - path of the JSON file; its top level must be an object
  #                 holding the records array under the "cities" key.
  #
  # Returns an OpenStruct with feature1, feature2 and feature3 (see #process_statistics).
  # Raises RuntimeError when the file cannot be read or parsed.
  def self.perform(src_file_spec = DEFAULT_SRC_FILE_SPEC)
    new.perform(src_file_spec)
  end

  # Loads the records from src_file_spec (unless #source is already populated)
  # and computes all results.
  #
  # Any failure while reading or parsing the file is re-raised as a RuntimeError
  # that names the offending file.
  def perform(src_file_spec)
    unless source
      begin
        acquire_source(src_file_spec)
      rescue StandardError => e
        raise "Unable to acquire source data from #{src_file_spec}: #{e.message}"
      end
    end
    process_source_data
  end

  # Public on purpose: if your array of zip code info comes from a different
  # source, instantiate the class, assign the array to #source, and invoke this
  # method on the instance.
  #
  # Raises RuntimeError when #source has not been assigned.
  def process_source_data
    # Fail with an actionable message instead of a NoMethodError on nil.
    raise 'Source data has not been set; assign an array of zip code records to #source first' if source.nil?

    process_statistics(collect_state_statistics(source))
  end

  private

  # Returns intermediate statistics for each state as a hash keyed by state.
  # All required output is at the state level, so every record is folded into
  # the entry of its state: the state total and the per-city totals.
  def collect_state_statistics(source)
    source.each_with_object({}) do |record, by_state|
      state = record[STATE]
      city = record[CITY]
      pop = record[POP]

      state_info = (by_state[state] ||= new_state_info_for(state))
      state_info.pop += pop
      # A city can appear in several records; accumulate its population.
      state_info.cities[city] = state_info.cities.fetch(city, 0) + pop
    end
  end

  # Turns the per-state statistics into the three reports, each ordered by state.
  def process_statistics(statistics)
    sorted_state_infos = statistics.values.sort_by(&:state)
    OpenStruct.new(
      feature1: feature_1(sorted_state_infos),
      feature2: feature_2(sorted_state_infos),
      feature3: feature_3(sorted_state_infos)
    )
  end

  # Reads the JSON file and stores the array found under its "cities" key in #source.
  def acquire_source(file_spec)
    cities = JSON.parse(File.read(file_spec))['cities']
    # Reject the document here so the caller reports it as an acquisition
    # failure rather than crashing later on a nil source.
    raise "the JSON document has no 'cities' entry" if cities.nil?

    self.source = cities
  end

  # Returns states whose population is strictly above LARGE_STATE_MIN_POPULATION.
  def feature_1(state_infos)
    large_states = state_infos.select { |state_info| state_info.pop > LARGE_STATE_MIN_POPULATION }
    large_states.map { |state_info| { _id: state_info.state, pop: state_info.pop } }
  end

  # Returns the average city population per state.
  # With integer populations this is integer division, so the average is truncated.
  def feature_2(state_infos)
    state_infos.map do |state_info|
      { _id: state_info.state, avgCityPop: state_info.pop / state_info.cities.size }
    end
  end

  # Returns the biggest and smallest city (by accumulated population) in each state.
  def feature_3(state_infos)
    state_infos.map do |state_info|
      # Iterating the hash yields [name, pop] pairs, e.g. ['Fair Lawn', 32000].
      big_name, big_pop = state_info.cities.max_by { |_name, pop| pop }
      small_name, small_pop = state_info.cities.min_by { |_name, pop| pop }
      {
        _id: state_info.state,
        biggestCity: { name: big_name, pop: big_pop },
        smallestCity: { name: small_name, pop: small_pop }
      }
    end
  end

  # Builds the OpenStruct that collects intermediate statistics for one state.
  # It contains :state, :pop (running state total), and :cities, a hash keyed
  # by city name whose values are the city populations.
  def new_state_info_for(state)
    OpenStruct.new(state: state, pop: 0, cities: {})
  end
end