Transform the raw data into business data

The sales system API expects the traffic data in a specific format. You currently have all the necessary raw data, but you cannot send it as is, since the API would not understand that format.

You decide to implement a function to handle the transformation of raw data into sales system API payloads:

from robocorp.tasks import task
from RPA.HTTP import HTTP
from RPA.JSON import JSON
from RPA.Tables import Tables

# Library instances shared by the task and its helper functions.
http = HTTP()
json = JSON()
table = Tables()

TRAFFIC_JSON_FILE_PATH = "output/traffic.json"


@task
def produce_traffic_data():
    """
    Inhuman Insurance, Inc. Artificial Intelligence System automation.
    Produces traffic data work items.
    """
    http.download(
        url="https://github.com/robocorp/inhuman-insurance-inc/raw/main/RS_198.json",
        target_file=TRAFFIC_JSON_FILE_PATH,
        overwrite=True,
    )
    raw_table = load_traffic_data_as_table()
    filtered_table = filter_and_sort_traffic_data(raw_table)
    latest_rows = get_latest_data_by_country(filtered_table)
    # Not consumed any further at this stage of the robot.
    payloads = create_work_item_payloads(latest_rows)


@task
def consume_traffic_data():
    """
    Inhuman Insurance, Inc. Artificial Intelligence System robot.
    Consumes traffic data work items.
    """
    print("consume")


def load_traffic_data_as_table():
    """Read the downloaded JSON file and build a table from its "value" entry."""
    content = json.load_json_from_file(TRAFFIC_JSON_FILE_PATH)
    records = content["value"]
    return table.create_table(records)


def filter_and_sort_traffic_data(data):
    """Filter the traffic table by rate and gender, then sort it by year.

    The filter and sort calls operate on ``data`` itself; the same table
    object is returned for convenience.
    """
    rate_column = "NumericValue"
    gender_column = "Dim1"
    year_column = "TimeDim"
    rate_limit = 5.0
    both_genders_code = "BTSX"

    # Keep only rows whose rate is below the limit and that cover both genders.
    table.filter_table_by_column(data, rate_column, "<", rate_limit)
    table.filter_table_by_column(data, gender_column, "==", both_genders_code)
    # Ascending is disabled, so the most recent year comes first.
    table.sort_table_by_column(data, year_column, ascending=False)
    return data


def get_latest_data_by_country(data):
    """Return one row per country: the row popped from each country group.

    The table is grouped by the country column and a row is popped from each
    group with the library's default arguments (the first row of the group).
    """
    country_column = "SpatialDim"
    groups = table.group_table_by_column(data, country_column)
    return [table.pop_table_row(group) for group in groups]


def create_work_item_payloads(traffic_data):
    """Turn each traffic data row into a dictionary with country, year and rate."""
    payloads = []
    for row in traffic_data:
        # Map the raw data keys to the names used in the payload.
        payloads.append(
            {
                "country": row["SpatialDim"],
                "year": row["TimeDim"],
                "rate": row["NumericValue"],
            }
        )
    return payloads
  • The create_work_item_payloads() function loops over the list of traffic data - essentially rows.
  • For each row, you create a new dictionary (a data structure that supports named keys).
  • You append the dictionaries to a list that you then return from the function.

After running the robot, the log provides a small snippet of the payloads:

payloads = [{'country': 'VCT', 'year': 2011, 'rate': 3.69293}, {'country': 'SWZ', 'year': 2019, 'rate': 3.13947}, {'country': 'SWE', 'year': 2018, 'rate': 3.61718}...

Looks good. But what's that smell? SpatialDim, TimeDim, NumericValue appear in the code more than once. Duplication? In your code? Better deal with it immediately.

You decide to create variables for the data keys to avoid duplicating the values:

from robocorp.tasks import task
from RPA.HTTP import HTTP
from RPA.JSON import JSON
from RPA.Tables import Tables

# Library instances shared by the task and its helper functions.
http = HTTP()
json = JSON()
table = Tables()

TRAFFIC_JSON_FILE_PATH = "output/traffic.json"

# JSON data keys
COUNTRY_KEY = "SpatialDim"
YEAR_KEY = "TimeDim"
RATE_KEY = "NumericValue"
GENDER_KEY = "Dim1"


@task
def produce_traffic_data():
    """
    Inhuman Insurance, Inc. Artificial Intelligence System automation.
    Produces traffic data work items.
    """
    http.download(
        url="https://github.com/robocorp/inhuman-insurance-inc/raw/main/RS_198.json",
        target_file=TRAFFIC_JSON_FILE_PATH,
        overwrite=True,
    )
    raw_table = load_traffic_data_as_table()
    filtered_table = filter_and_sort_traffic_data(raw_table)
    latest_rows = get_latest_data_by_country(filtered_table)
    # Not consumed any further at this stage of the robot.
    payloads = create_work_item_payloads(latest_rows)


@task
def consume_traffic_data():
    """
    Inhuman Insurance, Inc. Artificial Intelligence System robot.
    Consumes traffic data work items.
    """
    print("consume")


def load_traffic_data_as_table():
    """Read the downloaded JSON file and build a table from its "value" entry."""
    content = json.load_json_from_file(TRAFFIC_JSON_FILE_PATH)
    records = content["value"]
    return table.create_table(records)


def filter_and_sort_traffic_data(data):
    """Filter the traffic table by rate and gender, then sort it by year.

    The filter and sort calls operate on ``data`` itself; the same table
    object is returned for convenience.
    """
    rate_limit = 5.0
    both_genders_code = "BTSX"

    # Keep only rows whose rate is below the limit and that cover both genders.
    table.filter_table_by_column(data, RATE_KEY, "<", rate_limit)
    table.filter_table_by_column(data, GENDER_KEY, "==", both_genders_code)
    # Ascending is disabled, so the most recent year comes first.
    table.sort_table_by_column(data, YEAR_KEY, ascending=False)
    return data


def get_latest_data_by_country(data):
    """Return one row per country: the row popped from each country group.

    The table is grouped by the country column and a row is popped from each
    group with the library's default arguments (the first row of the group).
    """
    groups = table.group_table_by_column(data, COUNTRY_KEY)
    return [table.pop_table_row(group) for group in groups]


def create_work_item_payloads(traffic_data):
    """Turn each traffic data row into a dictionary with country, year and rate."""
    payloads = []
    for row in traffic_data:
        # Map the raw data keys to the names used in the payload.
        payloads.append(
            {
                "country": row[COUNTRY_KEY],
                "year": row[YEAR_KEY],
                "rate": row[RATE_KEY],
            }
        )
    return payloads

That's better!