I am importing CSVs to my data lake through ION. The business has requested that UIDs be added to each line of every CSV for their purposes. Current flow is as follows:
Get file from SFTP -> Apply object schema "Report" -> Run script -> Output object "Report_with_UID"
The data flow itself appears to run without errors; however, the preview of the resulting lake object is garbled and the object cannot be queried in Birst. The script is:
```python
import csv
import datetime
# When True, get_uid() converts the timestamp to US/Pacific (this needs pytz).
USE_PACIFIC_TIME = True
# Line numbers in the UID are left-padded with zeros to at least this many digits.
MINIMUM_LINE_NUMBER_LENGTH = 3
# When True, the last three digits of the six-digit microsecond field are
# dropped, leaving three fractional digits (i.e. milliseconds).
TRIM_TO_3_MICROSECONDS = True
# Header text of the UID column that is appended to the first CSV line.
UID_COLUMN_NAME_TO_ADD = 'UID'
def get_uid(current_line_number):
    """Build a UID from the current timestamp and a line number.

    The result has the form ``20210214.182433.555.001``: date, time and
    fractional seconds taken from the clock, followed by the zero-padded
    line number.

    Args:
        current_line_number: Non-negative line number of the CSV row the UID
            is generated for.

    Returns:
        The UID as a string.
    """
    date_format = "%Y%m%d.%H%M%S.%f"
    if USE_PACIFIC_TIME:
        # Imported lazily so pytz is only required when Pacific time is enabled.
        from pytz import timezone, utc
        # Start from an aware UTC timestamp: converting a naive local time
        # silently depends on the host's timezone configuration.
        now = datetime.datetime.now(tz=utc).astimezone(timezone('US/Pacific'))
    else:
        now = datetime.datetime.now()
    uid = now.strftime(date_format)
    if TRIM_TO_3_MICROSECONDS:
        # %f always yields six digits; dropping three leaves milliseconds.
        uid = uid[:-3]
    # Left-pad the line number with zeros up to the configured minimum width.
    line_number_identifier = str(current_line_number).zfill(MINIMUM_LINE_NUMBER_LENGTH)
    return '%s.%s' % (uid, line_number_identifier)
def add_uid_to_raw_csv(raw_csv, uid_column_name=None, uid_factory=None):
    """Append a UID column to every non-empty line of a raw CSV string.

    The first non-empty line is treated as the header and receives the column
    name; every following line receives a generated UID. Whether rows end with
    a trailing ',' is detected from the header, and the new field is placed so
    that the layout stays consistent (``..."UID",`` vs ``...,"UID"``).

    Carriage returns from CRLF line endings are stripped before the new field
    is appended; otherwise the '\\r' ends up in the middle of the row and
    corrupts it. Output lines are always terminated with '\\n'.

    NOTE: the input is processed line by line, so quoted fields containing
    embedded line breaks are not supported.

    Args:
        raw_csv: Full CSV content as one string.
        uid_column_name: Header text for the new column. Defaults to
            UID_COLUMN_NAME_TO_ADD.
        uid_factory: Callable taking the 1-based data line number and
            returning the UID string. Defaults to get_uid.

    Returns:
        The CSV content with the UID column appended; '' for empty input.
    """
    if not raw_csv:
        return ''
    if uid_column_name is None:
        uid_column_name = UID_COLUMN_NAME_TO_ADD
    if uid_factory is None:
        uid_factory = get_uid

    # Drop the '\r' left behind by CRLF endings, then discard blank lines.
    stripped = (line.rstrip('\r') for line in raw_csv.split('\n'))
    rows = [row for row in stripped if row]

    out_lines = []
    trailing_comma = False
    for line_number, row in enumerate(rows):
        if line_number == 0:
            # The header decides the layout used for the whole file.
            trailing_comma = row.endswith(',')
            value = uid_column_name
        else:
            value = uid_factory(line_number)
        if trailing_comma:
            out_lines.append('%s"%s",\n' % (row, value))
        else:
            out_lines.append('%s,"%s"\n' % (row, value))
    return ''.join(out_lines)
# Infor ION scripting convention: the incoming CSV text is supplied in
# input_var_1, and the value assigned to output_var_1 is exported as the
# script's output.
output_var_1 = add_uid_to_raw_csv(input_var_1)
```