# Delimited text data processing:
# Ruby script that reads a pipe (|) delimited file, splits each line into an array,
# processes individual columns, rearranges them into a new array, and writes the result to the outfile.
# Input and output paths. The input is opened before the output, so a missing
# input file raises before the outfile is created or truncated.
input_path = '../data/CAS_Einecs.txt'
output_path = 'outfile.txt'

# The block form of File.open closes each handle when its block exits, even if
# an exception is raised part-way through the loop.
File.open(input_path, 'r') do |infile|
  File.open(output_path, 'w') do |outfile|
    # Iterate through the lines of the infile.
    infile.each do |line|
      # Split at the | delimiter and strip surrounding whitespace from every field.
      cols = line.split('|').map(&:strip)

      # Remove leading zeros from the CAS field (index 2). nil.to_s is "", so a
      # row with fewer than three fields does not raise.
      # NOTE(review): the character class also strips leading colons - confirm
      # that this is intended.
      cas_short = cols[2].to_s.sub(/^[0:]*/, '')

      # Append the shortened CAS. For a row with exactly five fields it lands
      # at index 5 and therefore becomes the first output column below; for
      # longer rows index 5 is the row's own sixth field.
      cols.push(cas_short)

      # Pick the columns in output order. Missing fields come back as nil,
      # which join renders as an empty string. The fields were already
      # stripped, so no further newline removal is needed.
      outfile.puts(cols.values_at(5, 1, 3, 4, 2).join('|'))
    end
  end
end

# Success message
puts 'Done!'