Wednesday, November 14, 2012

Ruby code to parse and combine text files

I use this Ruby code to parse several tab-delimited text files that contain individual raters' perceptions of a target (in this case a video). Both the rater ID and the target video number are embedded in the filename.


#! /usr/bin/env ruby

# Combines the per-rater rating files in the current directory into a single
# tab-delimited file, ./all_ratings.txt.
#
# Every "*.txt" file is expected to be named "<rater>_<group>.<ext>". Inside
# each file, the data rows start on the line after the first line containing
# DATA_MARKER; up to ROWS_PER_FILE rows are copied, each prefixed with the
# group and the numeric rater id.
#
# NOTE(review): the header declares a "min" column but no minute value is
# added here (the original computed one and never wrote it). Presumably each
# data row already starts with the minute -- confirm against a sample file.

OUTPUT_PATH = './all_ratings.txt'
COLUMNS = "group\trater\tmin\tengage\tprepare\tdiverge\tconverge\texecute\tcentralize\tattentive\ttone\tactivation\n"
# Text that appears on the last line before the data rows.
DATA_MARKER = '- Low marked by sluggishness'
# The original loop ran from start to start + 30 inclusive, i.e. 31 rows.
ROWS_PER_FILE = 31
# Numeric id for each known rater name (looked up case-insensitively).
RATER_IDS = { 'rater1' => 1, 'rater2' => 2, 'rater3' => 3, 'rater4' => 4 }.freeze

# Splits raw file text into lines, treating "\r\n", "\r" (Mac Classic) and
# "\n" all as line endings.
#
# Uses the non-destructive gsub: gsub! returns nil when nothing is replaced,
# which would break on files that already use "\n".
def rating_lines(text)
  text.gsub(/\r\n?/, "\n").split("\n")
end

# Returns the index of the first data row (the line after the first line
# containing +marker+), or nil when no line contains the marker.
def data_start_index(lines, marker = DATA_MARKER)
  marker_index = lines.index { |line| line.include?(marker) }
  marker_index && marker_index + 1
end

# Extracts [rater, group] from a filename such as "rater1_12.txt".
# Either element is nil when the name does not contain that part.
def parse_rating_filename(filename)
  rater, group = File.basename(filename).split('.').first.to_s.split('_')
  [rater, group]
end

# Builds the output rows for one file's lines, or returns nil when the data
# marker is missing. Files with fewer than ROWS_PER_FILE rows after the
# marker yield only the rows that exist.
def combined_rows(lines, group, rater_id)
  start = data_start_index(lines)
  return nil unless start

  lines[start, ROWS_PER_FILE].map { |row| "#{group}\t#{rater_id}\t#{row}\n" }
end

# Leave out a previous run's output so it is never re-read as input.
input_files = Dir.glob('*.txt')
                 .reject { |name| File.basename(name) == File.basename(OUTPUT_PATH) }
                 .sort

# Open the output once: write the column headers, then every file's rows.
File.open(OUTPUT_PATH, 'w') do |out|
  out.write(COLUMNS)

  input_files.each do |filename|
    rater, group = parse_rating_filename(filename)
    unless rater && group
      warn "skipping #{filename}: name is not of the form <rater>_<group>.txt"
      next
    end

    # Unknown rater names map to nil, which is written as an empty column.
    rater_id = RATER_IDS[rater.downcase]
    puts "rater: #{rater}(#{rater_id}) group: #{group}"

    lines = rating_lines(File.read(filename))
    puts lines.first

    rows = combined_rows(lines, group, rater_id)
    if rows.nil?
      warn "skipping #{filename}: no line contains #{DATA_MARKER.inspect}"
      next
    end
    if rows.size < ROWS_PER_FILE
      warn "#{filename}: only #{rows.size} of #{ROWS_PER_FILE} data rows found"
    end

    rows.each { |row| out.write(row) }
  end
end


No comments:

Post a Comment