human_task_agent.rb 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292
  1. require 'rturk'
  2. module Agents
  3. class HumanTaskAgent < Agent
  4. default_schedule "every_10m"
  5. description <<-MD
  6. You can use a HumanTaskAgent to create Human Intelligence Tasks (HITs) on Mechanical Turk.
  7. HITs can be created in response to events, or on a schedule. Set `trigger_on` to either `schedule` or `event`.
  8. The schedule of this Agent is how often it should check for completed HITs, __NOT__ how often to submit one. To configure how often a new HIT
  9. should be submitted when in `schedule` mode, set `submission_period` to a number of hours.
  10. If created with an event, all HIT fields can contain interpolated values via [JSONPaths](http://goessner.net/articles/JsonPath/) placed between < and > characters.
  11. For example, if the incoming event was a Twitter event, you could make a HIT to rate its sentiment like this:
  12. {
  13. "expected_receive_period_in_days": 2,
  14. "trigger_on": "event",
  15. "hit": {
  16. "max_assignments": 1,
  17. "title": "Sentiment evaluation",
  18. "description": "Please rate the sentiment of this message: '<$.message>'",
  19. "reward": 0.05,
  20. "questions": [
  21. {
  22. "type": "selection",
  23. "key": "sentiment",
  24. "name": "Sentiment",
  25. "required": "true",
  26. "question": "Please select the best sentiment value:",
  27. "selections": [
  28. { "key": "happy", "text": "Happy" },
  29. { "key": "sad", "text": "Sad" },
  30. { "key": "neutral", "text": "Neutral" }
  31. ]
  32. },
  33. {
  34. "type": "free_text",
  35. "key": "feedback",
  36. "name": "Have any feedback for us?",
  37. "required": "false",
  38. "question": "Feedback",
  39. "default": "Type here...",
  40. "min_length": "2",
  41. "max_length": "2000"
  42. }
  43. ]
  44. }
  45. }
  46. As you can see, you configure the created HIT with the `hit` option. Required fields are `title`, which is the
  47. title of the created HIT, `description`, which is the description of the HIT, and `questions` which is an array of
  48. questions. Questions can be of `type` _selection_ or _free\\_text_. Both types require the `key`, `name`, `required`,
  49. `type`, and `question` configuration options. Additionally, _selection_ requires a `selections` array of options, each of
  50. which contain `key` and `text`. For _free\\_text_, the special configuration options are all optional, and are
  51. `default`, `min_length`, and `max_length`.
  52. If all of the `questions` are of `type` _selection_, you can set `take_majority` to _true_ at the top level to
  53. automatically select the majority vote for each question across all `max_assignments`.
  54. As with most Agents, `expected_receive_period_in_days` is required if `trigger_on` is set to `event`.
  55. MD
  56. event_description <<-MD
  57. Events look like:
  58. {
  59. }
  60. MD
  # Validates this agent's options, adding a message to `errors` for every problem found.
  #
  # Checks performed:
  # * `trigger_on` is either "schedule" or "event";
  # * in "event" mode, `expected_receive_period_in_days` is present;
  # * in "schedule" mode, `submission_period` is a positive number of hours;
  # * `hit.title`, `hit.description` and `hit.questions` (documented as required in the
  #   agent description) are present;
  # * when `take_majority` is "true", every question is of type "selection".
  def validate_options
    # A missing or malformed `hit` / `questions` entry must surface as validation errors
    # rather than as a NoMethodError on nil further down.
    hit_options = options[:hit].is_a?(Hash) ? options[:hit] : {}
    questions = hit_options[:questions].is_a?(Array) ? hit_options[:questions] : []

    unless %w[schedule event].include?(options[:trigger_on])
      errors.add(:base, "'trigger_on' must be one of 'schedule' or 'event'")
    end

    case options[:trigger_on]
    when "event"
      unless options[:expected_receive_period_in_days].present?
        errors.add(:base, "'expected_receive_period_in_days' is required when 'trigger_on' is set to 'event'")
      end
    when "schedule"
      unless options[:submission_period].present? && options[:submission_period].to_i > 0
        errors.add(:base, "'submission_period' must be set to a positive number of hours when 'trigger_on' is set to 'schedule'")
      end
    end

    errors.add(:base, "'hit.title' must be provided") unless hit_options[:title].present?
    errors.add(:base, "'hit.description' must be provided") unless hit_options[:description].present?
    errors.add(:base, "'hit.questions' must be a non-empty array of questions") if questions.empty?

    if options[:take_majority] == "true" && questions.any? { |question| question[:type] != "selection" }
      errors.add(:base, "all questions must be of type 'selection' to use the 'take_majority' option")
    end
  end
  # Returns the options a freshly created agent starts with: an event-triggered
  # sentiment-rating HIT made of one selection question and one optional free-text question.
  def default_options
    sentiment_question = {
      :type => "selection",
      :key => "sentiment",
      :name => "Sentiment",
      :required => "true",
      :question => "Please select the best sentiment value:",
      :selections => [
        { :key => "happy", :text => "Happy" },
        { :key => "sad", :text => "Sad" },
        { :key => "neutral", :text => "Neutral" }
      ]
    }

    feedback_question = {
      :type => "free_text",
      :key => "feedback",
      :name => "Have any feedback for us?",
      :required => "false",
      :question => "Feedback",
      :default => "Type here...",
      :min_length => "2",
      :max_length => "2000"
    }

    hit_defaults = {
      :max_assignments => 1,
      :title => "Sentiment evaluation",
      :description => "Please rate the sentiment of this message: '<$.message>'",
      :reward => 0.05,
      :questions => [sentiment_question, feedback_question]
    }

    {
      :expected_receive_period_in_days => 2,
      :trigger_on => "event",
      :hit => hit_defaults
    }
  end
  # Reports whether the agent looks healthy: it has received something within the last
  # `expected_receive_period_in_days` days and has no recent error logs.
  # Returns a falsy value when nothing has been received yet.
  def working?
    received_at = last_receive_at
    return received_at unless received_at

    cutoff = options[:expected_receive_period_in_days].to_i.days.ago
    received_at > cutoff && !recent_error_logs?
  end
  # Scheduled entry point: reviews outstanding HITs and, in "schedule" mode, submits a
  # new HIT once at least `submission_period` hours have passed since the last submission.
  def check
    setup!
    review_hits
    return unless options[:trigger_on] == "schedule"

    period_in_seconds = options[:submission_period].to_i * 60 * 60
    last_submitted_at = memory[:last_schedule] || 0
    return if last_submitted_at > Time.now.to_i - period_in_seconds

    # The timestamp is recorded before the HIT is created, as in the original ordering.
    memory[:last_schedule] = Time.now.to_i
    create_hit
  end
  # Creates one HIT per incoming event, but only when the agent is configured with
  # `trigger_on` set to "event"; otherwise the events are ignored.
  def receive(incoming_events)
    return unless options[:trigger_on] == "event"

    setup!
    incoming_events.each { |event| create_hit(event) }
  end
  # Configures RTurk: sets its logger to DEBUG and, outside the Rails test environment,
  # supplies the AWS credentials and sandbox flag read from the environment.
  # To be moved either into an initializer or a per-agent setting.
  def setup!
    RTurk.logger.level = Logger::DEBUG
    return if Rails.env.test?

    use_sandbox = ENV['AWS_SANDBOX'] == "true"
    RTurk.setup(ENV['AWS_ACCESS_KEY_ID'], ENV['AWS_ACCESS_KEY'], :sandbox => use_sandbox)
  end
  135. protected
  # Asks Mechanical Turk for reviewable HITs and processes those recorded in `memory[:hits]`.
  #
  # A HIT is processed only once all of its `max_assignments` assignments have the status
  # "Submitted". Processing emits an event carrying every assignment's answers (plus
  # per-question counts and the majority answer when `take_majority` is "true"), approves
  # the assignments, and removes the HIT from memory.
  def review_hits
    reviewable_hit_ids = RTurk::GetReviewableHITs.create.hit_ids
    my_reviewed_hit_ids = reviewable_hit_ids & (memory[:hits] || {}).keys.map(&:to_s)
    log "MTurk reports the following HITs [#{reviewable_hit_ids.to_sentence}], of which I own [#{my_reviewed_hit_ids.to_sentence}]"

    my_reviewed_hit_ids.each do |hit_id|
      hit = RTurk::Hit.new(hit_id)
      assignments = hit.assignments
      log "Looking at HIT #{hit_id}. I found #{assignments.length} assignments#{" with the statuses: #{assignments.map(&:status).to_sentence}" if assignments.length > 0}"

      next unless assignments.length == hit.max_assignments && assignments.all? { |assignment| assignment.status == "Submitted" }

      payload = { :answers => assignments.map(&:answers) }
      if options[:take_majority] == "true"
        counts = count_selection_answers(assignments)
        payload[:counts] = counts
        payload[:majority_answer] = majority_answers(counts)
      end
      event = create_event :payload => payload

      # create_hit stores the id exactly as RTurk returns it, whereas this method used to
      # look it up as a Symbol only. Matching on the string form finds the entry for
      # either key type, so the HIT is always removed once it has been handled.
      hit_key = memory[:hits].keys.find { |key| key.to_s == hit_id }
      log "Event emitted with answer(s)", :outbound_event => event, :inbound_event => Event.find_by_id(memory[:hits][hit_key])
      assignments.each(&:approve!)
      memory[:hits].delete(hit_key)
    end
  end

  # Tallies, for each configured question, how many assignments chose each answer.
  # Every configured selection starts at zero. An assignment without an answer for a
  # question is skipped, and an answer outside the configured selections is tallied under
  # its own key, so neither case raises.
  def count_selection_answers(assignments)
    answer_sets = assignments.map { |assignment| ActiveSupport::HashWithIndifferentAccess.new(assignment.answers) }

    options[:hit][:questions].each_with_object({}) do |question, counts|
      question_counts = (question[:selections] || []).each_with_object({}) do |selection, tally|
        tally[selection[:key]] = 0
      end

      answer_sets.each do |answers|
        answer = answers[question[:key]]
        next if answer.nil?
        question_counts[answer] = question_counts.fetch(answer, 0) + 1
      end

      counts[question[:key]] = question_counts
    end
  end

  # Picks, for each question, the answer with the highest count (nil when nothing was tallied).
  def majority_answers(counts)
    counts.each_with_object({}) do |(key, question_counts), winners|
      top_entry = question_counts.to_a.sort { |a, b| a.last <=> b.last }.last
      winners[key] = top_entry && top_entry.first
    end
  end
  # Creates a HIT on Mechanical Turk from the `hit` options and remembers it in
  # `memory[:hits]`, mapped to the id of the triggering event (nil when there is none).
  # When an event is given, its payload is used to interpolate JSONPaths in the title,
  # description and questions.
  def create_hit(event = nil)
    payload = event ? event.payload : {}
    hit_options = options[:hit]

    title = Utils.interpolate_jsonpaths(hit_options[:title], payload).strip
    description = Utils.interpolate_jsonpaths(hit_options[:description], payload).strip
    questions = Utils.recursively_interpolate_jsonpaths(hit_options[:questions], payload)

    # The block parameter is named differently from the outer local to avoid shadowing it.
    hit = RTurk::Hit.create(:title => title) do |new_hit|
      new_hit.max_assignments = (hit_options[:max_assignments] || 1).to_i
      new_hit.description = description
      new_hit.question_form AgentQuestionForm.new(:title => title, :description => description, :questions => questions)
      new_hit.reward = (hit_options[:reward] || 0.05).to_f
      #new_hit.qualifications.add :approval_rate, { :gt => 80 }
    end

    memory[:hits] ||= {}
    memory[:hits][hit.id] = event && event.id
    log "HIT created with ID #{hit.id} and URL #{hit.url}", :inbound_event => event
  end
  # RTurk question form built from the agent's title, description and question list.
  class AgentQuestionForm < RTurk::QuestionForm
    needs :title, :description, :questions

    # Emits an Overview (title and description) followed by one Question per configured
    # question. Missing `key`, `name` and `required` values fall back to index-based or
    # 'true' defaults.
    def question_form_content
      Overview do
        Title do
          text(@title)
        end
        Text do
          text(@description)
        end
      end

      @questions.each_with_index do |question, position|
        Question do
          QuestionIdentifier do
            text(question[:key] || "question_#{position}")
          end
          DisplayName do
            text(question[:name] || "Question ##{position}")
          end
          IsRequired do
            text(question[:required] || 'true')
          end
          QuestionContent do
            Text do
              text(question[:question])
            end
          end
          AnswerSpecification do
            if question[:type] == "selection"
              selection_answer(question[:selections])
            else
              free_text_answer(question)
            end
          end
        end
      end
    end

    private

    # Emits a radio-button SelectionAnswer listing each selection's key and text.
    def selection_answer(selections)
      SelectionAnswer do
        StyleSuggestion do
          text('radiobutton')
        end
        Selections do
          selections.each do |choice|
            Selection do
              SelectionIdentifier do
                text(choice[:key])
              end
              Text do
                text(choice[:text])
              end
            end
          end
        end
      end
    end

    # Emits a FreeTextAnswer with optional length constraints and default text.
    def free_text_answer(question)
      lengths = {}
      lengths[:minLength] = question[:min_length].to_s if question[:min_length].present?
      lengths[:maxLength] = question[:max_length].to_s if question[:max_length].present?

      FreeTextAnswer do
        # Constraints are only emitted when at least one length bound is configured.
        unless lengths.empty?
          Constraints do
            Length lengths
          end
        end
        if question[:default].present?
          DefaultText do
            text(question[:default])
          end
        end
      end
    end
  end
  255. end
  256. end