human_task_agent.rb 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331
  1. require 'rturk'
  2. module Agents
  3. class HumanTaskAgent < Agent
  # Default polling schedule. As the description below explains, the schedule controls how
  # often completed HITs are checked, not how often new HITs are submitted.
  default_schedule "every_10m"

  # User-facing Markdown documentation for configuring this Agent.
  description <<-MD
    You can use a HumanTaskAgent to create Human Intelligence Tasks (HITs) on Mechanical Turk.

    HITs can be created in response to events, or on a schedule.  Set `trigger_on` to either `schedule` or `event`.

    The schedule of this Agent is how often it should check for completed HITs, __NOT__ how often to submit one.  To configure how often a new HIT
    should be submitted when in `schedule` mode, set `submission_period` to a number of hours.

    If created with an event, all HIT fields can contain interpolated values via [JSONPaths](http://goessner.net/articles/JsonPath/) placed between < and > characters.
    For example, if the incoming event was a Twitter event, you could make a HIT to rate its sentiment like this:

        {
          "expected_receive_period_in_days": 2,
          "trigger_on": "event",
          "hit": {
            "assignments": 1,
            "title": "Sentiment evaluation",
            "description": "Please rate the sentiment of this message: '<$.message>'",
            "reward": 0.05,
            "lifetime_in_seconds": "3600",
            "questions": [
              {
                "type": "selection",
                "key": "sentiment",
                "name": "Sentiment",
                "required": "true",
                "question": "Please select the best sentiment value:",
                "selections": [
                  { "key": "happy", "text": "Happy" },
                  { "key": "sad", "text": "Sad" },
                  { "key": "neutral", "text": "Neutral" }
                ]
              },
              {
                "type": "free_text",
                "key": "feedback",
                "name": "Have any feedback for us?",
                "required": "false",
                "question": "Feedback",
                "default": "Type here...",
                "min_length": "2",
                "max_length": "2000"
              }
            ]
          }
        }

    As you can see, you configure the created HIT with the `hit` option.  Required fields are `title`, which is the
    title of the created HIT, `description`, which is the description of the HIT, and `questions` which is an array of
    questions.  Questions can be of `type` _selection_ or _free\\_text_.  Both types require the `key`, `name`, `required`,
    `type`, and `question` configuration options.  Additionally, _selection_ requires a `selections` array of options, each of
    which contains `key` and `text`.  For _free\\_text_, the special configuration options are all optional, and are
    `default`, `min_length`, and `max_length`.

    If all of the `questions` are of `type` _selection_, you can set `take_majority` to _true_ at the top level to
    automatically select the majority vote for each question across all `assignments`.  If all selections are numeric, an `average_answer` will also be generated.

    `lifetime_in_seconds` is the number of seconds a HIT is left on Amazon before it's automatically closed.  The default is 1 day.

    As with most Agents, `expected_receive_period_in_days` is required if `trigger_on` is set to `event`.
  MD

  # User-facing Markdown documentation of the emitted event payload. The keys listed here are
  # the ones `review_hits` puts into the payload.
  event_description <<-MD
    Events look like:

        {
          "answers": [
            { "sentiment": "happy", "feedback": "Looks good to me" }
          ]
        }

    `answers` contains one entry per assignment.  When `take_majority` is set to _true_, the event also contains
    `counts` and `majority_answer`, plus `average_answer` if all selections are numeric:

        {
          "answers": [
            { "sentiment": "sad" },
            { "sentiment": "happy" },
            { "sentiment": "happy" }
          ],
          "counts": {
            "sentiment": { "happy": 2, "sad": 1, "neutral": 0 }
          },
          "majority_answer": {
            "sentiment": "happy"
          }
        }
  MD
  # Validates this Agent's options, adding a message to `errors` for every problem found.
  #
  # Side effect: `options['hit']` and `options['hit']['questions']` are initialised to an empty
  # hash / array when missing so the checks below can index into them safely.
  #
  # Checks performed:
  # * `trigger_on` is 'schedule' or 'event'
  # * `hit.assignments` is a positive number; `hit.title`, `hit.description` and
  #   `hit.questions` are present
  # * `expected_receive_period_in_days` (event mode) or a positive `submission_period`
  #   (schedule mode) is set
  # * every question sets 'key', 'name', 'required', 'type' and 'question'
  # * every 'selection' question has selections that each set 'key' and 'text'
  # * `take_majority` is only used when every question is a 'selection'
  def validate_options
    options['hit'] ||= {}
    options['hit']['questions'] ||= []

    hit = options['hit']
    questions = hit['questions']

    errors.add(:base, "'trigger_on' must be one of 'schedule' or 'event'") unless %w[schedule event].include?(options['trigger_on'])
    errors.add(:base, "'hit.assignments' should specify the number of HIT assignments to create") unless hit['assignments'].present? && hit['assignments'].to_i > 0
    errors.add(:base, "'hit.title' must be provided") unless hit['title'].present?
    errors.add(:base, "'hit.description' must be provided") unless hit['description'].present?
    errors.add(:base, "'hit.questions' must be provided") unless questions.present? && questions.length > 0

    if options['trigger_on'] == "event"
      errors.add(:base, "'expected_receive_period_in_days' is required when 'trigger_on' is set to 'event'") unless options['expected_receive_period_in_days'].present?
    elsif options['trigger_on'] == "schedule"
      errors.add(:base, "'submission_period' must be set to a positive number of hours when 'trigger_on' is set to 'schedule'") unless options['submission_period'].present? && options['submission_period'].to_i > 0
    end

    if questions.any? { |question| %w[key name required type question].any? { |k| !question[k].present? } }
      errors.add(:base, "all questions must set 'key', 'name', 'required', 'type', and 'question'")
    end

    bad_selections = questions.any? do |question|
      next false unless question['type'] == "selection"

      selections = question['selections']
      !selections.present? || !selections.all? { |s| s['key'].present? && s['text'].present? }
    end
    # The message names 'text' because that is the field actually checked above.
    if bad_selections
      errors.add(:base, "all questions of type 'selection' must have a selections array with selections that set 'key' and 'text'")
    end

    if options['take_majority'] == "true" && questions.any? { |question| question['type'] != "selection" }
      errors.add(:base, "all questions must be of type 'selection' to use the 'take_majority' option")
    end
  end
  # Returns the options a freshly created HumanTaskAgent starts with: an event-triggered
  # sentiment HIT with one selection question and one optional free-text question.
  def default_options
    sentiment_choices = [%w[happy Happy], %w[sad Sad], %w[neutral Neutral]].map do |key, text|
      { 'key' => key, 'text' => text }
    end

    sentiment_question = {
      'type' => "selection",
      'key' => "sentiment",
      'name' => "Sentiment",
      'required' => "true",
      'question' => "Please select the best sentiment value:",
      'selections' => sentiment_choices
    }

    feedback_question = {
      'type' => "free_text",
      'key' => "feedback",
      'name' => "Have any feedback for us?",
      'required' => "false",
      'question' => "Feedback",
      'default' => "Type here...",
      'min_length' => "2",
      'max_length' => "2000"
    }

    one_day_in_seconds = 24 * 60 * 60

    {
      'expected_receive_period_in_days' => 2,
      'trigger_on' => "event",
      'hit' => {
        'assignments' => 1,
        'title' => "Sentiment evaluation",
        'description' => "Please rate the sentiment of this message: '<$.message>'",
        'reward' => 0.05,
        'lifetime_in_seconds' => one_day_in_seconds,
        'questions' => [sentiment_question, feedback_question]
      }
    }
  end
  # Reports whether this Agent looks healthy: it has received an event within the last
  # `expected_receive_period_in_days` days and has no recent error logs.
  # Returns nil when nothing has been received yet.
  def working?
    received_at = last_receive_at
    return received_at unless received_at

    window_start = options['expected_receive_period_in_days'].to_i.days.ago
    received_at > window_start && !recent_error_logs?
  end
  # Scheduled entry point: reviews finished HITs, then, in 'schedule' mode, submits a new HIT
  # once at least `submission_period` hours have passed since the last scheduled submission.
  def check
    review_hits
    return unless options['trigger_on'] == "schedule"

    last_submitted_at = memory['last_schedule'] || 0
    period_in_seconds = options['submission_period'].to_i * 60 * 60
    return if last_submitted_at > Time.now.to_i - period_in_seconds

    memory['last_schedule'] = Time.now.to_i
    create_hit
  end
  # Creates one HIT per incoming event, but only when `trigger_on` is 'event';
  # in any other mode incoming events are ignored.
  def receive(incoming_events)
    return unless options['trigger_on'] == "event"

    incoming_events.each { |incoming| create_hit(incoming) }
  end
  143. protected
  # Looks up the reviewable HITs reported by Mechanical Turk, keeps the ones recorded in
  # `memory['hits']`, and for each of those whose assignments have all been submitted:
  # emits an event with the answers, approves every assignment, disposes of the HIT and
  # removes it from memory. HITs that are not complete yet are only logged.
  def review_hits
    reviewable_hit_ids = RTurk::GetReviewableHITs.create.hit_ids
    my_reviewed_hit_ids = reviewable_hit_ids & (memory['hits'] || {}).keys
    if reviewable_hit_ids.length > 0
      log "MTurk reports #{reviewable_hit_ids.length} HITs, of which I own [#{my_reviewed_hit_ids.to_sentence}]"
    end

    my_reviewed_hit_ids.each do |hit_id|
      hit = RTurk::Hit.new(hit_id)
      assignments = hit.assignments

      log "Looking at HIT #{hit_id}.  I found #{assignments.length} assignments#{" with the statuses: #{assignments.map(&:status).to_sentence}" if assignments.length > 0}"

      next unless assignments.length == hit.max_assignments && assignments.all? { |assignment| assignment.status == "Submitted" }

      payload = { 'answers' => assignments.map(&:answers) }
      payload.update(majority_payload(assignments)) if options['take_majority'] == "true"

      event = create_event :payload => payload
      log "Event emitted with answer(s)", :outbound_event => event, :inbound_event => Event.find_by_id(memory['hits'][hit_id])

      assignments.each(&:approve!)
      hit.dispose!

      memory['hits'].delete(hit_id)
    end
  end

  # Builds the 'counts', 'majority_answer' and, when every selection key is numeric,
  # 'average_answer' payload entries for the given assignments.
  #
  # Answers that are missing or are not one of the question's configured selection keys are
  # skipped instead of raising, so one unexpected answer cannot abort the whole review.
  # NOTE(review): assumes `assignment.answers` is hash-like and keyed by question key; confirm
  # against RTurk.
  def majority_payload(assignments)
    counts = {}
    options['hit']['questions'].each do |question|
      question_counts = question['selections'].each_with_object({}) { |selection, memo| memo[selection['key']] = 0 }
      assignments.each do |assignment|
        answers = ActiveSupport::HashWithIndifferentAccess.new(assignment.answers)
        answer = answers[question['key']]
        question_counts[answer] += 1 if question_counts.key?(answer)
      end
      counts[question['key']] = question_counts
    end

    result = { 'counts' => counts }

    result['majority_answer'] = counts.each_with_object({}) do |(key, question_counts), memo|
      # The selection with the highest count wins; how ties resolve depends on the sort.
      memo[key] = question_counts.to_a.sort { |a, b| a.last <=> b.last }.last.first
    end

    if all_questions_are_numeric?
      result['average_answer'] = counts.each_with_object({}) do |(key, question_counts), memo|
        sum = divisor = 0
        question_counts.each do |num, count|
          sum += num.to_s.to_f * count
          divisor += count
        end
        # No countable answers: report nil rather than dividing by zero (which yields NaN).
        memo[key] = divisor > 0 ? sum / divisor.to_f : nil
      end
    end

    result
  end
  # True when every selection key of every configured question is a string that round-trips
  # unchanged through `to_f` or `to_i` (e.g. "2" or "2.5", but not "2.50" or "happy").
  def all_questions_are_numeric?
    numeric_key = lambda do |key|
      key == key.to_f.to_s || key == key.to_i.to_s
    end

    options['hit']['questions'].none? do |question|
      question['selections'].any? { |selection| !numeric_key.call(selection['key']) }
    end
  end
  # Submits a new HIT to Mechanical Turk, interpolating the configured title, description and
  # questions with the payload of `event` (or an empty payload when called without one), then
  # records the new HIT id in `memory['hits']` mapped to the triggering event id (nil if none).
  def create_hit(event = nil)
    hit_options = options['hit']
    source_payload = event ? event.payload : {}

    hit_title = Utils.interpolate_jsonpaths(hit_options['title'], source_payload).strip
    hit_description = Utils.interpolate_jsonpaths(hit_options['description'], source_payload).strip
    hit_questions = Utils.recursively_interpolate_jsonpaths(hit_options['questions'], source_payload)

    created_hit = RTurk::Hit.create(:title => hit_title) do |new_hit|
      new_hit.max_assignments = (hit_options['assignments'] || 1).to_i
      new_hit.description = hit_description
      new_hit.lifetime = (hit_options['lifetime_in_seconds'] || 24 * 60 * 60).to_i
      new_hit.question_form AgentQuestionForm.new(:title => hit_title, :description => hit_description, :questions => hit_questions)
      new_hit.reward = (hit_options['reward'] || 0.05).to_f
      #new_hit.qualifications.add :approval_rate, { :gt => 80 }
    end

    memory['hits'] ||= {}
    memory['hits'][created_hit.id] = event && event.id
    log "HIT created with ID #{created_hit.id} and URL #{created_hit.url}", :inbound_event => event
  end
  # RTurk question form that renders this Agent's configured questions.
  #
  # Needs a :title and :description (shown in the Overview) and :questions, an array of
  # question hashes. A question of type "selection" becomes a radio-button SelectionAnswer;
  # any other type becomes a FreeTextAnswer.
  class AgentQuestionForm < RTurk::QuestionForm
    needs :title, :description, :questions

    # Emits the Overview followed by one Question element per configured question.
    def question_form_content
      build_overview
      @questions.each_with_index do |question, position|
        build_question(question, position)
      end
    end

    private

    # Overview section carrying the HIT title and description.
    def build_overview
      Overview do
        Title do
          text @title
        end
        Text do
          text @description
        end
      end
    end

    # One Question element; identifier, display name and required flag fall back to
    # position-based / 'true' defaults when the question hash leaves them out.
    def build_question(question, position)
      Question do
        QuestionIdentifier do
          text question['key'] || "question_#{position}"
        end
        DisplayName do
          text question['name'] || "Question ##{position}"
        end
        IsRequired do
          text question['required'] || 'true'
        end
        QuestionContent do
          Text do
            text question['question']
          end
        end
        AnswerSpecification do
          if question['type'] == "selection"
            build_selection_answer(question['selections'])
          else
            build_free_text_answer(question)
          end
        end
      end
    end

    # Radio-button answer listing every selection's key and text.
    def build_selection_answer(selections)
      SelectionAnswer do
        StyleSuggestion do
          text 'radiobutton'
        end
        Selections do
          selections.each do |choice|
            Selection do
              SelectionIdentifier do
                text choice['key']
              end
              Text do
                text choice['text']
              end
            end
          end
        end
      end
    end

    # Free-text answer with optional length constraints and default text.
    def build_free_text_answer(question)
      minimum = question['min_length']
      maximum = question['max_length']

      FreeTextAnswer do
        if minimum.present? || maximum.present?
          Constraints do
            limits = {}
            limits['minLength'] = minimum.to_s if minimum.present?
            limits['maxLength'] = maximum.to_s if maximum.present?
            Length limits
          end
        end

        if question['default'].present?
          DefaultText do
            text question['default']
          end
        end
      end
    end
  end
  287. end
  288. end