Няма описание http://j1x-huginn.herokuapp.com

human_task_agent.rb 19KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485
  1. require 'rturk'
  2. module Agents
  3. class HumanTaskAgent < Agent
  4. default_schedule "every_10m"
  5. description <<-MD
  6. You can use a HumanTaskAgent to create Human Intelligence Tasks (HITs) on Mechanical Turk.
  7. HITs can be created in response to events, or on a schedule. Set `trigger_on` to either `schedule` or `event`.
  8. # Schedule
  9. The schedule of this Agent is how often it should check for completed HITs, __NOT__ how often to submit one. To configure how often a new HIT
  10. should be submitted when in `schedule` mode, set `submission_period` to a number of hours.
  11. # Example
  12. If created with an event, all HIT fields can contain interpolated values via [liquid templating](https://github.com/cantino/huginn/wiki/Formatting-Events-using-Liquid).
  13. For example, if the incoming event was a Twitter event, you could make a HIT to rate its sentiment like this:
  14. {
  15. "expected_receive_period_in_days": 2,
  16. "trigger_on": "event",
  17. "hit": {
  18. "assignments": 1,
  19. "title": "Sentiment evaluation",
  20. "description": "Please rate the sentiment of this message: '{{message}}'",
  21. "reward": 0.05,
  22. "lifetime_in_seconds": "3600",
  23. "questions": [
  24. {
  25. "type": "selection",
  26. "key": "sentiment",
  27. "name": "Sentiment",
  28. "required": "true",
  29. "question": "Please select the best sentiment value:",
  30. "selections": [
  31. { "key": "happy", "text": "Happy" },
  32. { "key": "sad", "text": "Sad" },
  33. { "key": "neutral", "text": "Neutral" }
  34. ]
  35. },
  36. {
  37. "type": "free_text",
  38. "key": "feedback",
  39. "name": "Have any feedback for us?",
  40. "required": "false",
  41. "question": "Feedback",
  42. "default": "Type here...",
  43. "min_length": "2",
  44. "max_length": "2000"
  45. }
  46. ]
  47. }
  48. }
  49. As you can see, you configure the created HIT with the `hit` option. Required fields are `title`, which is the
  50. title of the created HIT, `description`, which is the description of the HIT, and `questions` which is an array of
  51. questions. Questions can be of `type` _selection_ or _free\\_text_. Both types require the `key`, `name`, `required`,
  52. `type`, and `question` configuration options. Additionally, _selection_ requires a `selections` array of options, each of
  53. which contain `key` and `text`. For _free\\_text_, the special configuration options are all optional, and are
  54. `default`, `min_length`, and `max_length`.
  55. # Combining answers
  56. There are a couple of ways to combine HITs that have multiple `assignments`, all of which involve setting `combination_mode` at the top level.
  57. ## Taking the majority
  58. Option 1: if all of your `questions` are of `type` _selection_, you can set `combination_mode` to `take_majority`.
  59. This will cause the Agent to automatically select the majority vote for each question across all `assignments` and return it as `majority_answer`.
  60. If all selections are numeric, an `average_answer` will also be generated.
  61. Option 2: you can have the Agent ask additional human workers to rank the `assignments` and return the most highly ranked answer.
  62. To do this, set `combination_mode` to `poll` and provide a `poll_options` object. Here is an example:
  63. {
  64. "trigger_on": "schedule",
  65. "submission_period": 12,
  66. "combination_mode": "poll",
  67. "poll_options": {
  68. "title": "Take a poll about some jokes",
  69. "instructions": "Please rank these jokes from most funny (5) to least funny (1)",
  70. "assignments": 3,
  71. "row_template": "{{joke}}"
  72. },
  73. "hit": {
  74. "assignments": 5,
  75. "title": "Tell a joke",
  76. "description": "Please tell me a joke",
  77. "reward": 0.05,
  78. "lifetime_in_seconds": "3600",
  79. "questions": [
  80. {
  81. "type": "free_text",
  82. "key": "joke",
  83. "name": "Your joke",
  84. "required": "true",
  85. "question": "Joke",
  86. "min_length": "2",
  87. "max_length": "2000"
  88. }
  89. ]
  90. }
  91. }
  92. Resulting events will have the original `answers`, as well as the `poll` results, and a field called `best_answer` that contains the best answer as determined by the poll.
  93. # Other settings
  94. `lifetime_in_seconds` is the number of seconds a HIT is left on Amazon before it's automatically closed. The default is 1 day.
  95. As with most Agents, `expected_receive_period_in_days` is required if `trigger_on` is set to `event`.
  96. MD
  97. event_description <<-MD
  98. Events look like:
  99. {
  100. "answers": [
  101. {
  102. "feedback": "Hello!",
  103. "sentiment": "happy"
  104. }
  105. ]
  106. }
  107. MD
  # Validates the Agent's configuration, adding one message to `errors` for every problem found.
  #
  # Side effect: `options['hit']` is defaulted to `{}` and `options['hit']['questions']` to `[]`
  # so the checks below can index into them without nil guards.
  #
  # Checks performed, in order:
  # * `trigger_on` is "schedule" or "event"
  # * `hit.assignments`, `hit.title`, `hit.description` and `hit.questions` are provided
  # * the mode-specific option is set (`expected_receive_period_in_days` or `submission_period`)
  # * every question sets `key`, `name`, `required`, `type` and `question`
  # * every selection question has selections that each set `key` and `text`
  # * `take_majority` is only used with selection questions
  # * `poll_options` is complete when a poll is requested
  def validate_options
    options['hit'] ||= {}
    options['hit']['questions'] ||= []
    hit = options['hit']
    questions = hit['questions']

    unless %w[schedule event].include?(options['trigger_on'])
      errors.add(:base, "'trigger_on' must be one of 'schedule' or 'event'")
    end
    unless hit['assignments'].present? && hit['assignments'].to_i > 0
      errors.add(:base, "'hit.assignments' should specify the number of HIT assignments to create")
    end
    errors.add(:base, "'hit.title' must be provided") unless hit['title'].present?
    errors.add(:base, "'hit.description' must be provided") unless hit['description'].present?
    errors.add(:base, "'hit.questions' must be provided") unless questions.present? && questions.length > 0

    case options['trigger_on']
    when "event"
      unless options['expected_receive_period_in_days'].present?
        errors.add(:base, "'expected_receive_period_in_days' is required when 'trigger_on' is set to 'event'")
      end
    when "schedule"
      unless options['submission_period'].present? && options['submission_period'].to_i > 0
        errors.add(:base, "'submission_period' must be set to a positive number of hours when 'trigger_on' is set to 'schedule'")
      end
    end

    required_keys = %w[key name required type question]
    if questions.any? { |question| required_keys.any? { |k| !question[k].present? } }
      errors.add(:base, "all questions must set 'key', 'name', 'required', 'type', and 'question'")
    end

    invalid_selection = questions.any? do |question|
      next false unless question['type'] == "selection"
      selections = question['selections']
      !selections.present? || !selections.all? { |s| s['key'].present? && s['text'].present? }
    end
    if invalid_selection
      # The message names 'text' because that is the field actually checked above.
      errors.add(:base, "all questions of type 'selection' must have a selections array with selections that set 'key' and 'text'")
    end

    if take_majority? && questions.any? { |question| question['type'] != "selection" }
      errors.add(:base, "all questions must be of type 'selection' to use the 'take_majority' option")
    end

    if create_poll?
      poll = options['poll_options']
      poll_complete = poll.is_a?(Hash) &&
                      poll['title'].present? &&
                      poll['instructions'].present? &&
                      poll['row_template'].present? &&
                      poll['assignments'].to_i > 0
      unless poll_complete
        errors.add(:base, "poll_options is required when combination_mode is set to 'poll' and must have the keys 'title', 'instructions', 'row_template', and 'assignments'")
      end
    end
  end
  # Returns the options a freshly created HumanTaskAgent starts with: an event-triggered
  # sentiment-rating HIT with one selection question and one optional free-text question.
  def default_options
    sentiment_question = {
      'type' => "selection",
      'key' => "sentiment",
      'name' => "Sentiment",
      'required' => "true",
      'question' => "Please select the best sentiment value:",
      'selections' => [
        { 'key' => "happy", 'text' => "Happy" },
        { 'key' => "sad", 'text' => "Sad" },
        { 'key' => "neutral", 'text' => "Neutral" }
      ]
    }

    feedback_question = {
      'type' => "free_text",
      'key' => "feedback",
      'name' => "Have any feedback for us?",
      'required' => "false",
      'question' => "Feedback",
      'default' => "Type here...",
      'min_length' => "2",
      'max_length' => "2000"
    }

    one_day_in_seconds = 24 * 60 * 60

    {
      'expected_receive_period_in_days' => 2,
      'trigger_on' => "event",
      'hit' => {
        'assignments' => 1,
        'title' => "Sentiment evaluation",
        'description' => "Please rate the sentiment of this message: '{{message}}'",
        'reward' => 0.05,
        'lifetime_in_seconds' => one_day_in_seconds,
        'questions' => [sentiment_question, feedback_question]
      }
    }
  end
  # Reports whether the Agent looks healthy: it has received an event within the last
  # `expected_receive_period_in_days` days and has no recent error logs.
  #
  # NOTE(review): when `trigger_on` is "schedule" no events are required to arrive, so
  # `last_receive_at` may stay unset and this would never be truthy - confirm that is intended.
  def working?
    received_recently = last_receive_at && begin
      window_in_days = interpolated['expected_receive_period_in_days'].to_i
      last_receive_at > window_in_days.days.ago
    end
    received_recently && !recent_error_logs?
  end
  # Scheduled entry point. Always reviews outstanding HITs; additionally, in "schedule" mode,
  # submits a new HIT once `submission_period` hours have passed since the last submission.
  def check
    review_hits
    return unless interpolated['trigger_on'] == "schedule"

    period_in_seconds = interpolated['submission_period'].to_i * 60 * 60
    last_submitted_at = memory['last_schedule'] || 0
    return if last_submitted_at > Time.now.to_i - period_in_seconds

    # The timestamp is recorded before the HIT is created, matching the original ordering.
    memory['last_schedule'] = Time.now.to_i
    create_basic_hit
  end
  # Creates one HIT per incoming event, but only when the Agent is configured with
  # `trigger_on` set to "event"; otherwise incoming events are ignored.
  def receive(incoming_events)
    return unless interpolated['trigger_on'] == "event"

    incoming_events.each { |event| create_basic_hit(event) }
  end
  191. protected
  # True when answers should be combined by majority vote: either `combination_mode` is
  # "take_majority" or the separate `take_majority` option is the string "true".
  def take_majority?
    return true if interpolated['combination_mode'] == "take_majority"

    interpolated['take_majority'] == "true"
  end
  # True when `combination_mode` is "poll", i.e. completed HITs should be followed by a
  # ranking poll instead of emitting an event directly.
  def create_poll?
    mode = interpolated['combination_mode']
    mode == "poll"
  end
  # Looks up the Event recorded as the trigger of the given HIT.
  # Returns nil when the HIT's memory entry is not a Hash.
  def event_for_hit(hit_id)
    hit_info = memory['hits'][hit_id]
    return nil unless hit_info.is_a?(Hash)

    Event.find_by_id(hit_info['event_id'])
  end
  # Returns the type stored in memory for the given HIT, falling back to 'user' when the
  # memory entry is not a Hash or has no truthy 'type'.
  def hit_type(hit_id)
    hit_info = memory['hits'][hit_id]
    (hit_info.is_a?(Hash) && hit_info['type']) || 'user'
  end
  # Asks Mechanical Turk for reviewable HITs, keeps the ones this Agent created (tracked in
  # `memory['hits']`), and processes each HIT whose assignments have all been submitted:
  #
  # * a completed poll HIT emits an event with the original answers, the poll ratings and
  #   the best answer;
  # * a completed regular HIT spawns a follow-up poll HIT when `combination_mode` is "poll",
  #   otherwise it emits an event with the answers (plus majority data if requested).
  #
  # Every processed HIT has its assignments approved, is disposed, and is dropped from memory.
  def review_hits
    reviewable_hit_ids = RTurk::GetReviewableHITs.create.hit_ids
    my_reviewed_hit_ids = reviewable_hit_ids & (memory['hits'] || {}).keys
    if reviewable_hit_ids.length > 0
      log "MTurk reports #{reviewable_hit_ids.length} HITs, of which I own [#{my_reviewed_hit_ids.to_sentence}]"
    end

    my_reviewed_hit_ids.each do |hit_id|
      hit = RTurk::Hit.new(hit_id)
      assignments = hit.assignments
      statuses = assignments.length > 0 ? " with the statuses: #{assignments.map(&:status).to_sentence}" : ""
      log "Looking at HIT #{hit_id}. I found #{assignments.length} assignments#{statuses}"

      # Only act once every requested assignment has come back and is awaiting review.
      all_submitted = assignments.length == hit.max_assignments &&
                      assignments.all? { |assignment| assignment.status == "Submitted" }
      next unless all_submitted

      inbound_event = event_for_hit(hit_id)
      if hit_type(hit_id) == 'poll'
        emit_poll_results(hit_id, assignments, inbound_event)
      elsif create_poll?
        create_poll_hit(hit_id, assignments, inbound_event)
      else
        emit_answers(assignments, inbound_event)
      end

      assignments.each(&:approve!)
      hit.dispose!
      memory['hits'].delete(hit_id)
    end
  end

  # Handles a completed poll: sums the ratings given to each item and emits an event whose
  # 'best_answer' is the original answer with the highest total.
  def emit_poll_results(hit_id, assignments, inbound_event)
    log "Handling a poll: #{hit_id}"

    scores = Hash.new(0)
    assignments.each do |assignment|
      assignment.answers.each { |item, rating| scores[item] += rating.to_i }
    end

    original_answers = memory['hits'][hit_id]['answers']
    top_item = scores.max_by { |_item, score| score }
    # Poll questions are keyed 1..N by create_poll_hit, so subtract one to index the answers.
    # `top_item` is nil when no ratings came back; then there is no best answer.
    best_answer = top_item && original_answers[top_item.first.to_i - 1]

    payload = {
      'answers' => original_answers,
      'poll' => assignments.map(&:answers),
      'best_answer' => best_answer
    }
    event = create_event :payload => payload
    log "Event emitted with answer(s) for poll", :outbound_event => event, :inbound_event => inbound_event
  end

  # Handles a completed regular HIT when no poll is requested: emits the collected answers,
  # adding 'counts', 'majority_answer' and possibly 'average_answer' when taking the majority.
  def emit_answers(assignments, inbound_event)
    payload = { 'answers' => assignments.map(&:answers) }
    payload.update(majority_results(assignments)) if take_majority?

    event = create_event :payload => payload
    log "Event emitted with answer(s)", :outbound_event => event, :inbound_event => inbound_event
  end

  # Tallies the answers per question and returns a Hash with 'counts', 'majority_answer' and,
  # when every selection key is numeric, 'average_answer'.
  def majority_results(assignments)
    counts = {}
    options['hit']['questions'].each do |question|
      # Start every configured selection at zero; keys are stringified to match the string
      # answers that come back from the workers.
      question_counts = {}
      question['selections'].each { |selection| question_counts[selection['key'].to_s] = 0 }

      assignments.each do |assignment|
        answers = ActiveSupport::HashWithIndifferentAccess.new(assignment.answers)
        answer = answers[question['key']]
        # A missing answer used to raise NoMethodError on `nil += 1`; skip it instead.
        next if answer.nil?
        question_counts[answer] = question_counts.fetch(answer, 0) + 1
      end
      counts[question['key']] = question_counts
    end

    results = { 'counts' => counts }

    results['majority_answer'] = counts.each_with_object({}) do |(key, question_counts), majority|
      majority[key] = question_counts.to_a.sort { |a, b| a.last <=> b.last }.last.first
    end

    if all_questions_are_numeric?
      results['average_answer'] = counts.each_with_object({}) do |(key, question_counts), averages|
        sum = 0
        total = 0
        question_counts.each do |value, count|
          sum += value.to_s.to_f * count
          total += count
        end
        # No answers at all would otherwise produce NaN.
        averages[key] = total.zero? ? nil : sum / total.to_f
      end
    end

    results
  end

  # Creates a follow-up poll HIT asking workers to rate each submitted answer from 5 to 1.
  # The original answers are stored in the poll's metadata so emit_poll_results can map the
  # winning item back to an answer.
  def create_poll_hit(hit_id, assignments, inbound_event)
    selections = 5.times.map { |i| { 'key' => i + 1, 'text' => i + 1 } }.reverse
    poll_options = options['poll_options']

    questions = assignments.each_with_index.map do |assignment, index|
      {
        'type' => "selection",
        'name' => "Item #{index + 1}",
        # 1-based so that emit_poll_results' `top_item - 1` lands on the right answer;
        # the previous 0-based key made item 0 resolve to index -1 (the last answer).
        'key' => index + 1,
        'required' => "true",
        'question' => interpolate_string(poll_options['row_template'], assignment.answers),
        'selections' => selections
      }
    end

    poll_hit = create_hit 'title' => poll_options['title'],
                          'description' => poll_options['instructions'],
                          'questions' => questions,
                          'assignments' => poll_options['assignments'],
                          'lifetime_in_seconds' => poll_options['lifetime_in_seconds'],
                          'reward' => poll_options['reward'],
                          'payload' => inbound_event && inbound_event.payload,
                          'metadata' => { 'type' => 'poll',
                                          'original_hit' => hit_id,
                                          'answers' => assignments.map(&:answers),
                                          'event_id' => inbound_event && inbound_event.id }

    log "Poll HIT created with ID #{poll_hit.id} and URL #{poll_hit.url}. Original HIT: #{hit_id}", :inbound_event => inbound_event
  end
  # True when every selection key of every question reads as a number, i.e. its string form
  # round-trips through `to_f` or `to_i` (for example "3", "2.5", 3 or 2.5).
  #
  # Keys are stringified first so that keys configured as JSON numbers count as numeric too;
  # comparing an Integer key directly against a String never matched.
  #
  # Assumes every question has a 'selections' array, which validation enforces whenever
  # majority mode is enabled.
  def all_questions_are_numeric?
    interpolated['hit']['questions'].all? do |question|
      question['selections'].all? do |selection|
        key = selection['key'].to_s
        key == key.to_f.to_s || key == key.to_i.to_s
      end
    end
  end
  # Creates a HIT from the Agent's `hit` options. When an event is given, its payload is
  # passed along for interpolation and its id is stored in the HIT's metadata.
  def create_basic_hit(event = nil)
    hit_options = options['hit']

    hit = create_hit(
      'title' => hit_options['title'],
      'description' => hit_options['description'],
      'questions' => hit_options['questions'],
      'assignments' => hit_options['assignments'],
      'lifetime_in_seconds' => hit_options['lifetime_in_seconds'],
      'reward' => hit_options['reward'],
      'payload' => event && event.payload,
      'metadata' => { 'event_id' => event && event.id }
    )

    log "HIT created with ID #{hit.id} and URL #{hit.url}", :inbound_event => event
  end
  # Creates a HIT on Mechanical Turk and records its metadata in `memory['hits']`.
  #
  # `opts` keys read here: 'title', 'description', 'questions', 'assignments' (default 1),
  # 'lifetime_in_seconds' (default one day), 'reward' (default 0.05), 'payload' (used to
  # interpolate title, description and questions) and 'metadata' (stored under the HIT's id).
  #
  # Returns the created RTurk HIT.
  def create_hit(opts = {})
    payload = opts['payload'] || {}
    title = interpolate_string(opts['title'], payload).strip
    description = interpolate_string(opts['description'], payload).strip
    questions = interpolate_options(opts['questions'], payload)

    max_assignments = (opts['assignments'] || 1).to_i
    lifetime = (opts['lifetime_in_seconds'] || 24 * 60 * 60).to_i
    reward = (opts['reward'] || 0.05).to_f

    created_hit = RTurk::Hit.create(:title => title) do |new_hit|
      new_hit.max_assignments = max_assignments
      new_hit.description = description
      new_hit.lifetime = lifetime
      new_hit.question_form AgentQuestionForm.new(:title => title, :description => description, :questions => questions)
      new_hit.reward = reward
      #new_hit.qualifications.add :approval_rate, { :gt => 80 }
    end

    memory['hits'] ||= {}
    memory['hits'][created_hit.id] = opts['metadata'] || {}
    created_hit
  end
  # RTurk Question Form
  #
  # Renders the Agent's title, description and questions as a Mechanical Turk question form.
  # Each question becomes a Question element holding either a radio-button SelectionAnswer
  # (type "selection") or a FreeTextAnswer (any other type).
  class AgentQuestionForm < RTurk::QuestionForm
    needs :title, :description, :questions

    # Emits the Overview followed by one Question element per configured question.
    def question_form_content
      render_overview
      @questions.each_with_index do |question, position|
        render_question(question, position)
      end
    end

    private

    # Overview section: the HIT title and its description text.
    def render_overview
      Overview do
        Title do
          text @title
        end
        Text do
          text @description
        end
      end
    end

    # A single Question element; identifier and display name fall back to values derived
    # from the question's position when not configured.
    def render_question(question, position)
      Question do
        QuestionIdentifier do
          text question['key'] || "question_#{position}"
        end
        DisplayName do
          text question['name'] || "Question ##{position}"
        end
        IsRequired do
          text question['required'] || 'true'
        end
        QuestionContent do
          Text do
            text question['question']
          end
        end
        AnswerSpecification do
          if question['type'] == "selection"
            render_selection_answer(question)
          else
            render_free_text_answer(question)
          end
        end
      end
    end

    # Radio-button list with one Selection per configured option.
    def render_selection_answer(question)
      SelectionAnswer do
        StyleSuggestion do
          text 'radiobutton'
        end
        Selections do
          question['selections'].each do |option|
            Selection do
              SelectionIdentifier do
                text option['key']
              end
              Text do
                text option['text']
              end
            end
          end
        end
      end
    end

    # Free-text field with optional length constraints and optional default text.
    def render_free_text_answer(question)
      min_length = question['min_length']
      max_length = question['max_length']

      FreeTextAnswer do
        if min_length.present? || max_length.present?
          Constraints do
            bounds = {}
            bounds['minLength'] = min_length.to_s if min_length.present?
            bounds['maxLength'] = max_length.to_s if max_length.present?
            Length bounds
          end
        end
        if question['default'].present?
          DefaultText do
            text question['default']
          end
        end
      end
    end
  end
  415. end
  416. end