Class: BulkSubmission

Inherits:
Object
  • Object
show all
Extended by:
ActiveModel::Naming
Includes:
ActiveModel::AttributeMethods, ActiveModel::Conversion, ActiveModel::Validations, ManifestUtil, Submission::AssetSubmissionFinder, Submission::ValidationsByTemplateName
Defined in:
app/models/bulk_submission.rb

Overview

A bulk submission is created through the upload of a spreadsheet (CSV). It contains the information for setting up one or more submissions, allowing for the quick request of multiple pieces of work simultaneously. Bulk Submissions are not currently persisted.

Constant Summary collapse

DEFAULT_ENCODING =

This is the default output from excel

'Windows-1252'
COMMON_FIELDS =

rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/PerceivedComplexity

[
  # Needed to construct the submission ...
  'template name',
  'study id',
  'study name',
  'project id',
  'project name',
  'submission name',
  'user login',
  # Needed to identify the assets and what happens to them ...
  'asset group id',
  'asset group name',
  'fragment size from',
  'fragment size to',
  'pcr cycles',
  'primer panel',
  'read length',
  'library type',
  'bait library',
  'bait library name',
  'comments',
  'number of lanes',
  'pre-capture plex level',
  'pre-capture group',
  'gigabases expected',
  'priority',
  'flowcell type',
  'scrna core number of samples per pool',
  'scrna core cells per chip well'
].freeze
# Column headers accepted as aliases: #translate maps each key to the header named on the right.
ALIAS_FIELDS =
{ 'plate barcode' => 'barcode', 'tube barcode' => 'barcode' }.freeze

Constants included from Submission::ValidationsByTemplateName

Submission::ValidationsByTemplateName::HEADER_CELLS_PER_CHIP_WELL, Submission::ValidationsByTemplateName::HEADER_NUM_SAMPLES, Submission::ValidationsByTemplateName::HEADER_PROJECT_NAME, Submission::ValidationsByTemplateName::HEADER_STUDY_NAME, Submission::ValidationsByTemplateName::HEADER_TEMPLATE_NAME, Submission::ValidationsByTemplateName::SCRNA_CORE_CDNA_PREP_GEM_X_5P

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from ManifestUtil

#filter_end_of_header, #is_end_of_header?

Methods included from Submission::ValidationsByTemplateName

#apply_additional_validations_by_template_name, #validate_consistent_column_value

Methods included from Submission::AssetSubmissionFinder

#find_all_assets_by_name_including_samples!, #find_tubes_including_samples_for!, #find_wells_including_samples_for!, #is_plate?, #is_tube?

Constructor Details

#initialize(attrs = {}) ⇒ BulkSubmission

Returns a new instance of BulkSubmission.



52
53
54
55
# File 'app/models/bulk_submission.rb', line 52

# Builds an in-memory bulk submission from the supplied attributes.
#
# @param attrs [#[], #fetch] reads :spreadsheet and, optionally, :encoding
#   (DEFAULT_ENCODING is used when :encoding is not supplied)
def initialize(attrs = {})
  uploaded_file = attrs[:spreadsheet]
  self.spreadsheet = uploaded_file
  self.encoding = attrs.fetch(:encoding) { DEFAULT_ENCODING }
end

Instance Attribute Details

#encodingObject

Returns the value of attribute encoding.



37
38
39
# File 'app/models/bulk_submission.rb', line 37

# Reader for the encoding name held in @encoding.
# #initialize assigns it through the writer (DEFAULT_ENCODING when none is given) and
# #process passes it to String#encode! as the source encoding of the uploaded CSV.
def encoding
  @encoding
end

#spreadsheetObject

Returns the value of attribute spreadsheet.



37
38
39
# File 'app/models/bulk_submission.rb', line 37

# Reader for the uploaded spreadsheet held in @spreadsheet.
# Elsewhere in this class it is asked for #present?, #size, #original_filename and #read.
def spreadsheet
  @spreadsheet
end

Instance Method Details

#add_study_to_assets(assets, study) ⇒ Object



311
312
313
# File 'app/models/bulk_submission.rb', line 311

# Links +study+ to every distinct sample held by +assets+, skipping samples that
# already list it.
#
# @param assets [Enumerable] objects responding to #samples
# @param study [Object] appended to each sample's #studies when not already included
# @return [Array] the distinct samples that were visited
def add_study_to_assets(assets, study)
  # flat_map gathers each asset's samples into one list in a single pass
  assets
    .flat_map(&:samples)
    .uniq
    .each { |sample| sample.studies << study unless sample.studies.include?(study) }
end

#assign_value_if_source_present(source_obj, source_key, target_obj, target_key) ⇒ Object



315
316
317
# File 'app/models/bulk_submission.rb', line 315

# Copies source_obj[source_key] into target_obj[target_key], but only when the
# source value is #present?.
#
# @return [Object, nil] the copied value, or nil when nothing was assigned
def assign_value_if_source_present(source_obj, source_key, target_obj, target_key)
  value = source_obj[source_key]
  return unless value.present?

  target_obj[target_key] = value
end

#completed_submissionsObject

This is used to present a list of successes



452
453
454
# File 'app/models/bulk_submission.rb', line 452

# This is used to present a list of successes.
# Returns a two-element array: the submission ids and the id => summary-message hash
# that #process fills in as each submission is built.
def completed_submissions
  [@submission_ids, @completed_submissions]
end

#extract_request_options(details) ⇒ Object



319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
# File 'app/models/bulk_submission.rb', line 319

# Builds the request options hash for one order from its spreadsheet details.
#
# :read_length and an empty :multiplier hash are always set; every other option is
# copied only when the corresponding spreadsheet column holds a value.
#
# @param details [Hash] spreadsheet header => value
# @return [Hash] the request options
def extract_request_options(details)
  # Spreadsheet column => request option key, applied in this order. Both bait library
  # columns feed the same option, so 'bait library' wins when both are filled in.
  column_to_option = {
    'library type' => 'library_type',
    'fragment size from' => 'fragment_size_required_from',
    'fragment size to' => 'fragment_size_required_to',
    'pcr cycles' => 'pcr_cycles',
    'bait library name' => :bait_library_name,
    'bait library' => :bait_library_name,
    'pre-capture plex level' => 'pre_capture_plex_level',
    'gigabases expected' => 'gigabases_expected',
    'primer panel' => 'primer_panel_name',
    'flowcell type' => 'requested_flowcell_type',
    'scrna core number of samples per pool' => 'number_of_samples_per_pool',
    'scrna core cells per chip well' => 'cells_per_chip_well'
  }

  options = { read_length: details['read length'], multiplier: {} }
  column_to_option.each do |column, option_key|
    assign_value_if_source_present(details, column, options, option_key)
  end
  options
end

#find_template(template_name) ⇒ Object

Returns the SubmissionTemplate and checks that it is valid

Raises:

  • (StandardError)


443
444
445
446
447
448
449
# File 'app/models/bulk_submission.rb', line 443

# Looks up the SubmissionTemplate called +template_name+ and checks it is still usable.
#
# @param template_name [String] name of the template to find
# @return [SubmissionTemplate] the matching, visible template
# @raise [StandardError] when no template has that name, or the template is not visible
def find_template(template_name)
  template = SubmissionTemplate.find_by(name: template_name)
  raise StandardError, "Cannot find template #{template_name}" unless template
  raise StandardError, "Template: '#{template_name}' is deprecated and no longer in use." unless template.visible

  template
end

#idObject



48
49
50
# File 'app/models/bulk_submission.rb', line 48

# Always nil: a bulk submission is never persisted, so it has no identifier.
def id
  nil
end

#persisted?Boolean

Returns:

  • (Boolean)


44
45
46
# File 'app/models/bulk_submission.rb', line 44

# Always false: bulk submissions are not stored.
# NOTE(review): presumably here to satisfy ActiveModel::Conversion / form helpers — confirm.
def persisted?
  false
end

#prepare_order(details) ⇒ Object

Returns an order for the given details rubocop:todo Metrics/PerceivedComplexity, Metrics/MethodLength, Metrics/AbcSize



342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
# File 'app/models/bulk_submission.rb', line 342

# Builds (but does not save) an order from one grouped set of spreadsheet details.
#
# Any StandardError raised while resolving the study, project, user, library type,
# assets or template is rescued at the bottom of the method: it is recorded against
# :spreadsheet in #errors (prefixed with the affected row numbers) and nil is returned.
#
# @param details [Hash] header => value for the order, plus the derived 'rows',
#   'asset ids', 'asset names', 'plate well' and 'barcode' entries
# @return [Object, nil] the order built by the submission template, or nil on failure
def prepare_order(details) # rubocop:todo Metrics/CyclomaticComplexity
  # Retrieve common attributes
  study = Study.find_by_id_or_name!(details['study id'], details['study name'])
  project = Project.find_by_id_or_name!(details['project id'], details['project name'])
  user = User.find_by(login: details['user login']) or
    raise StandardError, "Cannot find user #{details['user login'].inspect}"

  # Extract the request options from the row details
  request_options = extract_request_options(details)

  # Check the library type matches a value from the table
  if request_options['library_type'].present?
    # find is case insensitive but we want the correct case sensitive name for requests or we get issues downstream in
    # NPG
    lt = LibraryType.find_by(name: request_options['library_type'])&.name or
      raise StandardError, "Cannot find library type #{request_options['library_type'].inspect}"
    request_options['library_type'] = lt
  end

  # Set up the order attributes
  attributes = {
    study: study,
    project: project,
    user: user,
    comments: details['comments'],
    request_options: request_options,
    pre_cap_group: details['pre-capture group']
  }

  # Deal with the asset group: either it's one we should be loading, or one we should be creating.
  # When no existing group is found, only its name is passed on (as :asset_group_name).

  attributes[:asset_group] = study.asset_groups.find_by_id_or_name(
    details['asset group id'],
    details['asset group name']
  )
  attributes[:asset_group_name] = details['asset group name'] if attributes[:asset_group].nil?

  ##
  # We go ahead and find our assets regardless of whether we have an asset group.
  # While this takes longer, it helps to detect cases where an asset group name has been
  # reused. This is a common cause of submission problems.

  # Locate either the assets by name, or find the plate and its wells
  if is_plate?(details)
    found_assets = find_wells_including_samples_for!(details)
    # We've probably got a tube
  elsif is_tube?(details)
    found_assets = find_tubes_including_samples_for!(details)
  else
    asset_ids, asset_names = details.fetch('asset ids', ''), details.fetch('asset names', '')
    found_assets =
      if attributes[:asset_group] && asset_ids.blank? && asset_names.blank?
        # An existing asset group with no explicit assets: nothing to look up
        []
      elsif asset_names.present?
        Array(find_all_assets_by_name_including_samples!(asset_names)).uniq
      elsif asset_ids.present?
        raise StandardError, 'Specifying assets by id is no longer possible. Please provide a name or barcode.'
      else
        raise StandardError, 'Please specify a barcode or name for each asset.'
      end

    # Compare the number of assets located with the number requested.
    # NOTE(review): assumes 'asset ids' / 'asset names' are collections whose #size is an
    # element count (the '' fallback has size 0) — confirm against Array#field_list.
    assets_found, expecting =
      found_assets.map { |asset| "#{asset.name}(#{asset.id})" },
      asset_ids.size + asset_names.size
    if assets_found.size < expecting
      raise StandardError, "Too few assets found for #{details['rows']}: #{assets_found.inspect}"
    end
    if assets_found.size > expecting
      raise StandardError, "Too many assets found for #{details['rows']}: #{assets_found.inspect}"
    end
  end

  # With no existing group the located assets are passed on directly; otherwise any
  # explicitly located assets must match the group's assets.
  # NOTE(review): the != comparison looks order-sensitive if both sides are arrays — confirm.
  if attributes[:asset_group].nil?
    attributes[:assets] = found_assets
  elsif found_assets.present? && found_assets != attributes[:asset_group].assets
    raise StandardError,
          "Asset Group '#{attributes[:asset_group].name}' contains different assets to those you specified. " \
            'You may be reusing an asset group name'
  end

  add_study_to_assets(found_assets, study)

  # Create the order.  Ensure that the number of lanes is correctly set.
  # 'number of lanes' defaults to 1 when the column is absent.
  sub_template = find_template(details['template name'])
  number_of_lanes = details.fetch('number of lanes', 1).to_i

  sub_template
    .new_order(attributes)
    .tap do |new_order|
      new_order.request_type_multiplier do |multiplexed_request_type_id|
        new_order.request_options[:multiplier][multiplexed_request_type_id] = number_of_lanes
      end
    end
rescue => e
  # Report the failure against the spreadsheet rows; the nil result is dropped by the caller's #compact
  errors.add :spreadsheet, "There was a problem on row(s) #{details['rows']}: #{e.message}"
  nil
end

#processObject

rubocop:todo Metrics/PerceivedComplexity, Metrics/MethodLength, Metrics/AbcSize



141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# File 'app/models/bulk_submission.rb', line 141

# Reads the uploaded spreadsheet, converts it from #encoding to UTF-8, parses it as CSV
# and, if the spreadsheet is valid, builds one Submission per distinct submission name.
#
# All submissions are created inside a single transaction, which is rolled back if any
# error has been recorded in #errors by the end of the run.
#
# @raise [ActiveRecord::RecordInvalid] when structuring or template-specific validation
#   has recorded errors before any submission is created
def process # rubocop:todo Metrics/CyclomaticComplexity
  # Store the details of the successful submissions so the user can be presented with a summary
  @submission_ids = []
  @completed_submissions = {}

  # encode! transcodes the content in place, treating it as being in `encoding`
  csv_content = spreadsheet.read
  @csv_rows = CSV.parse(csv_content.encode!('utf-8', encoding))

  if spreadsheet_valid?
    submission_details = submission_structure

    # Apply any additional validations based on the submission template name
    apply_additional_validations_by_template_name unless errors.count > 0

    raise ActiveRecord::RecordInvalid, self if errors.count > 0

    # Within a single transaction process each of the rows of the CSV file as a separate submission.  Any name
    # fields need to be mapped to IDs, and the 'assets' field needs to be split up and processed if present.
    # rubocop:todo Metrics/BlockLength
    ActiveRecord::Base.transaction do
      submission_details.each do |submissions|
        submissions.each do |submission_name, orders|
          # The submission's user is taken from the first order of the submission
          user = User.find_by(login: orders.first['user login'])
          if user.nil?
            errors.add(
              :spreadsheet,
              if orders.first['user login'].nil?
                "No user specified for #{submission_name}"
              else
                "Cannot find user #{orders.first['user login'].inspect}"
              end
            )
            next
          end
          begin
            # prepare_order returns nil (after recording an error) for orders it could not build
            orders_processed = orders.map(&method(:prepare_order)).compact

            submission =
              Submission.create!(
                name: submission_name,
                user: user,
                orders: orders_processed,
                priority: max_priority(orders)
              )
            submission.built!

            # Collect successful submissions
            @submission_ids << submission.id
            @completed_submissions[
              submission.id
            ] = "Submission #{submission.id} built (#{submission.orders.count} orders)"
          rescue Submission::ProjectValidation::Error => e
            errors.add :spreadsheet, "There was an issue with a project: #{e.message}"
          end
        end
      end

      # If there are any errors then the transaction needs to be rolled back.
      raise ActiveRecord::Rollback if errors.present?
    end
    # rubocop:enable Metrics/BlockLength
  end
end

#process_fileObject

rubocop:todo Metrics/MethodLength



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'app/models/bulk_submission.rb', line 60

# Entry point for handling the uploaded file: runs some basic checks on the upload
# and, if they pass, hands over to #process.
#
# Does nothing when no spreadsheet was supplied. An empty file, or a file whose name
# does not end in ".csv" (compared case-insensitively, so "DATA.CSV" is accepted),
# is reported against :spreadsheet in #errors without being processed.
#
# CSV parse failures and encoding conversion failures raised by #process are turned
# into validation errors rather than propagating. Other exceptions are not rescued.
#
# @return [Object, nil] nil when there is no spreadsheet; otherwise the result of
#   #process or of the errors.add call that reported the problem
def process_file # rubocop:todo Metrics/AbcSize
  # Slightly inelegant file-type checking
  # TODO (jr) Find a better way of verifying the CSV file?
  return unless spreadsheet.present?
  return errors.add(:spreadsheet, 'The supplied file was empty') if spreadsheet.size.zero?

  # Spreadsheet tools and some file systems produce upper-case extensions
  unless spreadsheet.original_filename.downcase.end_with?('.csv')
    return errors.add(:spreadsheet, 'The supplied file was not a CSV file')
  end

  process
rescue CSV::MalformedCSVError
  errors.add(:spreadsheet, 'The supplied file was not a valid CSV file (try opening it with MS Excel)')
rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
  # Both mean the content could not be converted from the selected encoding:
  # either the bytes are malformed for it, or a byte has no mapping in it.
  errors.add(:encoding, "didn't match for the provided file.")
end

#shared_options!(rows) ⇒ Object

rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/PerceivedComplexity



295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
# File 'app/models/bulk_submission.rb', line 295

# Collapses the COMMON_FIELDS of a group of rows into [field, value] pairs.
#
# For each common field the first value seen in the group is used. If the rows disagree
# on a field, an error is recorded against :spreadsheet (no exception is raised here).
#
# @param rows [Array<Hash>] the rows belonging to one asset group
# @return [Array<Array>] one [field, value] pair per entry in COMMON_FIELDS
def shared_options!(rows) # rubocop:todo Metrics/MethodLength
  COMMON_FIELDS.map do |field|
    distinct_values = rows.pluck(field).uniq

    if distinct_values.count > 1
      quoted_values = distinct_values.map { |value| "'#{value}'" }.to_sentence
      group_name = rows.first['asset group name']
      message =
        "#{field} should be identical for all requests in asset group '#{group_name}'. " \
          "Given values were: #{quoted_values}."
      errors.add(:spreadsheet, message)
    end

    [field, distinct_values.first]
  end
end

#submission_structureObject

Process the CSV into a structure. This creates an array containing a hash for each distinct “submission name”: “submission name” => array of orders, where each order is a hash of headers to values (grouped by “asset group name”). rubocop:todo Metrics/PerceivedComplexity, Metrics/MethodLength, Metrics/AbcSize



257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
# File 'app/models/bulk_submission.rb', line 257

# Turns the CSV data rows into the structure consumed by #process.
#
# Phase 1 groups the validated rows by their 'submission name'. Phase 2 groups each
# submission's rows by 'asset group name' and collapses every group into one order hash.
#
# @return [Array<Hash>] one { submission name => [order details, ...] } hash per submission
def submission_structure # rubocop:todo Metrics/CyclomaticComplexity
  rows_by_submission = Hash.new { |grouped, name| grouped[name] = [] }

  csv_data_rows.each_with_index do |row, index|
    # Skip rows in which every cell is nil
    next if row.all?(&:nil?)

    validated_pairs =
      headers.each_with_index.filter_map { |header, pos| validate_entry(header, pos, row, index + start_row) }
    details = validated_pairs.to_h.merge('row' => index + start_row)
    rows_by_submission[details['submission name']] << details
  end

  rows_by_submission.map do |submission_name, submission_rows|
    orders =
      submission_rows
        .group_by { |details| details['asset group name'] }
        .map do |_group_name, group_rows|
          order_details = shared_options!(group_rows).to_h
          order_details['rows'] = group_rows.comma_separate_field_list_for_display('row')
          order_details['asset ids'] = group_rows.field_list('asset id', 'asset ids')
          order_details['asset names'] = group_rows.field_list('asset name', 'asset names')
          order_details['plate well'] = group_rows.field_list('plate well')
          order_details['barcode'] = group_rows.field_list('barcode')
          # Drop every entry that ended up blank
          order_details.delete_if { |_, value| value.blank? }
        end
    { submission_name => orders }
  end
end

#translate(header) ⇒ Object



240
241
242
# File 'app/models/bulk_submission.rb', line 240

# Maps an aliased column header to the header it stands for.
#
# @param header [String, nil] a column header
# @return [String, nil] the ALIAS_FIELDS entry for +header+, or +header+ itself when
#   it is not an alias
def translate(header)
  ALIAS_FIELDS.fetch(header) { header }
end

#valid_header?Boolean

Returns:

  • (Boolean)


116
117
118
119
120
121
122
123
124
# File 'app/models/bulk_submission.rb', line 116

# Checks that the spreadsheet headers include the mandatory 'submission name' column.
#
# Missing headers (nil) are rejected without recording an error. Headers lacking the
# column are rejected and an error is recorded against :spreadsheet.
#
# @return [Boolean] true only when the 'submission name' header is present
def valid_header?
  if headers.nil?
    false
  elsif headers.include?('submission name')
    true
  else
    message =
      'You submitted an incompatible spreadsheet. Please ensure your spreadsheet contains ' \
        "the 'submission name' column"
    errors.add :spreadsheet, message
    false
  end
end