Solving 'Rails bug': 3.0.20: UTF8-encoded JSON request params result in Encoding::CompatibilityError

So I ran across a bug in Rails 3.0.20, which is no longer supported: 3.0.20: UTF8-encoded JSON request params result in Encoding::CompatibilityError (https://github.com/rails/rails/issues/9136). I ended up writing the Rack middleware listed below as a 'fix'.

My code is heavily inspired by this post here by Pivotal Labs: Sanitizing POST params with custom Rack middleware (http://pivotallabs.com/sanitizing-post-params-with-custom-rack-middleware/)

Rack middleware for force-encoding request bodies to UTF-8

JSONScrubber

# config/initializers/json_scrubber.rb
require 'uri'

# Rack middleware that re-tags the request body as UTF-8 for a configured set
# of paths before handing the request to the next app in the stack.
#
# The body bytes are not transcoded: String#force_encoding only changes the
# encoding label attached to the bytes that were read.
class JSONScrubber
  # @param app [#call] the next Rack application in the stack
  # @param options [Hash] configuration; `:routes` lists the PATH_INFO values
  #   (matched exactly) whose bodies should be re-tagged. When omitted, no
  #   request is modified.
  def initialize(app, options = {})
    @app = app
    # Array() turns a missing :routes into [] and a single String into a
    # one-element list, so #include? below always does whole-path matching.
    @routes = Array(options[:routes])
  end

  # Rack entry point.
  #
  # @param env [Hash] the Rack environment
  # @return whatever the wrapped app returns for the (possibly modified) env
  def call(env)
    scrub(env)
    @app.call(env)
  end

  private

  # Replaces env['rack.input'] with a StringIO holding the same bytes tagged
  # as UTF-8 when PATH_INFO is one of the configured routes.
  #
  # @param env [Hash] the Rack environment (mutated in place)
  # @return [void]
  def scrub(env)
    input = env['rack.input']
    # Nothing to read or rewind when the request carries no input stream.
    return unless input

    if @routes.include?(env['PATH_INFO'])
      env['rack.input'] = StringIO.new(input.read.force_encoding(Encoding::UTF_8))
    end
  ensure
    # Always leave the stream at the start for downstream readers, including
    # when the path did not match or reading raised.
    env['rack.input'].rewind if env['rack.input']
  end
end

# Register the scrubber ahead of the middleware currently at index 0 and
# limit it to the listed request paths.
Rails.application.config.middleware.insert_before(
  0,
  JSONScrubber,
  routes: ['/api/some_action_here.json']
)

JSONScrubberSpec

#json_scrubber_spec.rb
# encoding: UTF-8
require 'spec_helper'

describe JSONScrubber do
  # Stub downstream app: it echoes the env back in the second slot of the Rack
  # response triplet so each example can inspect what the middleware passed on.
  let(:app) { ->(env) { [200, env, "app"] } }

  let(:middleware) do
    JSONScrubber.new(app, routes: ['/api/some_action_here.json'])
  end

  it 'converts non UTF-8 (ASCII-8BIT) encoded request bodies (with special characters) to UTF-8 encoded request bodies' do
    env = env_for('http://app.abcapp.com.au/api/some_action_here.json')
    status, passed_env = set_input_and_call(env, 'ASCII-8BIT')

    expect(status).to eq(200)
    expect(passed_env['rack.input'].read.encoding.to_s).to eq('UTF-8')
  end

  it 'processes UTF-8 encoded request bodies (with special characters) without issue' do
    env = env_for('http://app.abcapp.com.au/api/some_action_here.json')
    status, passed_env = set_input_and_call(env, 'UTF-8')

    expect(status).to eq(200)
    expect(passed_env['rack.input'].read.encoding.to_s).to eq('UTF-8')
  end

  it 'will only modify requests whose paths match the given path options' do
    env = env_for('http://app.abcapp.com.au/api/dont_touch_me.json')
    status, passed_env = set_input_and_call(env, 'ASCII-8BIT')

    expect(status).to eq(200)
    # The path is not in the configured routes, so the body keeps its tag.
    expect(passed_env['rack.input'].read.encoding.to_s).to eq('ASCII-8BIT')
  end

  # Builds a Rack env for the given URL via Rack::MockRequest.
  def env_for(url, opts = {})
    Rack::MockRequest.env_for(url, opts)
  end

  # Installs a fixed body containing multibyte characters, tagged with the
  # given encoding, as env['rack.input'] and runs the middleware.
  #
  # Returns the Rack response triplet produced by the stub app.
  def set_input_and_call(env, encoding)
    rack_input = %q("{"data":{"name":"Name ❤ ☀ ☆ ☂ ☻ ♞ ☯ ☭ ☢ € → ☎ ❄ ♫ ✂ ▷ ✇ ♎ ⇧ ☮ ♻ ⌘ ⌛ ☘","email":"email@gmail.com","org":"Sch☂ ☻ool","day":"Teacher"}}")
    env['rack.input'] = StringIO.new(rack_input.force_encoding(encoding))
    env['rack.input'].rewind
    middleware.call(env)
  end
end

This should force-encode the request bodies of the configured routes to UTF-8, so if they happen to contain any special Unicode characters they won't break your code.

Happy coding!

References