x86: Add high bitdepth cfl_ac_444 AVX2 asm

Benchmark results (lower is better):
cfl_ac_444_w4_16bpc_c: 177.5
cfl_ac_444_w4_16bpc_ssse3: 16.1
cfl_ac_444_w4_16bpc_avx2: 16.0
cfl_ac_444_w8_16bpc_c: 268.3
cfl_ac_444_w8_16bpc_ssse3: 31.7
cfl_ac_444_w8_16bpc_avx2: 25.8
cfl_ac_444_w16_16bpc_c: 370.3
cfl_ac_444_w16_16bpc_ssse3: 50.8
cfl_ac_444_w16_16bpc_avx2: 40.5
cfl_ac_444_w32_16bpc_c: 841.9
cfl_ac_444_w32_16bpc_ssse3: 129.9
cfl_ac_444_w32_16bpc_avx2: 92.8