Slow access to arrays in SharedArray

Hi

I have a requirement to use specific IDs for specific scenarios, and each iteration of each scenario must use a different ID.

This is the approach I’ve taken: Define the scenarios, then create a SharedArray of objects that have a list of IDs for each scenario. In the example below I’ve simplified reality for the sake of analysis by generating a list of integers (in reality these are loaded from a file).

I have two questions:

  1. Why is it so slow to access an array?
  2. What alternatives are there that are faster?

Thanks in advance for taking the time to help :slight_smile:

/**
 * Builds one `shared-iterations` scenario definition.
 *
 * @param {string} exec - Name of the exported function the scenario runs.
 * @returns {object} Scenario with 2500 iterations, 25 VUs and a 1h max duration.
 */
function sharedIterationsScenario(exec) {
  return {
    exec,
    executor: 'shared-iterations',
    iterations: 2500,
    vus: 25,
    maxDuration: '1h',
  };
}

// One scenario per test case; the property name is the scenario name.
const testCaseScenarios = {
  Valid_Login: sharedIterationsScenario('doValidLogin'),
  Invalid_Login: sharedIterationsScenario('doInvalidLogin'),
};

// Exported test options: run the scenarios defined above.
export const options = { scenarios: testCaseScenarios };

// Shared id data: one array element per scenario, each carrying that scenario's ids.
let idList = new SharedArray('idList', () => {
  const validCount = testCaseScenarios.Valid_Login.iterations;
  const invalidCount = testCaseScenarios.Invalid_Login.iterations;

  return [
    { scenarioName: 'Valid_Login', ids: makeArrayOfInt(1, validCount) },
    // The invalid-login ids start right after the last valid-login id.
    { scenarioName: 'Invalid_Login', ids: makeArrayOfInt(validCount + 1, invalidCount) },
  ];
});

/**
 * Builds the sequence start, start + 1, ..., start + count - 1.
 *
 * @param {number} start - First value of the sequence.
 * @param {number} count - Number of values to generate.
 * @returns {number[]} The generated values, in ascending order.
 */
function makeArrayOfInt(start, count) {
  const result = [];
  let offset = 0;
  while (offset < count) {
    result[offset] = start + offset;
    offset += 1;
  }
  return result;
}

The issue may lie with the approach I’ve chosen, or how I’m accessing the idList for each test to get the id - see getMyId():

/** Entry point named by the `exec` field of the `Valid_Login` scenario. */
export function doValidLogin() {
  login();
}

/** Entry point named by the `exec` field of the `Invalid_Login` scenario. */
export function doInvalidLogin() {
  login();
}

/** Shared iteration body: obtains the id assigned to the current iteration. */
function login() {
  const id = getMyId();
  // Remainder of method removed to focus on the one function above
}

/**
 * Returns the id assigned to the current iteration of the running scenario.
 *
 * Finds the `idList` entry whose `scenarioName` equals `scenario.name`, then
 * indexes that entry's `ids` with `scenario.iterationInInstance`.
 *
 * @returns {*} The id stored at the current iteration's position.
 * @throws {Error} If no entry matches the scenario name, or no id exists at that position.
 */
function getMyId() {
  const scenarioDefinition = idList.filter((x) => x.scenarioName === scenario.name);
  if (!scenarioDefinition || scenarioDefinition.length < 1) {
    throw new Error(`No definition in idList with scenarioName '${scenario.name}'`);
  }

  const { ids } = scenarioDefinition[0];
  const id = ids[scenario.iterationInInstance];
  // Only a missing entry is an error; a truthiness check would also reject a valid id of 0.
  if (id === undefined || id === null) {
    throw new Error(`No id in the ids of idList with scenarioName '${scenario.name}' at position ${scenario.iterationInInstance}.\nidList: ${JSON.stringify(ids)}`);
  }

  return id;
}

When I run this locally, I see the 5000 http requests completing, but they take ages

running (0h00m17.9s), 00/50 VUs, 5000 complete and 0 interrupted iterations
Invalid_Login ✓ [======================================] 25 VUs  0h00m17.9s/1h0m0s  2500/2500 shared iters
Valid_Login   ✓ [======================================] 25 VUs  0h00m17.9s/1h0m0s  2500/2500 shared iters

     iteration_duration...: avg=176.22ms min=9.99ms med=36ms max=2.62s p(90)=545.47ms p(95)=803.62ms
     iterations...........: 5000 279.016878/s

If I replace the getMyId() function with a simple return 1, the test is instantaneous:

running (0h00m00.0s), 00/50 VUs, 5000 complete and 0 interrupted iterations
Invalid_Login ✓ [======================================] 25 VUs  0h00m00.0s/1h0m0s  2500/2500 shared iters
Valid_Login   ✓ [======================================] 25 VUs  0h00m00.0s/1h0m0s  2500/2500 shared iters

     iteration_duration...: avg=2.19µs min=0s med=0s max=1ms p(90)=0s p(95)=0s
     iterations...........: 5000 499995.00005/s

The complete script, for anyone who wants to try running this locally:

import { SharedArray } from 'k6/data';
import { scenario } from 'k6/execution';

/**
 * Builds one `shared-iterations` scenario definition.
 *
 * @param {string} exec - Name of the exported function the scenario runs.
 * @returns {object} Scenario with 2500 iterations, 25 VUs and a 1h max duration.
 */
function sharedIterationsScenario(exec) {
  return {
    exec,
    executor: 'shared-iterations',
    iterations: 2500,
    vus: 25,
    maxDuration: '1h',
  };
}

// One scenario per test case; the property name is the scenario name.
const testCaseScenarios = {
  Valid_Login: sharedIterationsScenario('doValidLogin'),
  Invalid_Login: sharedIterationsScenario('doInvalidLogin'),
};

// Exported test options: run the scenarios defined above.
export const options = { scenarios: testCaseScenarios };

// Shared id data: one array element per scenario, each carrying that scenario's ids.
let idList = new SharedArray('idList', () => {
  const validCount = testCaseScenarios.Valid_Login.iterations;
  const invalidCount = testCaseScenarios.Invalid_Login.iterations;

  return [
    { scenarioName: 'Valid_Login', ids: makeArrayOfInt(1, validCount) },
    // The invalid-login ids start right after the last valid-login id.
    { scenarioName: 'Invalid_Login', ids: makeArrayOfInt(validCount + 1, invalidCount) },
  ];
});

/**
 * Builds the sequence start, start + 1, ..., start + count - 1.
 *
 * @param {number} start - First value of the sequence.
 * @param {number} count - Number of values to generate.
 * @returns {number[]} The generated values, in ascending order.
 */
function makeArrayOfInt(start, count) {
  const result = [];
  let offset = 0;
  while (offset < count) {
    result[offset] = start + offset;
    offset += 1;
  }
  return result;
}

/** Entry point named by the `exec` field of the `Valid_Login` scenario. */
export function doValidLogin() {
  login();
}

/** Entry point named by the `exec` field of the `Invalid_Login` scenario. */
export function doInvalidLogin() {
  login();
}

/** Shared iteration body: obtains the id assigned to the current iteration. */
function login() {
  const id = getMyId();
}

/**
 * Returns the id assigned to the current iteration of the running scenario.
 *
 * Finds the `idList` entry whose `scenarioName` equals `scenario.name`, then
 * indexes that entry's `ids` with `scenario.iterationInInstance`.
 *
 * @returns {*} The id stored at the current iteration's position.
 * @throws {Error} If no entry matches the scenario name, or no id exists at that position.
 */
function getMyId() {
  // return 1
  const scenarioDefinition = idList.filter((x) => x.scenarioName === scenario.name);
  if (!scenarioDefinition || scenarioDefinition.length < 1) {
    throw new Error(`No definition in idList with scenarioName '${scenario.name}'`);
  }

  const { ids } = scenarioDefinition[0];
  const id = ids[scenario.iterationInInstance];
  // Only a missing entry is an error; a truthiness check would also reject a valid id of 0.
  if (id === undefined || id === null) {
    throw new Error(`No id in the ids of idList with scenarioName '${scenario.name}' at position ${scenario.iterationInInstance}.\nidList: ${JSON.stringify(ids)}`);
  }

  return id;
}

For what it’s worth, further investigation shows that it’s not the filter of the array that costs so much time.

I changed the SharedArray to hold only one object which has property names for the scenarios:

// Shared id data: a single array element whose properties are keyed by scenario name.
let idList = new SharedArray('idList', () => {
  const validCount = testCaseScenarios.Valid_Login.iterations;
  const invalidCount = testCaseScenarios.Invalid_Login.iterations;

  const idsByScenario = {
    Valid_Login: makeArrayOfInt(1, validCount),
    // The invalid-login ids start right after the last valid-login id.
    Invalid_Login: makeArrayOfInt(validCount + 1, invalidCount),
  };
  return [idsByScenario];
});

The getMyId function changes to this:

/**
 * Returns the id assigned to the current iteration of the running scenario.
 *
 * Reads the first element of `idList`, takes the property named after
 * `scenario.name`, and indexes it with `scenario.iterationInInstance`.
 *
 * @returns {*} The id stored at the current iteration's position.
 * @throws {Error} If the scenario has no id list, or no id exists at that position.
 */
function getMyId() {
  //return 1
  const idObject = idList[0]; // TODO: Error handling
  const ids = idObject[scenario.name];
  if (!ids) {
    throw new Error(`No property with name '${scenario.name}' in idObject: ${JSON.stringify(idObject)}`);
  }

  const id = ids[scenario.iterationInInstance];
  // Only a missing entry is an error; a truthiness check would also reject a valid id of 0.
  if (id === undefined || id === null) {
    throw new Error(`No id in the ids of idList with scenarioName '${scenario.name}' at position ${scenario.iterationInInstance}.\nidList: ${JSON.stringify(ids)}`);
  }

  return id;
}

But the performance is not much better :frowning:

running (0h00m17.3s), 00/50 VUs, 5000 complete and 0 interrupted iterations
Invalid_Login ✓ [======================================] 25 VUs  0h00m17.3s/1h0m0s  2500/2500 shared iters
Valid_Login   ✓ [======================================] 25 VUs  0h00m17.3s/1h0m0s  2500/2500 shared iters

     iteration_duration...: avg=170.76ms min=9.99ms med=33.1ms max=3.45s p(90)=517.57ms p(95)=765.88ms
     iterations...........: 5000 289.205649/s

In desperation I tried a few things, eventually removing the SharedArray altogether because the array is not so big that having 25 copies in memory is an issue.

// Init code
// Plain (non-shared) array holding a single object keyed by scenario name.
let idList = [
  {
    Valid_Login: makeArrayOfInt(1, testCaseScenarios.Valid_Login.iterations),
    // The invalid-login ids start right after the last valid-login id.
    Invalid_Login: makeArrayOfInt(
      testCaseScenarios.Valid_Login.iterations + 1,
      testCaseScenarios.Invalid_Login.iterations,
    ),
  },
];

The function:

/**
 * Returns the id assigned to the current iteration of the running scenario.
 *
 * Reads the first element of `idList`, takes the property named after
 * `scenario.name`, and indexes it with `scenario.iterationInInstance`.
 *
 * @returns {*} The id stored at the current iteration's position.
 * @throws {Error} If the scenario has no id list, or no id exists at that position.
 */
function getMyId() {
  const idObject = idList[0]; // TODO: Error handling
  const ids = idObject[scenario.name];
  if (!ids) {
    throw new Error(`No property with name '${scenario.name}' in idObject: ${JSON.stringify(idObject)}`);
  }

  const id = ids[scenario.iterationInInstance];
  // Only a missing entry is an error; a truthiness check would also reject a valid id of 0.
  if (id === undefined || id === null) {
    throw new Error(`No id in the ids of idList with scenarioName '${scenario.name}' at position ${scenario.iterationInInstance}.\nidList: ${JSON.stringify(ids)}`);
  }

  return id;
}

I could also simplify it now to be an object with arrays per scenario name rather than a single array, but for the purpose of identifying the cause of the slowdown, I’ve left it as close to the SharedArray as possible. Test run results:

running (0h00m00.1s), 00/50 VUs, 5000 complete and 0 interrupted iterations
Invalid_Login ✓ [======================================] 25 VUs  0h00m00.1s/1h0m0s  2500/2500 shared iters
Valid_Login   ✓ [======================================] 25 VUs  0h00m00.1s/1h0m0s  2500/2500 shared iters

     iteration_duration...: avg=107.28µs min=0s med=0s max=32.51ms p(90)=0s p(95)=702.3µs
     iterations...........: 5000 76381.82357/s

Hi @bhofmann
In your use case you are putting an object in a (shared) array. This is definitely not how it is supposed to be used. Given that you clearly have two different arrays of ids with two distinct names, you can just make 2 SharedArrays.

import { SharedArray } from 'k6/data';
import { scenario } from 'k6/execution';

/**
 * Builds one `shared-iterations` scenario definition.
 *
 * @param {string} exec - Name of the exported function the scenario runs.
 * @returns {object} Scenario with 2500 iterations, 25 VUs and a 1h max duration.
 */
function sharedIterationsScenario(exec) {
  return {
    exec,
    executor: 'shared-iterations',
    iterations: 2500,
    vus: 25,
    maxDuration: '1h',
  };
}

// One scenario per test case; the property name is the scenario name.
const testCaseScenarios = {
  Valid_Login: sharedIterationsScenario('doValidLogin'),
  Invalid_Login: sharedIterationsScenario('doInvalidLogin'),
};

// Exported test options: run the scenarios defined above.
export const options = { scenarios: testCaseScenarios };

// One id collection per scenario, keyed by scenario name; each value comes from makeArrayOfInt.
let idList = {
  Valid_Login: makeArrayOfInt('valid_login', 1, testCaseScenarios.Valid_Login.iterations),
  // The invalid-login ids start right after the last valid-login id.
  Invalid_Login: makeArrayOfInt(
    'invalid',
    testCaseScenarios.Valid_Login.iterations + 1,
    testCaseScenarios.Invalid_Login.iterations,
  ),
};

/**
 * Creates a SharedArray holding start, start + 1, ..., start + count - 1.
 *
 * @param {string} name - Name passed to the SharedArray constructor.
 * @param {number} start - First value of the sequence.
 * @param {number} count - Number of values to generate.
 * @returns {SharedArray} SharedArray built from the generated values.
 */
function makeArrayOfInt(name, start, count) {
  const buildNumbers = () => {
    const result = [];
    let offset = 0;
    while (offset < count) {
      result[offset] = start + offset;
      offset += 1;
    }
    return result;
  };

  return new SharedArray(name, buildNumbers);
}

/** Entry point named by the `exec` field of the `Valid_Login` scenario. */
export function doValidLogin() {
  login();
}

/** Entry point named by the `exec` field of the `Invalid_Login` scenario. */
export function doInvalidLogin() {
  login();
}

/** Shared iteration body: obtains the id assigned to the current iteration. */
function login() {
  const id = getMyId();
}

/**
 * Returns the id assigned to the current iteration of the running scenario.
 *
 * Looks up the id collection stored in `idList` under `scenario.name` and
 * indexes it with `scenario.iterationInInstance`.
 *
 * @returns {*} The id stored at the current iteration's position.
 * @throws {Error} If the scenario has no (non-empty) id collection, or no id exists at that position.
 */
function getMyId() {
  // return 1
  const scenarioDefinition = idList[scenario.name];
  if (!scenarioDefinition || scenarioDefinition.length < 1) {
    throw new Error(`No definition in idList with scenarioName '${scenario.name}'`);
  }

  const id = scenarioDefinition[scenario.iterationInInstance];
  // Only a missing entry is an error; a truthiness check would also reject a valid id of 0.
  if (id === undefined || id === null) {
    // Report the whole id collection (copied into a plain array for serialisation);
    // the previous `scenarioDefinition[0]` only printed the first id.
    throw new Error(`No id in the ids of idList with scenarioName '${scenario.name}' at position ${scenario.iterationInInstance}.\nidList: ${JSON.stringify(Array.from(scenarioDefinition))}`);
  }

  return id;
}

(Note that I wrote this in 2 minutes so I could’ve missed something in the script)

As an explanation of why the other approach is slow - the implementation basically marshals the contents to a (shared) JSON, and each time you want an element it will unmarshal that element from the JSON and give it to you. But you have 2 big elements, while it was designed for a lot of small elements. So in your case you are basically unmarshalling all the 2500 ids to get one … the exact thing you wanted to avoid, I guess.

The script above makes it more aligned to what it was designed for.

The SharedArray is still slower as it still needs to do more work, and for your particular case with 50 VUs it’s probably not needed either way, but the difference is a lot closer and I would argue it will be negligible once you are doing any kind of actual work (like doing http requests):

 iterations...........: 5000 39158.384812/s

vs

 iterations...........: 5000 70293.976439/s

Hope this helps you and I welcome any suggestion to better explain all of this in the docs

1 Like

BOOM! @mstoykov hit the nail on the head. I’ll include the changes and results below for future reference in case anyone else encounters the same issue.

// Init code
// One SharedArray of ids per scenario, keyed by scenario name.
const idList = {
  Valid_Login: new SharedArray('Valid_Login', () =>
    makeArrayOfInt(1, testCaseScenarios.Valid_Login.iterations),
  ),
  // The invalid-login ids start right after the last valid-login id.
  Invalid_Login: new SharedArray('Invalid_Login', () =>
    makeArrayOfInt(
      testCaseScenarios.Valid_Login.iterations + 1,
      testCaseScenarios.Invalid_Login.iterations,
    ),
  ),
};
/// Function code
/**
 * Returns the id at the current iteration's position in the id collection
 * stored in `idList` under the running scenario's name.
 */
function getMyId() {
  const { name, iterationInInstance } = scenario;
  const ids = idList[name];
  return ids[iterationInInstance];
}

Results:

running (0h00m03.3s), 00/40 VUs, 294000 complete and 0 interrupted iterations
Invalid_Login ✓ [======================================] 20 VUs  0h00m03.3s/1h0m0s  147000/147000 shared iters
Valid_Login   ✓ [======================================] 20 VUs  0h00m03.3s/1h0m0s  147000/147000 shared iters

     iteration_duration...: avg=259.64µs min=0s med=0s max=189.84ms p(90)=0s p(95)=999.2µs
     iterations...........: 294000 89400.484003/s
1 Like