Wayback Machinekoobas.hobune.stream
May JUN Jul
Previous capture 12 Next capture
2021 2022 2023
1 capture
12 Jun 22 - 12 Jun 22
sparklines
Close Help
  • Products
  • Solutions
  • Made with Unity
  • Learning
  • Support & Services
  • Community
  • Asset Store
  • Get Unity

UNITY ACCOUNT

You need a Unity Account to shop in the Online and Asset Stores, participate in the Unity Community and manage your license portfolio. Login Create account
  • Blog
  • Forums
  • Answers
  • Evangelists
  • User Groups
  • Beta Program
  • Advisory Panel

Navigation

  • Home
  • Products
  • Solutions
  • Made with Unity
  • Learning
  • Support & Services
  • Community
    • Blog
    • Forums
    • Answers
    • Evangelists
    • User Groups
    • Beta Program
    • Advisory Panel

Unity account

You need a Unity Account to shop in the Online and Asset Stores, participate in the Unity Community and manage your license portfolio. Login Create account

Language

  • Chinese
  • Spanish
  • Japanese
  • Korean
  • Portuguese
  • Ask a question
  • Spaces
    • Default
    • Help Room
    • META
    • Moderators
    • Topics
    • Questions
    • Users
    • Badges
  • Home /
avatar image
0
Question by tobs89 · Nov 04, 2015 at 01:50 PM · performance optimizationmultithreadingcpucalculations

Multithreading doesn’t improve performance

Hello,I’m doing a project where I need to make a big amount of calculations.In fact I need to repeat the same kind of calculation about 3 milions of time.This obviously is really painful for performance, so I try to improve performances getting this code multithreaded.The basic calculations have this aspect:

 using UnityEngine;
 using System.Collections;
 
 public class NoThread : MonoBehaviour {
     int i,j,k;
     float[,,] matrix;
     float deltaTime;
     float oldTime;
     float value;
     public GameObject cube;
     System.Random rnd;
 
     //visualization variable
     int w,h;
     GUIStyle style;
     Rect rect;
 
     void Start() {
         matrix = new float[100,100,100];
         w = Screen.width; h = Screen.height;
 
         style = new GUIStyle ();
         style.alignment = TextAnchor.UpperLeft;
         style.fontSize = h * 2 / 100;
         style.normal.textColor = new Color (0.0f, 0.0f, 0.5f, 1.0f);
 
         rect = new Rect (0, 0, w, h * 2 / 100);
         rnd = new System.Random ();
     }
 
     void Update(){
         oldTime = Time.realtimeSinceStartup;
         for(i=0;i<100;i++) {
             for(j=0;j<100;j++) {
                 for(k=0;k<100;k++) {
                     matrix [i, j, k] += rnd.Next(-1,2);
                 }
             }
         }
 
         for(k=0;k<100;k++) {
             for(j=0;j<100;j++) {
                 for(i=0;i<100;i++) {
                     matrix [i, j, k] += rnd.Next(-1,2);
                 }
             }
         }
 
         for(i=0;i<100;i++) {
             for(k=0;k<100;k++) {
                 for(j=0;j<100;j++) {
                     matrix [i, j, k] += rnd.Next(-1,2);
                 }
             }
         }
         deltaTime = Time.realtimeSinceStartup - oldTime;
         value = 0;
 
         for(i=0;i<100;i++) {
             for(k=0;k<100;k++) {
                 for(j=0;j<100;j++) {
                     value += matrix[i,j,k]/1000;
                 }
             }
         }
         Debug.Log (value);
         cube.GetComponent<Transform> ().localScale = new Vector3(value,value,value);
     }
 
     void OnGUI ()
     {
         
         float msec = deltaTime * 1000.0f;
         float fps = 1.0f / deltaTime;
         string text = string.Format ("Calculations performance : {0:0.0} ms ({1:0.} fps)", msec, fps);
         GUI.Label (rect, text, style);
     }
 }
 

The first step to improve my code was create a different thread that makes calculations, because in this way the application can run smoothly and then in the main thread apply the results to the scene. So I had to divide the calculations from the part of apply due to the limitations of Unity and its not thread-safe feature. So my code becomes like this:

 using UnityEngine;
 using System.Collections;
 using System.Threading;
 
 
 public class SingleThread : MonoBehaviour
 {
     int i, j, k;
     float[,,] matrix;
     float deltaTime;
     float oldTime;
     float value;
     public GameObject cube;
     bool endCalc = false, running =true;
     Thread t;
     System.Random rnd;
 
     //visualization variable
     int w, h;
     GUIStyle style;
     Rect rect;
 
 
     void Start ()
     {
         matrix = new float[100, 100, 100];
         w = Screen.width;
         h = Screen.height;
 
         style = new GUIStyle ();
         style.alignment = TextAnchor.UpperLeft;
         style.fontSize = h * 2 / 100;
         style.normal.textColor = new Color (0.0f, 0.0f, 0.5f, 1.0f);
 
         rect = new Rect (0, 0, w, h * 2 / 100);
         rnd = new System.Random ();
         t = new Thread (threadUpdate);
         t.Start ();
         oldTime = Time.realtimeSinceStartup;
 
     }
 
     void Update ()
     {
 
         if (endCalc) {
             deltaTime = Time.realtimeSinceStartup - oldTime;
             oldTime = Time.realtimeSinceStartup;
             value = 0;
 
             for (i = 0; i < 100; i++) {
                 for (k = 0; k < 100; k++) {
                     for (j = 0; j < 100; j++) {
                         value += matrix [i, j, k] / 1000;
                     }
                 }
             }
             cube.GetComponent<Transform> ().localScale = new Vector3 (value, value, value);
 
             endCalc = false;
         }
     }
 
     void OnGUI ()
     {
 
         float msec = deltaTime * 1000.0f;
         float fps = 1.0f / deltaTime;
         string text = string.Format ("Calculations performance : {0:0.0} ms ({1:0.} fps)", msec, fps);
         GUI.Label (rect, text, style);
     }
 
     void threadUpdate ()
     {
         while (running) {
             if (!endCalc) {
                 for(i=0;i<100;i++) {
                     for(j=0;j<100;j++) {
                         for(k=0;k<100;k++) {
                             matrix [i, j, k] += rnd.Next(-1,2);
                         }
                     }
                 }
 
                 for(k=0;k<100;k++) {
                     for(j=0;j<100;j++) {
                         for(i=0;i<100;i++) {
                             matrix [i, j, k] += rnd.Next(-1,2);
                         }
                     }
                 }
 
                 for(i=0;i<100;i++) {
                     for(k=0;k<100;k++) {
                         for(j=0;j<100;j++) {
                             matrix [i, j, k] += rnd.Next(-1,2);
                         }
                     }
                 }
                 endCalc = true;
             }
         }
     }
 
     void OnApplicationQuit(){
         running = false;
     }
 }

Then I think that could be better try to divide calculations on more thread so I create 6 thread and I divide the three for loop in 6 semi for loop.

 using UnityEngine;
 using System.Collections;
 using System.Threading;
 
 public class MultiThread : MonoBehaviour
 {
     float[,,] matrix;
     float deltaTime;
     float oldTime;
     float value;
     public GameObject cube;
     bool endCalc = false, running =true;
     Thread t;
     Thread[] threads;
     System.Random[] rnd;
     bool firstPart;
     bool[] calcResults;
 
     //visualization variable
     int w, h;
     GUIStyle style;
     Rect rect;
 
 
 
     void Start ()
     {
         matrix = new float[100, 100, 100];
         w = Screen.width;
         h = Screen.height;
 
         style = new GUIStyle ();
         style.alignment = TextAnchor.UpperLeft;
         style.fontSize = h * 2 / 100;
         style.normal.textColor = new Color (0.0f, 0.0f, 0.5f, 1.0f);
 
         rect = new Rect (0, 0, w, h * 2 / 100);
         oldTime = Time.realtimeSinceStartup;
         rnd = new System.Random [6];
         calcResults = new bool[6];
         for (int i = 0; i < 6; i++) {
             calcResults [i] = true;
             rnd [i] = new System.Random ();
         }
         t = new Thread (threadUpdate);
         t.Start ();
         threads = new Thread [6];
         threads [0] = new Thread (threadUpdate0);
         threads [1] = new Thread (threadUpdate1);
         threads [2] = new Thread (threadUpdate2);
         threads [3] = new Thread (threadUpdate3);
         threads [4] = new Thread (threadUpdate4);
         threads [5] = new Thread (threadUpdate5);
         threads[0].Start ();
         threads[1].Start ();
         threads[2].Start ();
         threads[3].Start ();
         threads[4].Start ();
         threads[5].Start ();
         firstPart = false;
         endCalc = false;
 
     }
 
     void Update ()
     {
 
         if (endCalc) {
             deltaTime = Time.realtimeSinceStartup - oldTime;
             oldTime = Time.realtimeSinceStartup;
             value = 0;
 
             for (int i = 0; i < 100; i++) {
                 for (int k = 0; k < 100; k++) {
                     for (int j = 0; j < 100; j++) {
                         value += matrix [i, j, k] / 1000;
                     }
                 }
             }
             cube.GetComponent<Transform> ().localScale = new Vector3 (value, value, value);
 
             endCalc = false;
         }
     }
 
     void OnGUI ()
     {
 
         float msec = deltaTime * 1000.0f;
         float fps = 1.0f / deltaTime;
         string text = string.Format ("Calculations performance : {0:0.0} ms ({1:0.} fps)", msec, fps);
         GUI.Label (rect, text, style);
     }
 
     void threadUpdate ()
     {
 
         while (running) {
 
             if (!endCalc) {
 
                 if (!firstPart) {
                     firstPart = true;
                     for (int i = 0; i < 6; i++)
                         calcResults [0] = false;
 
                 } else if (calcResults [0] && calcResults [1] && calcResults [2]&&
                     calcResults [3] && calcResults [4] && calcResults [5]) {
                     
                     endCalc = true;
                     firstPart = false;
                 }
             }
         }
     }
 
     void threadUpdate0 ()
     {
         while (running) {
             if (!calcResults[0]) {
                 for(int i=0;i<50;i++) {
                     for(int j=0;j<100;j++) {
                         for(int k=0;k<100;k++) {
                             matrix [i, j, k] += rnd[0].Next(-1,2);
                         }
                     }
                 }
                 calcResults[0] = true;
             }
         }
     }
     void threadUpdate1 ()
     {
         while (running) {
             if (!calcResults[1]) {
                 for(int k=0;k<50;k++) {
                     for(int j=0;j<100;j++) {
                         for(int i=0;i<100;i++) {
                             matrix [i, j, k] += rnd[1].Next(-1,2);
                         }
                     }
                 }
                 calcResults[1] = true;
 
             }
         }
     }
     void threadUpdate2 ()
     {
         while (running) {
             if (!calcResults[2]) {
                 for(int i=0;i<50;i++) {
                     for(int k=0;k<100;k++) {
                         for(int j=0;j<100;j++) {
                             matrix [i, j, k] += rnd[2].Next(-1,2);
                         }
                     }
                 }
                 calcResults[2] = true;
 
             }
         }
     }
 
     void threadUpdate3 ()
     {
         while (running) {
             if (!calcResults[3]) {
                 for(int i=50;i<100;i++) {
                     for(int j=0;j<100;j++) {
                         for(int k=0;k<100;k++) {
                             matrix [i, j, k] += rnd[3].Next(-1,2);
                         }
                     }
                 }
                 calcResults[3] = true;
 
             }
         }
     }
     void threadUpdate4 ()
     {
         while (running) {
             if (!calcResults[4]) {
                 for(int k=50;k<100;k++) {
                     for(int j=0;j<100;j++) {
                         for(int i=0;i<100;i++) {
                             matrix [i, j, k] += rnd[4].Next(-1,2);
                         }
                     }
                 }
                 calcResults[4] = true;
 
             }
         }
     }
     void threadUpdate5 ()
     {
         while (running) {
             if (!calcResults[5]) {
                 for(int i=50;i<100;i++) {
                     for(int k=0;k<100;k++) {
                         for(int j=0;j<100;j++) {
                             matrix [i, j, k] += rnd[5].Next(-1,2);
                         }
                     }
                 }
                 calcResults[5] = true;
 
             }
         }
     }
 
 
     void OnApplicationQuit(){
         running = false;
     }
 }
 

The problem is that the version multi thread doesn't improve the performance by 6, in fact the fps become only two time better than single thread version.Someone of you know why this happends?I see by profiler that I use more CPU power, but it looks like there is something that slowdown my code in multithread.I'm testing this code on PC, Mac and Ps4. You can find the complete project for Unity 5.2.2 at this link . Thanks to everyone for the attention

EDIT:

I create also a three thread version and it runs like six thread version

 using UnityEngine;
 using System.Collections;
 using System.Threading;
 
 public class ThreeThread : MonoBehaviour
 {
     float[,,] matrix;
     float deltaTime;
     float oldTime;
     float value;
     public GameObject cube;
     bool endCalc = false, running =true;
     Thread t;
     Thread[] threads;
     System.Random[] rnd;
     bool firstPart;
     bool[] calcResults;
 
     //visualization variable
     int w, h;
     GUIStyle style;
     Rect rect;
 
 
 
     void Start ()
     {
         matrix = new float[100, 100, 100];
         w = Screen.width;
         h = Screen.height;
 
         style = new GUIStyle ();
         style.alignment = TextAnchor.UpperLeft;
         style.fontSize = h * 2 / 100;
         style.normal.textColor = new Color (0.0f, 0.0f, 0.5f, 1.0f);
 
         rect = new Rect (0, 0, w, h * 2 / 100);
         oldTime = Time.realtimeSinceStartup;
         rnd = new System.Random [3];
         calcResults = new bool[3];
         for (int i = 0; i < 3; i++) {
             calcResults [i] = true;
             rnd [i] = new System.Random ();
         }
         t = new Thread (threadUpdate);
         t.Start ();
         threads = new Thread [3];
         threads [0] = new Thread (threadUpdate0);
         threads [1] = new Thread (threadUpdate1);
         threads [2] = new Thread (threadUpdate2);
         threads[0].Start ();
         threads[1].Start ();
         threads[2].Start ();
         firstPart = false;
         endCalc = false;
 
     }
 
     void Update ()
     {
 
         if (endCalc) {
             deltaTime = Time.realtimeSinceStartup - oldTime;
             oldTime = Time.realtimeSinceStartup;
             value = 0;
 
             for (int i = 0; i < 100; i++) {
                 for (int k = 0; k < 100; k++) {
                     for (int j = 0; j < 100; j++) {
                         value += matrix [i, j, k] / 1000;
                     }
                 }
             }
             cube.GetComponent<Transform> ().localScale = new Vector3 (value, value, value);
 
             endCalc = false;
         }
     }
 
     void OnGUI ()
     {
 
         float msec = deltaTime * 1000.0f;
         float fps = 1.0f / deltaTime;
         string text = string.Format ("Prestazioni calcoli : {0:0.0} ms ({1:0.} fps)", msec, fps);
         GUI.Label (rect, text, style);
     }
 
     void threadUpdate ()
     {
         Debug.Log ("chiamato principale");
 
         while (running) {
             Debug.Log ("entra ciclo principale");
 
             if (!endCalc) {
 
                 if (!firstPart) {
                     firstPart = true;
                     for (int i = 0; i < 3; i++)
                         calcResults [0] = false;
 
                 } else if (calcResults [0] && calcResults [1] && calcResults [2]) {
                     
                     endCalc = true;
                     firstPart = false;
                 }
             }
         }
     }
 
     void threadUpdate0 ()
     {
         while (running) {
             if (!calcResults[0]) {
                 Debug.Log ("chiamato");
 
                 for(int i=0;i<100;i++) {
                     for(int j=0;j<100;j++) {
                         for(int k=0;k<100;k++) {
                             matrix [i, j, k] += rnd[0].Next(-1,2);
                         }
                     }
                 }
                 calcResults[0] = true;
             }
         }
     }
     void threadUpdate1 ()
     {
         while (running) {
             if (!calcResults[1]) {
                 for(int k=0;k<100;k++) {
                     for(int j=0;j<100;j++) {
                         for(int i=0;i<100;i++) {
                             matrix [i, j, k] += rnd[1].Next(-1,2);
                         }
                     }
                 }
                 calcResults[1] = true;
 
             }
         }
     }
     void threadUpdate2 ()
     {
         while (running) {
             if (!calcResults[2]) {
                 for(int i=0;i<100;i++) {
                     for(int k=0;k<100;k++) {
                         for(int j=0;j<100;j++) {
                             matrix [i, j, k] += rnd[2].Next(-1,2);
                         }
                     }
                 }
                 calcResults[2] = true;
 
             }
         }
     }
 
     void OnApplicationQuit(){
         running = false;
     }
 }
 

Comment
Add comment · Show 11
10 |3000 characters needed characters left characters exceeded
▼
  • Viewable by all users
  • Viewable by moderators
  • Viewable by moderators and the original poster
  • Advanced visibility
Viewable by all users
avatar image Bonfire-Boy · Nov 04, 2015 at 02:29 PM 0
Share

How many cores do you have?

avatar image tobs89 Bonfire-Boy · Nov 04, 2015 at 02:32 PM 0
Share

I have an I7 4700k on my PC and another i7 on my $$anonymous$$acbook pro so 4 physical core.

avatar image Baste · Nov 04, 2015 at 02:44 PM 0
Share

Confirmed that the FPS doubles when going from 1 to 6 threads.

I checked, and going from 1 to 6 threads makes all four of my cores start working (ins$$anonymous$$d of just the 1), so you're not running into any issue where you're not getting the work you need out of your CPU. I would never expect a 6-fold improvement, unless you have 6+ cores and are working perfectly with the data.

You could try looking into the array layout. If your array is too large to be fetched from memory all at once, how you loop through it affects performance. See here for an example. It might simply be that the order you're iterating in the multi-thread version is worse than the order in the single-thread version.

avatar image tobs89 Baste · Nov 04, 2015 at 02:54 PM 0
Share

The loops order are the same in each version.

avatar image tobs89 Baste · Nov 04, 2015 at 03:10 PM 0
Share

I already thought that could be a problem related to memory, because in my project where I have much bigger class used in my array the performances improvement is equal to zero. In fact if in this test example I gain a x2 in performance with multithread, in my main project I have a gain equal to x1 in performance. $$anonymous$$y project use a my custom datatype that include 2 float and 1 vector3.

avatar image tobs89 Baste · Nov 04, 2015 at 03:43 PM 0
Share

I made a three thread version that I write on the main post and it runs like 6 thread ones. How is it possible?

avatar image Bonfire-Boy · Nov 04, 2015 at 03:47 PM 0
Share

Possibly just an aside, I don't know if it would impact on what you're looking at, but it seems odd to me to have an OnGUI function, (especially with string stuff being done in it), when looking at performance. Why not just store the time when the process starts and then look at how much time has passed when it's finished?

avatar image tobs89 Bonfire-Boy · Nov 04, 2015 at 03:56 PM 0
Share

Yes I know but that it's only an extreme needs of debugging obviously in the project should not be present. But actually I don't want to know the better fps i get, but I would like to know if doing multi thread I gain something or not, and if not why. So I put the same function in the OnGui on every version so they slowdown equally each version.

avatar image Bonfire-Boy tobs89 · Nov 04, 2015 at 04:20 PM 0
Share

I guess my point is that the load on your CPUs is made up of (load due to the calculation) plus (other stuff including the engine). You're only modifying the first part of that. If you had 2 cores, you wouldn't expect putting the calculation into two threads to double the overall speed, would you? Because some of the processor time is being used for other stuff, you don't actually have 2 whole cores available to the calculation.

So the percentage change in performance overall isn't actually a measure of how much you gain from multithreading the calculation. You'd need to subtract the (constant) load due to the other stuff in order to work that out, or $$anonymous$$imise the other stuff (by eg removing the OnGUI calls) in order to make the overall performance gain a better measure of the gain obtained from restructuring the calculation.

avatar image _dns_ · Nov 04, 2015 at 04:16 PM 1
Share

Hi, I can see 2 things that are not cache friendly and must cripple the performances:

First, when accessing a table in memory, it's best to access it in a "linear" way so cache usage is maximized = for a 1d array: going from 0 to max and not the other direction will prevent cache miss. For your case: a 3 dimensional array is better accessed like this: if loops are: for i, then for j, then for k : array[i,j,k] = value. This is even more important for the final sum/divide operation on the main thread.

Second: all the threads write to the same matrix array. This is the worst for multi-core cache usage. Here is a good read about that: https://fgiesen.wordpress.com/2014/07/07/cache-coherency/. The way cache works is that if a thread modify a cache line, all other cores must discard this same cache line. The way the matrix is wrote too: all threads will disable cache lines all the time, resulting in a lot of cache miss & refresh. What I would do is have each thread have it's own matrix memory to write to, so cache line don't overlap for each thread. Then the main thread will sum each matrix in another one. If possible, sum one matrix at a time to maximize cache usage : a+=b, then a+=c, then a+=d and not a=a+b+c which might use more cache (well, this last optimization has to be tested, both options should be checked ;-)

It should not result in a perfect "x NbOfThread" speedup, multhreading never do that :-)

avatar image tobs89 _dns_ · Nov 04, 2015 at 05:12 PM 0
Share

For the first part I'm ok with what you say and on the main thread I can do what you think, but on my project I need to move me on the matrix a long three different dimensions in the three for loop so I can't change order.

The second part I will try to that but I'm afraid to be memory limited to have 6 big matrix by one million of cell.

0 Replies

· Add your reply
  • Sort: 

Your answer

Hint: You can notify a user about this post by typing @username

Up to 2 attachments (including images) can be used with a maximum of 524.3 kB each and 1.0 MB total.

Follow this Question

Answers Answers and Comments

6 People are following this question.

avatar image avatar image avatar image avatar image avatar image avatar image

Related Questions

allocate specific cpu-cores for Unity 1 Answer

Unity Jobs and BURST low performance on Html5 1 Answer

Can i call an external method from a job? 0 Answers

Calculating InverseTransformPoint outside unity 1 Answer

GPU warm up 1 Answer


Enterprise
Social Q&A

Social
Subscribe on YouTube social-youtube Follow on LinkedIn social-linkedin Follow on Twitter social-twitter Follow on Facebook social-facebook Follow on Instagram social-instagram

Footer

  • Purchase
    • Products
    • Subscription
    • Asset Store
    • Unity Gear
    • Resellers
  • Education
    • Students
    • Educators
    • Certification
    • Learn
    • Center of Excellence
  • Download
    • Unity
    • Beta Program
  • Unity Labs
    • Labs
    • Publications
  • Resources
    • Learn platform
    • Community
    • Documentation
    • Unity QA
    • FAQ
    • Services Status
    • Connect
  • About Unity
    • About Us
    • Blog
    • Events
    • Careers
    • Contact
    • Press
    • Partners
    • Affiliates
    • Security
Copyright © 2020 Unity Technologies
  • Legal
  • Privacy Policy
  • Cookies
  • Do Not Sell My Personal Information
  • Cookies Settings
"Unity", Unity logos, and other Unity trademarks are trademarks or registered trademarks of Unity Technologies or its affiliates in the U.S. and elsewhere (more info here). Other names or brands are trademarks of their respective owners.
  • Anonymous
  • Sign in
  • Create
  • Ask a question
  • Spaces
  • Default
  • Help Room
  • META
  • Moderators
  • Explore
  • Topics
  • Questions
  • Users
  • Badges