Differences
This shows you the differences between two versions of the page.
— |
random:net_list_speed [2011/07/07 05:13] (current) grant created |
||
---|---|---|---|
Line 1: | Line 1: | ||
+ | ====== .NET List Speed ====== | ||
+ | |||
+ | <code> | ||
+ | // | ||
+ | // Written by Grant Jenks | ||
+ | // http://www.grantjenks.com/ | ||
+ | // | ||
+ | // DISCLAIMER | ||
+ | // THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE | ||
+ | // LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR | ||
+ | // OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, | ||
+ | // EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
+ | // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE | ||
+ | // ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. | ||
+ | // SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY | ||
+ | // SERVICING, REPAIR OR CORRECTION. | ||
+ | // | ||
+ | // Copyright | ||
+ | // This work is licensed under the | ||
+ | // Creative Commons Attribution-NonCommercial-NoDerivs 3.0 Unported License. | ||
+ | // To view a copy of this license, visit | ||
+ | // http://creativecommons.org/licenses/by-nc-nd/3.0/ | ||
+ | // or send a letter to Creative Commons, 171 Second Street, Suite 300, | ||
+ | // San Francisco, California, 94105, US.A | ||
+ | // | ||
+ | // Description | ||
+ | // Recently I wondered about the performance of List<T> vs. LinkedList<T>. | ||
+ | // If all I need is insertion and traversal (no O(1) random access as List<T> | ||
+ | // provides, nor O(1) item removal as LinkedList<T> provides), then what's | ||
+ | // the performance comparison of using a LinkedList<T> vs. a List<T>. | ||
+ | // | ||
+ | // Data | ||
+ | // Object Count Append (ms) Accumulate (ms) | ||
+ | // List 1 0 0 | ||
+ | // List 10 0 0 | ||
+ | // List 100 0 0 | ||
+ | // List 1000 0 0 | ||
+ | // List 10000 0 0 | ||
+ | // List 100000 3 1 | ||
+ | // List 1000000 17 4 | ||
+ | // List 10000000 94 46 | ||
+ | // List 100000000 919 468 | ||
+ | // LinkedList 1 0 1 | ||
+ | // LinkedList 10 0 0 | ||
+ | // LinkedList 100 0 0 | ||
+ | // LinkedList 1000 0 0 | ||
+ | // LinkedList 10000 0 0 | ||
+ | // LinkedList 100000 58 1 | ||
+ | // LinkedList 1000000 138 12 | ||
+ | // LinkedList 10000000 1815 100 | ||
+ | // LinkedList 100000000 1848201 392487 | ||
+ | // | ||
+ | // Discussion of Data | ||
+ | // Based on the above, a few points are clear: | ||
+ | // 1. If you've fewer than 10,000 elements, don't worry about the choice. | ||
+ | // 2. For larger lists, the List<T> data type is faster by a factor of 2 to 20. | ||
+ | // 3. For really large lists, the List<T> data type is faster by 3-4 magnitudes. | ||
+ | // Note that the times for LinkedList above are not inaccurate at 100,000,000 | ||
+ | // elements. With that many elements, Append took 30.8 minutes and Accumulate | ||
+ | // took 6.5 minutes. The parallel List<T> operation took no more than 1 second. | ||
+ | // | ||
+ | // Analysis | ||
+ | // Based solely on the performance characteristics described on MSDN, that is: | ||
+ | // 1. Random removal/insertion from a List<T> object is an O(n) operation | ||
+ | // 2. Random removal/insertion from a LinkedList<T> object is an O(1) operation | ||
+ | // 3. Random access into a List<T> object is an O(1) operation | ||
+ | // 4. Random access into a LinkedList<T> object is an O(n) operation | ||
+ | // I'm inferring that List<T> objects would be better termed ArrayList<T> | ||
+ | // because they are array-backed. In contrast, the LinkedList<T> object uses | ||
+ | // a LinkedListNode<T> to chain together items in a list. | ||
+ | // | ||
+ | // To be honest, I thought the LinkedList<T> datatype would be much faster. I | ||
+ | // wasn't sure whether it would be faster than the List<T> datatype (hence this | ||
+ | // program). It seems clear that object creation in .NET is expensive. On every | ||
+ | // insert, O(n), to LinkedList<T>, a new LinkedListNode<T> is created to | ||
+ | // contain the data. Instead, the List<T> object does an allocation only | ||
+ | // O(log(n)) times (assuming they double the size of the backing array when the | ||
+ | // capacity is exhausted). I believe this accounts for the majority of the | ||
+ | // extra time spent appending to the LinkedList<T> datatype. This object | ||
+ | // creation causes LinkedList<T> to be 2-20 times slower. | ||
+ | // | ||
+ | // With 100,000,000 items, a scary thing happens. From 100,000 to 10,000,000 | ||
+ | // items, the LinkedList<T> datatype scales linearly. This is to be expected. | ||
+ | // So what happens to violate this linear scaling at 100,000,000? One word: | ||
+ | // paging. It's not trivial to determine the size of a LinkedListNode<T> but | ||
+ | // we could guess at 8 methods, 4 fields, padding and GC tracking that the | ||
+ | // size may be on the order of 12 * 8 = ~100 bytes. At 100,000,000 items, | ||
+ | // that'll require 10,000,000,000 (ten gigabytes) of memory. My laptop has only | ||
+ | // four gigabytes of memory and so needs to page out about six gigabytes of | ||
+ | // data. In contrast, the List<T> requires only ~4 bytes/item so at | ||
+ | // 100,000,000 items, it consumes 400 megabytes and will fit entirely in | ||
+ | // memory. The takeaway, and to be expected: the disk is about 1,000 times | ||
+ | // slower than memory. | ||
+ | // | ||
+ | |||
+ | using System; | ||
+ | using System.Diagnostics; | ||
+ | using System.Collections.Generic; | ||
+ | |||
+ | class CSharpListSpeed | ||
+ | { | ||
+ | |||
+ | // | ||
+ | // Time and report speed of 'List' | ||
+ | // | ||
+ | |||
+ | static void TimeList(int iterations) | ||
+ | { | ||
+ | int accum = 0; | ||
+ | Stopwatch stopwatchAppend = new Stopwatch(); | ||
+ | Stopwatch stopwatchAccum = new Stopwatch(); | ||
+ | List<int> list = new List<int>(); | ||
+ | |||
+ | stopwatchAppend.Start(); | ||
+ | |||
+ | for (int i = 0; i < iterations; i++) | ||
+ | { | ||
+ | list.Add(i); | ||
+ | } | ||
+ | |||
+ | stopwatchAppend.Stop(); | ||
+ | |||
+ | stopwatchAccum.Start(); | ||
+ | |||
+ | foreach (int value in list) | ||
+ | { | ||
+ | accum += value; | ||
+ | } | ||
+ | |||
+ | stopwatchAccum.Stop(); | ||
+ | |||
+ | Console.WriteLine("List\t{0}\t{1}\t{2}", iterations, | ||
+ | stopwatchAppend.ElapsedMilliseconds, stopwatchAccum.ElapsedMilliseconds); | ||
+ | } | ||
+ | |||
+ | // | ||
+ | // Time and report speed of 'LinkedList' | ||
+ | // | ||
+ | |||
+ | static void TimeLinkedList(int iterations) | ||
+ | { | ||
+ | int accum = 0; | ||
+ | Stopwatch stopwatchAppend = new Stopwatch(); | ||
+ | Stopwatch stopwatchAccum = new Stopwatch(); | ||
+ | LinkedList<int> list = new LinkedList<int>(); | ||
+ | |||
+ | stopwatchAppend.Start(); | ||
+ | |||
+ | for (int i = 0; i < iterations; i++) | ||
+ | { | ||
+ | list.AddLast(i); | ||
+ | } | ||
+ | |||
+ | stopwatchAppend.Stop(); | ||
+ | |||
+ | stopwatchAccum.Start(); | ||
+ | |||
+ | foreach (int value in list) | ||
+ | { | ||
+ | accum += value; | ||
+ | } | ||
+ | |||
+ | stopwatchAccum.Stop(); | ||
+ | |||
+ | Console.WriteLine("LinkedList\t{0}\t{1}\t{2}", iterations, | ||
+ | stopwatchAppend.ElapsedMilliseconds, stopwatchAccum.ElapsedMilliseconds); | ||
+ | } | ||
+ | |||
+ | // | ||
+ | // Time and report speed of 'List' and 'LinkedList'. | ||
+ | // | ||
+ | |||
+ | static int Main(String[] args) | ||
+ | { | ||
+ | // Pause at the beginning so that tools can be attached. | ||
+ | |||
+ | Console.WriteLine("Press enter when ready..."); | ||
+ | Console.ReadLine(); | ||
+ | |||
+ | Console.WriteLine("Object\tCount\tAppend (ms)\tAccumulate (ms)"); | ||
+ | |||
+ | for (int i = 1; i <= 100000000; i *= 10) | ||
+ | { | ||
+ | TimeList(i); | ||
+ | } | ||
+ | |||
+ | for (int i = 1; i <= 100000000; i *= 10) | ||
+ | { | ||
+ | TimeLinkedList(i); | ||
+ | } | ||
+ | |||
+ | return 0; | ||
+ | } | ||
+ | } | ||
+ | </code> | ||