Using StreamReader/StreamWriter or not to use them

 

This is an outdated version; see "Corrections and Questions" below for the current version.

/*

* To learn how the FromBase64CharArray and ToBase64CharArray functions work, I wrote the following

* simple UUEncoder/UUDecoder test program. There are two implementations of the decoder/encoder.

*

* The overall differences between the implementations are as follows:

*

* Implementation One is taken from Microsoft's sample.

* EncoderOne uses FileStream for input and StreamWriter for output phase.

* DecoderOne uses StreamReader for input and FileStream for output phase.

*

* Implementation Two was mostly developed by me.

* EncoderTwo and DecoderTwo use FileStream for both the input and the output phase.

* Besides that, both routines use small working buffers.

*

* I have three questions concerning the implementations:

*

* 1. Which algorithm is better, and why?

* 2. What is the optimal input buffer size for EncoderTwo and DecoderTwo

* (see the buffer size requirements / dependencies in the code)?

* 3. Are there terser or simpler implementations available?

*

* Regards

* Peca

*/

 

using System;

using System.IO;

using System.Text;

namespace base64

{

class Program

{

/// <summary>
/// Decodes a Base64 text file into a binary file, holding the whole input in memory.
/// </summary>
/// <param name="inputFileName">Path of the ASCII Base64 file to read.</param>
/// <param name="outputFileName">Path of the binary file to create or overwrite.</param>
/// <remarks>
/// Failures are reported on the console; the method then returns without writing output.
/// The input must fit in a single char array, so very large files fail at allocation.
/// </remarks>
public static void DecoderOne(string inputFileName, string outputFileName)
{
    long rdlen = 0;
    int charsRead = 0;
    char[] base64CharArray;

    try
    {
        // "using" closes the reader even when allocation or reading throws.
        using (StreamReader inFile = new StreamReader(inputFileName, Encoding.ASCII))
        {
            rdlen = inFile.BaseStream.Length;
            base64CharArray = new char[rdlen]; // throws for sizes an array cannot hold

            // A single Read call may return fewer characters than requested,
            // so keep reading until the buffer is full or the stream ends.
            int got;
            while (charsRead < base64CharArray.Length
                   && (got = inFile.Read(base64CharArray, charsRead, base64CharArray.Length - charsRead)) > 0)
            {
                charsRead += got;
            }
        }
    }
    catch (System.Exception exp)
    {
        // Error creating stream or reading from it.
        Console.WriteLine("{0}", exp.Message);
        return;
    }

    // Convert the Base64 UUEncoded input into binary output.
    byte[] binaryData;
    try
    {
        // Decode only the characters actually read; unread trailing slots would
        // hold '\0', which is not a valid Base64 character.
        binaryData = Convert.FromBase64CharArray(base64CharArray, 0, charsRead);
    }
    catch (ArgumentNullException)
    {
        Console.WriteLine("Base 64 character array is null.");
        return;
    }
    catch (System.FormatException)
    {
        Console.WriteLine("Base 64 Char Array length is not " +
                          "4 or is not an even multiple of 4.");
        return;
    }

    // Write out the decoded data.
    try
    {
        using (FileStream outFile = new FileStream(outputFileName, FileMode.Create, FileAccess.Write))
        {
            outFile.Write(binaryData, 0, binaryData.Length);
        }
        Console.WriteLine("DecoderOne: {0} bytes read and {1} bytes written", rdlen, binaryData.Length);
    }
    catch (System.Exception exp)
    {
        // Error creating stream or writing to it.
        Console.WriteLine("{0}", exp.Message);
    }
}

/// <summary>
/// Base64-encodes a binary file into an ASCII text file, holding the whole input in memory.
/// </summary>
/// <param name="inputFileName">Path of the binary file to read.</param>
/// <param name="outputFileName">Path of the text file to create or overwrite.</param>
/// <remarks>
/// Failures are reported on the console; the method then returns without writing output.
/// </remarks>
public static void EncoderOne( string inputFileName, string outputFileName )
{
    // Read in the binary data.
    byte[] binaryData;
    int bytesRead = 0;
    try
    {
        // "using" closes the stream even when allocation or reading throws.
        using ( FileStream inFile = new FileStream( inputFileName, FileMode.Open, FileAccess.Read ) )
        {
            binaryData = new byte[inFile.Length]; // throws for sizes an array cannot hold

            // A single Read call may return fewer bytes than requested,
            // so keep reading until the buffer is full or the stream ends.
            int got;
            while ( bytesRead < binaryData.Length
                    && ( got = inFile.Read( binaryData, bytesRead, binaryData.Length - bytesRead ) ) > 0 )
            {
                bytesRead += got;
            }
        }
    }
    catch ( FileNotFoundException )
    {
        Console.WriteLine( "Input file does not exist." );
        return;
    }
    catch ( Exception e )
    {
        Console.WriteLine( "{0}", e.Message );
        return;
    }

    char[] base64CharArray;
    int charCount;
    try
    {
        // Base64 emits exactly four characters for every started group of three bytes.
        long encodedLength = 4L * ( ( bytesRead + 2L ) / 3L );
        base64CharArray = new char[encodedLength];
        charCount = Convert.ToBase64CharArray( binaryData, 0, bytesRead, base64CharArray, 0 );
    }
    catch ( Exception e )
    {
        // Typically an allocation failure when the encoded text does not fit in one array.
        Console.WriteLine( "{0}", e.Message );
        return;
    }

    try
    {
        using ( StreamWriter outFile = new StreamWriter( outputFileName, false, Encoding.ASCII ) )
        {
            outFile.Write( base64CharArray, 0, charCount );
        }
        Console.WriteLine( "EncoderOne: {0} bytes read and {1} bytes written", bytesRead, charCount );
    }
    catch ( Exception e )
    {
        // Error creating stream or writing to it.
        Console.WriteLine( "{0}", e.Message );
    }
}

/// <summary>
/// Decodes a Base64 text file into a binary file, one fixed-size chunk at a time.
/// </summary>
/// <param name="inName">Path of the Base64 input file.</param>
/// <param name="outName">Path of the output file; created or overwritten.</param>
/// <remarks>
/// Each chunk is decoded independently, so every chunk except the last must contain a
/// whole number of 4-character Base64 groups. This assumes the input has no line breaks
/// or other whitespace, which is the format EncoderTwo writes.
/// Failures to open the files and malformed input are reported on the console.
/// </remarks>
private static void DecoderTwo( String inName, String outName )
{
    // Create the file streams to handle the input and output files.
    FileStream fin = null, fout = null;
    try
    {
        fin = new FileStream( inName, FileMode.Open, FileAccess.Read );
        // FileMode.Create truncates an existing file, so no separate SetLength( 0 ) is needed.
        fout = new FileStream( outName, FileMode.Create, FileAccess.Write );
    }
    catch ( Exception e )
    {
        Console.WriteLine( e.Message );
        // fin is still null when opening the input itself failed.
        if ( fin != null )
        {
            fin.Close();
        }
        return;
    }

    const int binmult = 9;
    const int binsize = 32 * binmult; // input chunk in characters; a multiple of 4

    //
    // Buffer size requirements:
    // buff_med receives one char per input byte, so it must be as large as buff_in.
    //
    byte[] buff_in = new byte[binsize];
    char[] buff_med = new char[binsize];

    long rdlen = 0;
    long rtlen = 0;

    try
    {
        while ( true )
        {
            // Stream.Read may return fewer bytes than requested; fill the whole chunk
            // so that it is only ever short at the end of the file.
            int len = 0;
            int got;
            while ( len < binsize
                    && ( got = fin.Read( buff_in, len, binsize - len ) ) > 0 )
            {
                len += got;
            }
            if ( len == 0 )
            {
                break; // nothing left to decode
            }

            for ( int x = 0 ; x < len ; x++ )
            {
                buff_med[x] = Convert.ToChar( buff_in[x] );
            }

            byte[] buff_out = Convert.FromBase64CharArray( buff_med, 0, len );
            fout.Write( buff_out, 0, buff_out.Length );

            rdlen += len;
            rtlen += buff_out.Length;

            if ( len < binsize )
            {
                break; // short chunk means end of file
            }
        }
        Console.WriteLine( "DecoderTwo: {0} bytes read and {1} bytes written", rdlen, rtlen );
    }
    catch ( FormatException e )
    {
        Console.WriteLine( e.Message );
    }
    finally
    {
        fout.Close();
        fin.Close();
    }
}

/// <summary>
/// Base64-encodes a file into a text file, one fixed-size chunk at a time.
/// </summary>
/// <param name="inName">Path of the binary input file.</param>
/// <param name="outName">Path of the output file; created or overwritten.</param>
/// <remarks>
/// The input chunk size is a multiple of 3 bytes, so only the final chunk can produce
/// '=' padding and the concatenated output is one valid Base64 stream.
/// Failures to open the files are reported on the console.
/// </remarks>
private static void EncoderTwo( String inName, String outName )
{
    FileStream fin = null, fout = null;
    try
    {
        fin = new FileStream( inName, FileMode.Open, FileAccess.Read );
        // FileMode.Create truncates an existing file, so no separate SetLength( 0 ) is needed.
        fout = new FileStream( outName, FileMode.Create, FileAccess.Write );
        //
        // TODO: check the free disk space for the output file
        //
    }
    catch ( Exception e )
    {
        Console.WriteLine( e.Message );
        // fin is still null when opening the input itself failed.
        if ( fin != null )
        {
            fin.Close();
        }
        return;
    }

    const int binmult = 9;
    const int binsize = 24 * binmult; // input chunk in bytes; a multiple of 3
    const int binmed = 32 * binmult;  // four output characters per three input bytes

    byte[] buff_in = new byte[binsize];
    char[] buff_med = new char[binmed];
    byte[] buff_out = new byte[binmed];

    long rdlen = 0;
    long rtlen = 0; // long: the encoded size of a large file exceeds the int range

    try
    {
        // Read from the input file, then encode and write to the output file.
        while ( true )
        {
            // Stream.Read may return fewer bytes than requested; fill the whole chunk
            // so padding can only ever be produced for the last chunk of the file.
            int len = 0;
            int got;
            while ( len < binsize
                    && ( got = fin.Read( buff_in, len, binsize - len ) ) > 0 )
            {
                len += got;
            }
            if ( len == 0 )
            {
                break; // nothing left to encode
            }

            int charCount = Convert.ToBase64CharArray( buff_in, 0, len, buff_med, 0 );

            // Base64 characters are all ASCII, so each one fits in a single byte.
            for ( int x = 0 ; x < charCount ; x++ )
            {
                buff_out[x] = Convert.ToByte( buff_med[x] );
            }
            fout.Write( buff_out, 0, charCount );

            rdlen += len;
            rtlen += charCount;

            if ( len < binsize )
            {
                break; // short chunk means end of file
            }
        }
        Console.WriteLine( "EncoderTwo: {0} bytes read and {1} bytes written", rdlen, rtlen );
    }
    finally
    {
        fout.Close();
        fin.Close();
    }
}

 

/// <summary>
/// Console entry point: asks for a source file and runs both encoder/decoder pairs on it.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
/// <remarks>
/// Output files are written next to the source with the suffixes
/// .enc1/.dec1 and .enc2/.dec2.
/// </remarks>
static void Main( string[] args )
{
    Console.WriteLine( "UUEncode/UUDecode file demonstration" );
    Console.WriteLine( "====================================" );
    Console.WriteLine( "Enter source filename to do encode/decode tests:\n" );

    // ReadLine returns null at end of input; File.Exists is false for null or empty paths.
    string i_file = Console.ReadLine();
    if ( !File.Exists( i_file ) )
    {
        // Stop here instead of letting all four routines fail one after another.
        Console.WriteLine( "Input file does not exist." );
    }
    else
    {
        Console.WriteLine( "Encoder / Decoder 1 tests" );
        EncoderOne( i_file, i_file + ".enc1" );
        DecoderOne( i_file + ".enc1", i_file + ".dec1" );

        Console.WriteLine( "Encoder / Decoder 2 tests" );
        EncoderTwo( i_file, i_file + ".enc2" );
        DecoderTwo( i_file + ".enc2", i_file + ".dec2" );
    }

    Console.WriteLine( "Test runs done. Press a key...\n" );
    Console.ReadKey();
}

}

}




Answer this question

Using StreamReader/StreamWriter or not to use them

  • prujohn

    Nobody seems to be interested, or…

    Someone has already written the adaptive code and is now writing a full article for Dr. Dobb’s Portal … earning his or her “free” paycheck.

    Not exactly related to this, but there is an answer to the question about generics usage that is commonly asked in this forum: see the video at Dr. Dobb's | Generics in C# | September 15, 2006.



  • el-chema

    1. One is better because a boatload of programmers have debugged it for you. Two is better because it uses less memory. You decide.
    2. I doubt it makes a difference. The framework uses very small buffers in its Stream class implementations (16 bytes, IIRC). The buffering in the Windows file system driver and cache subsystem is what counts.
    3. Doubtful.


  • cdolor

    Thanks to all of my thread viewers, and to ‘nobugz’ especially. It’s time to lock this thread and put it in some bit-heaven, where all the good bits go (a small portion of the bad bits seems to be recycled back to Microsoft, and the rest of the bad bits go ... well, maybe your next project reuses them). As a last comment in general, I don’t understand the C# compiler. The C# compiler generates ‘callvirt’ IL code for all methods, which makes a null pointer check. You don’t need it for static methods, because the compiler can verify that situation and generate the simpler ‘call’ IL code. This behavior is also true for the VB compiler, but C++/CLR and Fortran generate correct IL code with ‘call’ and ‘callvirt’. Is the C# compiler lazy, or doesn’t it trust itself? Maybe just lazy, as all we humans tend to be.

    Regards

    Peca



  • Fredrik Kronander

     

    Some numbers for my encoder/decoder

    I use the Visual C# image vcs.img (450 MB) as the test file in two cases, where the OS working set is identical:

    1.     using external USB hard disk

    2.     using internal hard disk, what is used by OS

    Test case 1, when source and destination is in the external USB hard disk.

    EncodeBase64/DecodeBase64 test ver 1.2 by Peca, 2006

    Note: files are written in the same path as source, but have

        .b64 extension for encoded file

        .bak extension for decoded file

    Enter source filename:

    e:\images\vcs.img

    ENCODER: RUN 0 MULT 10

    ENCODER: IN 451837952 bytes, OUT 602450604

    ENCODER: ELAPSED TIME 00:01:37.1510085

    DECODER: RUN 0 MULT 10

    DECODER: IN 602450604 bytes, OUT 451837952

    DECODER: ELAPSED TIME 00:01:46.0439940

    ENCODER: RUN 1 MULT 11

    ENCODER: IN 451837952 bytes, OUT 602450604

    ENCODER: ELAPSED TIME 00:01:30.4658895

    DECODER: RUN 1 MULT 11

    DECODER: IN 602450604 bytes, OUT 451837952

    DECODER: ELAPSED TIME 00:01:41.0218545

    ENCODER: RUN 2 MULT 12

    ENCODER: IN 451837952 bytes, OUT 602450604

    ENCODER: ELAPSED TIME 00:01:26.4651690

    DECODER: RUN 2 MULT 12

    DECODER: IN 602450604 bytes, OUT 451837952

    DECODER: ELAPSED TIME 00:01:36.6744765

    ENCODER: RUN 3 MULT 13

    ENCODER: IN 451837952 bytes, OUT 602450604

    ENCODER: ELAPSED TIME 00:01:21.2076930

    DECODER: RUN 3 MULT 13

    DECODER: IN 602450604 bytes, OUT 451837952

    DECODER: ELAPSED TIME 00:01:29.8145640

    ENCODER: RUN 4 MULT 14

    ENCODER: IN 451837952 bytes, OUT 602450604

    ENCODER: ELAPSED TIME 00:01:18.4783755

    DECODER: RUN 4 MULT 14

    DECODER: IN 602450604 bytes, OUT 451837952

    DECODER: ELAPSED TIME 00:01:24.7113750

    Test done. Press a key to exit...

     

    Test case 2, when source and destination is in the same internal hard disk as is operating system.

    EncodeBase64/DecodeBase64 test ver 1.2 by Peca, 2006

    Note: files are written in the same path as source, but have

        .b64 extension for encoded file

        .bak extension for decoded file

    Enter source filename:

    c:\burn\vcs.img

    ENCODER: RUN 0 MULT 10

    ENCODER: IN 451837952 bytes, OUT 602450604

    ENCODER: ELAPSED TIME 00:02:45.6007290

    DECODER: RUN 0 MULT 10

    DECODER: IN 602450604 bytes, OUT 451837952

    DECODER: ELAPSED TIME 00:02:48.6405735

    ENCODER: RUN 1 MULT 11

    ENCODER: IN 451837952 bytes, OUT 602450604

    ENCODER: ELAPSED TIME 00:02:56.3510175

    DECODER: RUN 1 MULT 11

    DECODER: IN 602450604 bytes, OUT 451837952

    DECODER: ELAPSED TIME 00:02:39.3433170

    ENCODER: RUN 2 MULT 12

    ENCODER: IN 451837952 bytes, OUT 602450604

    ENCODER: ELAPSED TIME 00:02:52.3024485

    DECODER: RUN 2 MULT 12

    DECODER: IN 602450604 bytes, OUT 451837952

    DECODER: ELAPSED TIME 00:02:28.9260150

    ENCODER: RUN 3 MULT 13

    ENCODER: IN 451837952 bytes, OUT 602450604

    ENCODER: ELAPSED TIME 00:02:55.6498905

    DECODER: RUN 3 MULT 13

    DECODER: IN 602450604 bytes, OUT 451837952

    DECODER: ELAPSED TIME 00:02:23.5298760

    ENCODER: RUN 4 MULT 14

    ENCODER: IN 451837952 bytes, OUT 602450604

    ENCODER: ELAPSED TIME 00:03:00.5157900

    DECODER: RUN 4 MULT 14

    DECODER: IN 602450604 bytes, OUT 451837952

    DECODER: ELAPSED TIME 00:02:24.1480005

    Test done. Press a key to exit...

     

    Conclusions

    ·         The elapsed times show that linear differences in buffer size result in non-linear performance gains

    ·         Using a medium that is not the OS’s home disk gives better performance

     

     



  • finialscraps

    I am thinking…

    Did you watch Scott’s video? Maybe all that counts in it is that you get a huge performance gain by using generics, because boxing/unboxing of value types is a very time-consuming operation. So, can I use generics in my program to get an O(1) performance gain?



  • DQM

    Yet more...

    Can someone devise adaptive code that makes my program work optimally, independent of the environment?



  • yatingg

    A note for tuners

    As good background for tuning an application, read Microsoft’s excellent patterns & practices (2004) document “Improving .NET Application Performance and Scalability”, ISBN 0-7356-1851-8, http://download.microsoft.com/download/a/7/e/a7ea6fd9-2f56-439e-a8de-024c968f26d1/ScaleNet.pdf. Even if you are not interested in tuning, the document is aimed at a wider audience and is worth reading in any case.



  • 92869

    As an addition to your excellent answer, there are good notes for StreamReader/StreamWriter constructors, which take the buffer size as a parameter:

     

    ·         The buffer size, in number of 16-bit characters, is set by the bufferSize parameter. If bufferSize is less than the minimum allowable size (128 characters), the minimum allowable size is used.

    ·         When reading from a Stream, it is more efficient to use a buffer that is the same size as the internal buffer of the stream.

     

    So, what is the internal buffer size of the stream

     

    Stupid thinking part II

     

    Reading from the keyboard, the default seem to be okay

     

    Reading from the pipe,  -- don't know 

     

    Reading from a floppy disk, it seems natural to think sector-wise: 512 bytes is 256 Unicode chars, so use a bufferSize of 256 or a multiple of it. What about setting bufferSize to 4352, so that a whole track gets buffered?

     

    Reading from CD/DVD 2048 byte chunk corresponds 1024 bufferSize or multiple of it

     

    etc.

     



  • atypoli

    Corrections and Questions

    I introduce here my newest considerations related to this thread:

    ·         Why Microsoft’s sample doesn’t work and my code does

    ·         What are optimal buffer sizes

    ·         Encoding/decoding times.

    ·         Question about process priority.

    ·         New test case source code

     

    Why Microsoft’s sample doesn’t work and my code does

    MSFT’s sample can’t handle the new byte[] allocation of more than a gigabyte that is needed for the file I use in my benchmark test (you get an arithmetic overflow exception). The file I use is the Vista RC1 ISO image (vista_5600.16384.060829-2230_x86fre_client_lr1cfre_en_dvd.iso), which is exactly 2 709 782 528 bytes in size and has MD5: 22486e815a38feffd9667317dfeec55a. My code works because it uses a working buffer whose size I can choose.

    What are optimal buffer sizes

    The optimal buffer size is different for the encoder and the decoder, but it is nearly optimal if you select a “multiplier” factor of 12, which corresponds to an input buffer size of 288 for the encoder and 384 for the decoder. Deviating from that, you get about a 10% difference in overall throughput. (This is related to the FromBase64CharArray and ToBase64CharArray methods.)

    As a side note:

    I use three buffers in the encoder/decoder.

     

    Using in decoder:

     

      byte[] buff_in = new byte[buff_in_size];

      char[] buff_med = new char[buff_in_size];

      byte[] buff_out = new byte[buff_out_size];

     

    Using in encoder:

     

      byte[] buff_in = new byte[buff_in_size];

      char[] buff_med = new char[buff_out_size];

      byte[] buff_out = new byte[buff_out_size];

     

    Think about why the buffer size must be (or is) different in the buff_med case.

     

    Encoding/decoding times

    The Vista RC1 ISO image needs about 10 minutes to encode and slightly more to decode with a factor of 12 on my AMD64 Athlon 3600 running Vista RC1. The processing speed corresponds to about 3.4 MB per second. What is the upper limit?

    Question about process priority

    How can I elevate the priority from Normal to High or Realtime using C# and the CLR? You can change the priority of the current process from Task Manager's process properties, but is a priority set that way persistent?

    New test case source code

    [code language=”C#”]

    /*

     * Base64 Encoder / Decoder sample benchmark program for determining optimal buffer sizes,

     * when using CLR for supported platforms.

     *

     * * Version 1.2

     * (C)Peca, 2006

     */

     

     

    using System;

    using System.IO;

    using System.Text;

    namespace TestBase64

        {

        class Program

            {

                private static void DecodeBase64( String inName, String outName,int binmult)

                {

               

                FileStream fin = null, fout = null;

                try

                    {

                    fin = new FileStream( inName, FileMode.Open, FileAccess.Read );

                    fout = new FileStream( outName, FileMode.OpenOrCreate, FileAccess.Write );

                    }

     

     

                catch ( FileNotFoundException e )

                    {

                    Console.WriteLine( e.Message );

                    return;

                    }

     

                catch ( Exception e )

                    {

                    Console.WriteLine( e.Message );

                    fin.Close();

                    return;

                    }

                            

                fout.SetLength( 0 );

     

     

                if ( binmult == 0 )

                    binmult = 9; // see encoder for statistics

     

     

                int buff_in_size = 32 * binmult; // these values are swapped for decoder

                int buff_out_size = 24 * binmult;

     

     

                byte[] buff_in = new byte[buff_in_size];

                char[] buff_med = new char[buff_in_size];

                byte[] buff_out = new byte[buff_out_size];

     

                long rdlen = 0;

                long rtlen = 0;

                long totlen = fin.Length;

                int len;

               

                try

                    {

                    while ( rdlen < totlen )

                        {

                        len = fin.Read( buff_in, 0, buff_in_size );

                        //--

                        for ( int x = 0 ; x < len ; x++ )

                            {

                            buff_med[x] = Convert.ToChar( buff_in[x] );

                            }

     

                        buff_out = Convert.FromBase64CharArray( buff_med, 0, len );

     

                        fout.Write( buff_out, 0, buff_out.Length );

                        rdlen += len;

                        rtlen += buff_out.Length;

                        Console.Write( "\r{0}", rtlen );

                        }

                    Console.WriteLine( "\rDECODER: INPUT {0}, OUTPUT {1} bytes{2}", rdlen, rtlen, Environment.NewLine );

                    }

                catch ( FormatException e )

                    {

                    Console.WriteLine( e.Message );

                    }

                catch ( IndexOutOfRangeException e )

                    {

                    Console.WriteLine( e.Message );

                    }

                finally

                    {

                    fout.Close();

                    fin.Close();

                    }

                }

     

            private static void EncodeBase64( String inName, String outName, int binmult )

                {

                FileStream fin = null, fout = null;

                try

                    {

                    fin = new FileStream( inName, FileMode.Open, FileAccess.Read );

                    fout = new FileStream( outName, FileMode.OpenOrCreate, FileAccess.Write );

                    }

                catch ( FileNotFoundException e )

                    {

                    Console.WriteLine( e.Message );

                    return;

                    }

                catch ( Exception e )

                    {

                    Console.WriteLine( e.Message );

                    fin.Close();

                    return;

                    }

                fout.SetLength( 0 );

     

               

                if ( binmult == 0 )

                    binmult = 12; // optimal

     

                int buff_in_size = 24 * binmult;

                int buff_out_size = 32 * binmult;

     

                byte[] buff_in = new byte[buff_in_size];

                char[] buff_med = new char[buff_out_size];

                byte[] buff_out = new byte[buff_out_size];

     

                long rdlen = 0;

                long rtlen = 0;

                long totlen = fin.Length;

                int len;

                int charCount = 0;

               

               

                try

                    {

                   

                    while ( rdlen < totlen )

                        {

                      

                        len = fin.Read( buff_in, 0, buff_in_size );

     

                                                            

                        charCount = Convert.ToBase64CharArray( buff_in, 0, len, buff_med, 0 );

     

                       

     

                        for ( int x = 0 ; x < charCount ; x++ )

                            {

                            buff_out[x] = Convert.ToByte( buff_med[x] );

                            }

                        fout.Write( buff_out, 0, charCount );

                        rdlen += len;

                        rtlen += charCount;

                        Console.Write( "\r{0}", rtlen );

                        }

                    Console.WriteLine( "\rENCODER: INPUT {0}, OUTPUT {1} bytes{2}", rdlen, rtlen, Environment.NewLine );

                    }

                catch ( FormatException e )

                    {

                    Console.WriteLine( e.Message );

                    }

                finally

                    {

                    fout.Close();

                    fin.Close();

                    }

                }

         

     

     

            static void Main( string[] args )

                {

     

                DateTime time1;

                DateTime time2;

                Console.WriteLine( "{0}==================================================================", Environment.NewLine );

                Console.WriteLine( " EncodeBase64/DecodeBase64 test ver 1.2 by Peca, 2006" );

                Console.WriteLine( "=================================================================={0}", Environment.NewLine );

                Console.WriteLine( "Note: files are written in the same path as source, but have{0}", Environment.NewLine );

                Console.WriteLine( "     .b64 extension for encoded file{0}", System.Environment.NewLine );

                Console.WriteLine( "     .bak extension for decoded file{0}", System.Environment.NewLine );

                Console.WriteLine( "=================================================================={0}", Environment.NewLine );

                Console.Write( "Enter source filename:{0}", System.Environment.NewLine );

     

    &n