Coding Challenges - Build Your Own wc Tool
2024-10-31
This is my attempt at the coding challenge which I found at this website. The task is to build a simple program which works like the wc tool; wc stands for word count.
From the Linux man page, the wc tool's description is: Print newline, word, and byte counts for each FILE, and a total line if more than one FILE is specified. With no FILE, or when FILE is -, read standard input.
Here's what the output looks like when calling my wc tool with the sample text file provided by the challenge:
I have divided my code into three main parts:
- Parse arguments
- Process text
- Display results
The Parse Arguments function takes the arguments provided by the user as a string array. Its output is an instance of the Arguments class, which contains:
- Count options
- Filenames
- Whether to print help
- Whether to print version
/// <summary>
/// Parsed command line: which counts to print, which files to read, and
/// whether help or version output was requested.
/// </summary>
private class Arguments
{
    /// <summary>Combination of the counts selected on the command line.</summary>
    public CountOption CountOption { get; set; } = CountOption.None;

    /// <summary>
    /// Files named on the command line. Starts out as an empty list rather than
    /// null, so a freshly created instance can be inspected safely.
    /// </summary>
    public List<string> FileNames { get; set; } = new();

    /// <summary>True when version information should be printed.</summary>
    public bool PrintVersion { get; set; }

    /// <summary>True when the help text should be printed.</summary>
    public bool PrintHelp { get; set; }
}
For the count options I chose to use a flags enumeration rather than separate booleans. I prefer the enumeration because it lets me easily combine multiple options into one variable.
/// <summary>
/// Counts that can be requested. Every member except <see cref="Default"/> is a
/// single bit, so members can be combined with the bitwise OR operator.
/// </summary>
[Flags]
private enum CountOption
{
    None = 0x00,
    Byte = 0x01,
    Line = 0x02,
    Word = 0x04,
    Char = 0x08,
    MaxLineLength = 0x10,

    /// <summary>The combination of line, word and byte counts.</summary>
    Default = Line | Word | Byte
}
After the arguments are parsed, files are processed one by one. If no file is specified then the standard input is used.
The Process text function returns an object which contains the file statistics. To make it easy to add the results from multiple files into a total, I have defined an operator overload for addition:
/// <summary>
/// Statistics gathered for one input, or the running total over several inputs.
/// Counters are 64-bit so that totals over large inputs do not wrap around.
/// Instances are immutable; combine them with the <c>+</c> operator.
/// </summary>
private class ProcessingResult
{
    /// <summary>Creates a result with every counter at zero (the identity for <c>+</c>).</summary>
    public ProcessingResult() { }

    /// <summary>Creates a result from already computed counters.</summary>
    public ProcessingResult(long byteCount, long lineCount, long wordCount, long charCount, long maxLineLength)
    {
        ByteCount = byteCount;
        LineCount = lineCount;
        WordCount = wordCount;
        CharCount = charCount;
        MaxLineLength = maxLineLength;
    }

    public long ByteCount { get; }
    public long LineCount { get; }
    public long WordCount { get; }
    public long CharCount { get; }
    public long MaxLineLength { get; }

    /// <summary>
    /// Combines two results: the byte, line, word and character counts are added,
    /// while the maximum line length is the larger of the two (it is a maximum,
    /// not a sum).
    /// </summary>
    public static ProcessingResult operator +(ProcessingResult a, ProcessingResult b)
    {
        return new ProcessingResult(
            a.ByteCount + b.ByteCount,
            a.LineCount + b.LineCount,
            a.WordCount + b.WordCount,
            a.CharCount + b.CharCount,
            Math.Max(a.MaxLineLength, b.MaxLineLength));
    }
}
When all files are processed - or the standard input is read - the processing results are displayed. If there is more than one file the total is also displayed.
The files are processed sequentially, and the result for one file does not depend in any way on the results for the other files, so one future improvement could be to process the files in parallel.
Here's the complete code
using System.Text;
namespace WordCount;
internal class Program
{
private const string version = "1.00";
static void Main(string[] args)
{
// Maps every supported command-line switch (short and long form) to the
// count it selects. ParseArguments reads this table.
var supportedCountOptions = new Dictionary<string, CountOption>
{
    { "-c", CountOption.Byte },
    { "--bytes", CountOption.Byte },
    { "-m", CountOption.Char },
    { "--chars", CountOption.Char },
    { "-l", CountOption.Line },
    { "--lines", CountOption.Line },
    { "-L", CountOption.MaxLineLength },
    { "--max-line-length", CountOption.MaxLineLength },
    { "-w", CountOption.Word },
    { "--words", CountOption.Word }
};

Arguments arguments;
try
{
    arguments = ParseArguments(args);
}
catch (ArgumentException ex)
{
    // Diagnostics go to standard error so they never mix with the counts
    // when the output is piped into another program.
    Console.Error.WriteLine(ex.Message);
    Environment.ExitCode = 1;
    return;
}

if (arguments.PrintHelp)
{
    PrintHelp();
    return;
}

if (arguments.PrintVersion)
{
    PrintVersion();
    return;
}

if (arguments.FileNames.Count == 0)
{
    // No files given: read standard input. Console.In belongs to the runtime,
    // so it is deliberately not disposed here.
    var stdinResult = ProcessText(Console.In, Console.InputEncoding, arguments.CountOption);
    DisplayResult(arguments.CountOption, new List<ProcessingResult> { stdinResult }, new List<string>());
}
else
{
    var total = new ProcessingResult();
    var processingResults = new List<ProcessingResult>();

    // Names are collected side by side with the results. Skipped files must not
    // appear here, otherwise every later result would be printed next to the
    // wrong file name.
    var processedFileNames = new List<string>();

    foreach (var fileName in arguments.FileNames)
    {
        if (!File.Exists(fileName))
        {
            // Always end the message with a newline so the results printed
            // afterwards start on their own line.
            Console.Error.WriteLine($"wc: {fileName} no such file");
            Environment.ExitCode = 1;
            continue;
        }

        using var fileStream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, FileOptions.SequentialScan);
        using var textReader = new StreamReader(fileStream);
        var encoding = textReader.CurrentEncoding;
        var processingResult = ProcessText(textReader, encoding, arguments.CountOption);

        total += processingResult;
        processingResults.Add(processingResult);
        processedFileNames.Add(fileName);
    }

    // A total line is only meaningful when more than one file was counted.
    if (processingResults.Count > 1)
    {
        processingResults.Add(total);
        processedFileNames.Add("total");
    }

    DisplayResult(arguments.CountOption, processingResults, processedFileNames);
}
// Prints one line per entry of processingResults. The selected counts appear
// in the fixed order line, word, character, byte, maximum line length; each
// column is right-aligned to the widest value in that column. The entry of
// fileNames with the same index, when there is one, is appended as the label.
// Every line, including the last one, is terminated with a newline.
void DisplayResult(CountOption countOption, List<ProcessingResult> processingResults, List<string> fileNames)
{
    if (processingResults.Count == 0)
    {
        return;
    }

    // Decide once which columns are printed instead of re-testing the flags
    // for every row.
    var columns = new List<Func<ProcessingResult, string>>();
    if ((countOption & CountOption.Line) != 0)
    {
        columns.Add(result => result.LineCount.ToString());
    }
    if ((countOption & CountOption.Word) != 0)
    {
        columns.Add(result => result.WordCount.ToString());
    }
    if ((countOption & CountOption.Char) != 0)
    {
        columns.Add(result => result.CharCount.ToString());
    }
    if ((countOption & CountOption.Byte) != 0)
    {
        columns.Add(result => result.ByteCount.ToString());
    }
    if ((countOption & CountOption.MaxLineLength) != 0)
    {
        columns.Add(result => result.MaxLineLength.ToString());
    }

    // Width of each column = length of its longest formatted value.
    var widths = columns
        .Select(format => processingResults.Max(result => format(result).Length))
        .ToArray();

    var sb = new StringBuilder();
    for (var row = 0; row < processingResults.Count; row++)
    {
        for (var column = 0; column < columns.Count; column++)
        {
            var text = columns[column](processingResults[row]);
            sb.Append(' ').Append(text.PadLeft(widths[column])).Append(' ');
        }

        // Standard input has no name, so there may be fewer names than rows.
        if (row < fileNames.Count)
        {
            sb.Append(fileNames[row]);
        }

        sb.AppendLine();
    }

    Console.Write(sb);
}
// Reads textReader to the end and gathers the statistics for that input.
//
//   lines            number of '\n' characters actually present, so a final
//                    line without a terminator is not counted as a line
//   words            runs of non-white-space characters (only when requested)
//   characters       Unicode code points; a surrogate pair counts once
//   max line length  longest run of characters between line breaks
//                    ('\n' or '\r'), only when requested
//   bytes            for a StreamReader over a seekable stream: the length of
//                    the underlying stream, which is exact and includes any
//                    byte order mark. This assumes the reader starts at the
//                    beginning of the stream, as it does when Main opens a
//                    file. Otherwise: the encoded size of the text read, using
//                    the given encoding.
//
// The input is read in fixed-size chunks, so it is never held in memory as a
// whole and the real line terminators are seen instead of being assumed to be
// Environment.NewLine.
ProcessingResult ProcessText(TextReader textReader, Encoding encoding, CountOption countOption)
{
    var countWords = (countOption & CountOption.Word) != 0;
    var trackLineLength = (countOption & CountOption.MaxLineLength) != 0;

    long byteCount = 0;
    var lineCount = 0;
    var wordCount = 0;
    var charCount = 0;
    var maxLineLength = 0;
    var currentLineLength = 0;
    var insideWord = false;

    var buffer = new char[4096];

    // 1 when the previous chunk ended with a high surrogate that was moved to
    // buffer[0], so that it is encoded together with its low surrogate.
    var carried = 0;

    int read;
    while ((read = textReader.Read(buffer, carried, buffer.Length - carried)) > 0)
    {
        var available = carried + read;

        // A carried character was already inspected in the previous round,
        // so only the newly read characters are examined here.
        for (var i = carried; i < available; i++)
        {
            var c = buffer[i];
            var startsCharacter = !char.IsLowSurrogate(c);

            if (startsCharacter)
            {
                charCount++;
            }

            if (c == '\n')
            {
                lineCount++;
            }

            if (countWords)
            {
                if (char.IsWhiteSpace(c))
                {
                    insideWord = false;
                }
                else if (!insideWord)
                {
                    insideWord = true;
                    wordCount++;
                }
            }

            if (trackLineLength)
            {
                if (c == '\n' || c == '\r')
                {
                    if (currentLineLength > maxLineLength)
                    {
                        maxLineLength = currentLineLength;
                    }
                    currentLineLength = 0;
                }
                else if (startsCharacter)
                {
                    currentLineLength++;
                }
            }
        }

        // Hold back a trailing high surrogate: encoding it without its partner
        // would give the size of a replacement character instead.
        var encodable = char.IsHighSurrogate(buffer[available - 1]) ? available - 1 : available;
        byteCount += encoding.GetByteCount(buffer, 0, encodable);

        carried = available - encodable;
        if (carried == 1)
        {
            buffer[0] = buffer[available - 1];
        }
    }

    if (carried == 1)
    {
        // The input ended with an unpaired high surrogate.
        byteCount += encoding.GetByteCount(buffer, 0, 1);
    }

    // The last line may not end with a line break.
    if (trackLineLength && currentLineLength > maxLineLength)
    {
        maxLineLength = currentLineLength;
    }

    if (textReader is StreamReader { BaseStream: { CanSeek: true } stream })
    {
        byteCount = stream.Length;
    }

    // The result is built from int-sized values; saturate instead of wrapping
    // around to a negative number for inputs larger than int.MaxValue bytes.
    var reportedByteCount = (int)Math.Min(byteCount, int.MaxValue);

    return new ProcessingResult(reportedByteCount, lineCount, wordCount, charCount, maxLineLength);
}
// Turns the raw command-line arguments into an Arguments instance.
//
// An argument that starts with '-' is an option; everything else is a file
// name. Count options are looked up in supportedCountOptions and may also be
// clustered behind a single dash ("-lw" is the same as "-l -w"). "--help"
// and "--version" stop the parsing immediately. When no count option is
// given, CountOption.Default is used.
//
// Throws ArgumentException for an empty argument or an unknown option.
Arguments ParseArguments(string[] args)
{
    var fileNames = new List<string>();
    var countOption = CountOption.None;
    var printVersion = false;
    var printHelp = false;

    foreach (var arg in args)
    {
        if (string.IsNullOrEmpty(arg))
        {
            throw new ArgumentException("wc: empty argument provided");
        }

        // Only a leading '-' marks an option. A '-' anywhere else belongs to
        // the file name (for example "a-b.txt").
        if (arg[0] != '-')
        {
            fileNames.Add(arg);
            continue;
        }

        if (arg == "--version")
        {
            printVersion = true;
            break;
        }

        if (arg == "--help")
        {
            printHelp = true;
            break;
        }

        if (supportedCountOptions.TryGetValue(arg, out var selected))
        {
            countOption |= selected;
            continue;
        }

        // Clustered short options such as "-lw": every letter must be a known
        // short option, otherwise the whole argument is rejected.
        if (arg.Length > 2 && arg[1] != '-')
        {
            var clustered = CountOption.None;
            var allKnown = true;
            foreach (var letter in arg.Substring(1))
            {
                if (!supportedCountOptions.TryGetValue("-" + letter, out var single))
                {
                    allKnown = false;
                    break;
                }
                clustered |= single;
            }

            if (allKnown)
            {
                countOption |= clustered;
                continue;
            }
        }

        throw new ArgumentException($"wc: invalid option {arg}{Environment.NewLine}Try 'wc --help' for more information");
    }

    // Every count option sets at least one bit, so None means none was given.
    if (countOption == CountOption.None)
    {
        countOption = CountOption.Default;
    }

    return new Arguments { CountOption = countOption, FileNames = fileNames, PrintVersion = printVersion, PrintHelp = printHelp };
}
// Writes the program name followed by the version constant, then a newline.
void PrintVersion() => Console.WriteLine("wc " + version);
// Writes the usage text to standard output.
void PrintHelp()
{
    // One entry per output line; an empty entry produces a blank line.
    string[] helpLines =
    {
        "Usage: wc [OPTION]... [FILE]...",
        "Print new line, word, and byte counts for each FILE, and a total line if",
        "more than one FILE is specified. A word is a non-zero-length sequence of",
        "characters delimited by white space.",
        "",
        "With no FILE, read standard input.",
        "",
        "The options below may be used to select which counts are printed, always in",
        "the following order: new line, word, character, byte, maximum line length.",
        " -c, --bytes print the byte counts",
        " -m, --chars print the character counts",
        " -l, --lines print the newline counts",
        " -L, --max-line-length print the maximum display width",
        " -w, --words print the word counts",
        " --help display this help and exit",
        " --version output version information and exit",
    };

    // Every line, including the last one, ends with the platform newline.
    var helpText = string.Join(Environment.NewLine, helpLines) + Environment.NewLine;
    Console.Write(helpText);
}
}
/// <summary>
/// Counts that can be requested on the command line. Every member except
/// <see cref="Default"/> is a single bit, so members can be combined with the
/// bitwise OR operator.
/// </summary>
[Flags]
private enum CountOption
{
    None = 0x00,
    Byte = 0x01,
    Line = 0x02,
    Word = 0x04,
    Char = 0x08,
    MaxLineLength = 0x10,

    /// <summary>Line, word and byte counts; applied when no count option is given.</summary>
    Default = Line | Word | Byte
}
/// <summary>
/// Statistics gathered for one input, or the running total over several inputs.
/// Counters are 64-bit so that totals over large inputs do not wrap around.
/// Instances are immutable; combine them with the <c>+</c> operator.
/// </summary>
private class ProcessingResult
{
    /// <summary>Creates a result with every counter at zero (the identity for <c>+</c>).</summary>
    public ProcessingResult() { }

    /// <summary>Creates a result from already computed counters.</summary>
    public ProcessingResult(long byteCount, long lineCount, long wordCount, long charCount, long maxLineLength)
    {
        ByteCount = byteCount;
        LineCount = lineCount;
        WordCount = wordCount;
        CharCount = charCount;
        MaxLineLength = maxLineLength;
    }

    public long ByteCount { get; }
    public long LineCount { get; }
    public long WordCount { get; }
    public long CharCount { get; }
    public long MaxLineLength { get; }

    /// <summary>
    /// Combines two results: the byte, line, word and character counts are added,
    /// while the maximum line length is the larger of the two (it is a maximum,
    /// not a sum).
    /// </summary>
    public static ProcessingResult operator +(ProcessingResult a, ProcessingResult b)
    {
        return new ProcessingResult(
            a.ByteCount + b.ByteCount,
            a.LineCount + b.LineCount,
            a.WordCount + b.WordCount,
            a.CharCount + b.CharCount,
            Math.Max(a.MaxLineLength, b.MaxLineLength));
    }
}
/// <summary>
/// Parsed command line: which counts to print, which files to read, and
/// whether help or version output was requested. Members are properties
/// rather than public fields; object initializers and member access are
/// written exactly the same way.
/// </summary>
private class Arguments
{
    /// <summary>Combination of the counts selected on the command line.</summary>
    public CountOption CountOption { get; set; } = CountOption.None;

    /// <summary>
    /// Files named on the command line. Starts out as an empty list rather than
    /// null, so a freshly created instance can be inspected safely.
    /// </summary>
    public List<string> FileNames { get; set; } = new();

    /// <summary>True when version information should be printed.</summary>
    public bool PrintVersion { get; set; }

    /// <summary>True when the help text should be printed.</summary>
    public bool PrintHelp { get; set; }
}
}