This is a continuation of my previous article (http://support.ongetc.com/?q=content/pdf-pages-counting-using-vb-script-0), which used a VB script to count the pages in PDF files. That script has a limitation: it can't count the pages of PDF files that were scanned from a scanner with OCR. So I set out to look for a solution! There are many options out there:
. Free or paid utilities, but they don't give me what I want: they do either too much or too little, which is confusing!
. Libraries or frameworks that you can download and compile into your code, but they bring in extra dependencies!
. I decided to convert my VB script version from the previous article to C#, and it works very well!
Here is the complete source code of that utility!
Usage:
. CountPDFPages /h (show help)
. CountPDFPages (count the pages of all PDF files in the current folder)
. CountPDFPages fullPathToPDFFolder
It runs really fast! It produces a text file (pagecount.txt, written to the scanned folder) that lists each PDF file with its page count next to it, separated by a comma, so you can import it into Excel or something similar.
Enjoy!
#region CountPDFPages
//
// Authors: Chanh Ong
// Date: 4/27/2010
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
#endregion
using System;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
namespace CountPDFPages
{
/// <summary>
/// Console utility that scans one folder (non-recursively) for files with a
/// ".pdf" extension, estimates the page count of each one and writes a
/// "fileName,pageCount" line per file to pagecount.txt in that same folder.
/// </summary>
class CountPDFPages
{
    // Program name shown in the usage text.
    static string meName = "CountPDFPages";

    // Name of the report file created inside the scanned folder.
    const string reportFileName = "pagecount.txt";

    // Matches "/Type" followed by optional whitespace and "/Page", where the
    // next character exists and is not 's' (so "/Type /Pages" is not counted).
    // Built once because the same pattern is applied to every file.
    static readonly Regex pageObjectPattern = new Regex(@"/Type\s*/Page[^s]");

    /// <summary>
    /// Entry point. With no argument the current folder is scanned; a help
    /// switch as first argument prints the usage text; any other first
    /// argument is used as the folder to scan.
    /// </summary>
    /// <param name="args">Command-line arguments; only the first one is read.</param>
    static void Main(string[] args)
    {
        Console.WriteLine("\nBy: Chanh Ong\nUse /h to get Help\n");

        if (args.Length > 0 && isHelpSwitch(args[0]))
        {
            showHelp();
            return;
        }

        // "." is resolved to the current directory by Path.GetFullPath and,
        // unlike ".\", does not depend on the platform's separator character.
        string filePath = args.Length > 0 ? args[0] : ".";
        processPDFFiles(filePath);
    }

    /// <summary>
    /// Tells whether a command-line argument asks for the usage text.
    /// Accepts "/h", "/?", "-h", "-?" and "--help", ignoring case.
    /// </summary>
    static bool isHelpSwitch(string arg)
    {
        string[] helpSwitches = { "/h", "/?", "-h", "-?", "--help" };
        foreach (string helpSwitch in helpSwitches)
        {
            if (string.Equals(arg, helpSwitch, StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Runs the scan on <paramref name="filePath"/> and prints a summary line
    /// with the number of PDF files that were processed.
    /// </summary>
    /// <param name="filePath">Folder to scan.</param>
    public static void processPDFFiles(string filePath)
    {
        int countFile = getFiles(filePath);
        if (countFile == 0)
        {
            Console.WriteLine("Nothing to do!");
        }
        Console.WriteLine("\nProcessed " + countFile + " files");
    }

    /// <summary>
    /// Counts the pages of every ".pdf" file (extension compared ignoring
    /// case) directly inside <paramref name="sfolder"/>, writing one
    /// "fileName,pageCount" line per file to pagecount.txt in that folder and
    /// echoing each result to the console.
    /// </summary>
    /// <param name="sfolder">Folder to scan; may be relative.</param>
    /// <returns>
    /// The number of PDF files encountered. A file-system error (missing
    /// folder, access denied, invalid path, ...) is reported on the console
    /// and ends the scan; the count reached so far is returned.
    /// </returns>
    public static int getFiles(string sfolder)
    {
        int countFile = 0;

        // Validate before touching the file system: DirectoryInfo and
        // Path.GetFullPath both throw on a null or empty path.
        if (string.IsNullOrEmpty(sfolder))
        {
            Console.WriteLine("\nNo folder specified.\n");
            return countFile;
        }

        try
        {
            string folderPath = Path.GetFullPath(sfolder);

            // Snapshot the listing before the report file is created.
            FileInfo[] fileInfoArray = new DirectoryInfo(folderPath).GetFiles();

            using (StreamWriter outFile = new StreamWriter(Path.Combine(folderPath, reportFileName)))
            {
                foreach (FileInfo oneFileInfo in fileInfoArray)
                {
                    if (!string.Equals(oneFileInfo.Extension, ".pdf", StringComparison.OrdinalIgnoreCase))
                    {
                        continue;
                    }

                    countFile++;
                    string fullFilePath = Path.Combine(folderPath, oneFileInfo.Name);
                    string pages = getNoOfPDFPages(fullFilePath).ToString();
                    outFile.WriteLine(oneFileInfo.Name + "," + pages);
                    Console.WriteLine("This \"" + fullFilePath + "\" file has " + pages + " pages");
                }
            }
        }
        catch (Exception e)
        {
            // Report the file-system problems a user can cause or fix;
            // anything else is a genuine fault and is rethrown untouched.
            if (!isFileSystemError(e))
            {
                throw;
            }
            Console.WriteLine("\n" + e.Message + "\n");
        }

        return countFile;
    }

    /// <summary>
    /// Tells whether an exception is one of the path / file-system failures
    /// that getFiles reports on the console instead of propagating.
    /// </summary>
    static bool isFileSystemError(Exception e)
    {
        return e is IOException
            || e is UnauthorizedAccessException
            || e is ArgumentException
            || e is NotSupportedException;
    }

    /// <summary>
    /// Estimates the number of pages in a PDF file by reading the whole file
    /// as text and counting the matches of the "/Type /Page" pattern. This is
    /// a text heuristic; the PDF structure itself is not parsed.
    /// </summary>
    /// <param name="fileName">Path of the file to inspect.</param>
    /// <returns>The number of matches, or 0 when the file does not exist.</returns>
    public static int getNoOfPDFPages(string fileName)
    {
        if (!File.Exists(fileName))
        {
            return 0;
        }

        // Dispose the stream and reader deterministically so the file handle
        // is released as soon as the text has been read.
        using (FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read))
        using (StreamReader reader = new StreamReader(fs))
        {
            string pdfText = reader.ReadToEnd();
            return pageObjectPattern.Matches(pdfText).Count;
        }
    }

    /// <summary>
    /// Prints the command-line usage text to the console.
    /// </summary>
    public static void showHelp()
    {
        Console.WriteLine("Usage:\n");
        Console.WriteLine(meName + " (assume current folder)\n");
        Console.WriteLine(" or\n");
        Console.WriteLine(meName + " FullFolderPath (ex: \"p:\\folder\\subfolder\")\n");
    }
}
}

