I'm trying to compress PDFs using iTextSharp. There are a lot of pages with color images stored as JPEGs (DCTDECODE)...so I'm converting them to black and white PNGs and replacing them in the document (the PNG is much smaller than a JPG for black and white format)
I have the following methods:
private static bool TryCompressPdfImages(PdfReader reader)
{
try
{
int n = reader.XrefSize;
for (int i = 0; i < n; i++)
{
PdfObject obj = reader.GetPdfObject(i);
if (obj == null || !obj.IsStream())
{
continue;
}
var dict = (PdfDictionary)PdfReader.GetPdfObject(obj);
var subType = (PdfName)PdfReader.GetPdfObject(dict.Get(PdfName.SUBTYPE));
if (!PdfName.IMAGE.Equals(subType))
{
continue;
}
var stream = (PRStream)obj;
try
{
var image = new PdfImageObject(stream);
Image img = image.GetDrawingImage();
if (img == null) continue;
using (img)
{
int width = img.Width;
int height = img.Height;
using (var msImg = new MemoryStream())
using (var bw = img.ToBlackAndWhite())
{
bw.Save(msImg, ImageFormat.Png);
msImg.Position = 0;
stream.SetData(msImg.ToArray(), false, PdfStream.NO_COMPRESSION);
stream.Put(PdfName.TYPE, PdfName.XOBJECT);
stream.Put(PdfName.SUBTYPE, PdfName.IMAGE);
stream.Put(PdfName.FILTER, PdfName.FLATEDECODE);
stream.Put(PdfName.WIDTH, new PdfNumber(width));
stream.Put(PdfName.HEIGHT, new PdfNumber(height));
stream.Put(PdfName.BITSPERCOMPONENT, new PdfNumber(8));
stream.Put(PdfName.COLORSPACE, PdfName.DEVICERGB);
stream.Put(PdfName.LENGTH, new PdfNumber(msImg.Length));
}
}
}
catch (Exception ex)
{
Trace.TraceError(ex.ToString());
}
finally
{
// may or may not help
reader.RemoveUnusedObjects();
}
}
return true;
}
catch (Exception ex)
{
Trace.TraceError(ex.ToString());
return false;
}
}
public static Image ToBlackAndWhite(this Image image)
{
image = new Bitmap(image);
using (Graphics gr = Graphics.FromImage(image))
{
var grayMatrix = new[]
{
new[] {0.299f, 0.299f, 0.299f, 0, 0},
new[] {0.587f, 0.587f, 0.587f, 0, 0},
new[] {0.114f, 0.114f, 0.114f, 0, 0},
new [] {0f, 0, 0, 1, 0},
new [] {0f, 0, 0, 0, 1}
};
var ia = new ImageAttributes();
ia.SetColorMatrix(new ColorMatrix(grayMatrix));
ia.SetThreshold((float)0.8); // Change this threshold as needed
var rc = new Rectangle(0, 0, image.Width, image.Height);
gr.DrawImage(image, rc, 0, 0, image.Width, image.Height, GraphicsUnit.Pixel, ia);
}
return image;
}
I've tried varieties of COLORSPACEs and BITSPERCOMPONENTs, but always get "Insufficient data for an image", "Out of memory", or "An error exists on this page" upon trying to open the resulting PDF...so I must be doing it wrong. I'm pretty sure FLATEDECODE is the right thing to use.
Any assistance would be much appreciated.
See Question&Answers more detail:os