0

I want to automate image extraction from a PDF using Python. When the code below is executed, it appears to pull images from the whole PDF in a random order; it does not follow the page-by-page sequence of the images in the given PDF.

Please find attached the images that the code below extracted from my PDF. (enter image description here)

from PIL import Image
import fitz
import os


def start():
    """Extract every image referenced by the pages of the PDF as PNG files.

    Pages are visited in document order (index 0 first) and each image is
    written to the "Extract Images" directory as ``p<page>-<xref>.png``.
    The page index is zero-padded to the width of the largest page index so
    that an alphabetical directory listing groups the files page by page
    (otherwise ``p10-…`` sorts before ``p2-…``).  For documents with at most
    ten pages the names are identical to the unpadded form.

    Within one page the images are written in the order PyMuPDF lists them;
    NOTE(review): that is presumably the order of the page's resource
    entries, not necessarily the visual order on the page - confirm against
    the PyMuPDF documentation.

    Raises:
        OSError: if the output directory cannot be created.
    """
    doc = fitz.open("cs2103g0052_019_549291_ca_cs_sb_sb_fy22q2wk7_oa_showcase-premium-fr_XXXxXXX_jsos.pdf")
    try:
        DIR = "Extract Images"
        # Only "already exists" is tolerated; any other failure (e.g. missing
        # permissions) surfaces here instead of later at the first write.
        os.makedirs(DIR, exist_ok=True)

        page_count = len(doc)
        pad = len(str(max(page_count - 1, 0)))

        # Newer PyMuPDF releases dropped the camelCase method names; prefer
        # the current name and fall back to the legacy one on old installs.
        list_images = getattr(doc, "get_page_images", None) or doc.getPageImageList

        for i in range(page_count):
            for img in list_images(i):
                xref = img[0]  # first item of each entry is the image xref
                pix = fitz.Pixmap(doc, xref)
                # PNG cannot hold CMYK.  Subtract the alpha channel so that
                # plain CMYK (n == 4, no alpha) is converted as well.
                if pix.n - pix.alpha > 3:
                    pix = fitz.Pixmap(fitz.csRGB, pix)
                target = os.path.join(DIR, "p%0*d-%s.png" % (pad, i, xref))
                save_png = getattr(pix, "save", None) or pix.writePNG
                save_png(target)
                pix = None  # release the pixmap memory before the next image
    finally:
        doc.close()

# Run the extraction immediately when this script is executed.
start()

Expected output: I want to extract the images in the same sequence in which they appear in the PDF.
For example, it should first pick the images from page 1, then those from page 2, and so on, page by page.