pdf_print_content_streams.go hosted by Oembed Proxy for GitHub
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
/*
 * List all content streams for all pages in a pdf file.
 *
 * Run as: go run pdf_print_content_streams.go input.pdf [page]
 * The page number is optional (by default all pages are processed).
 */

package main

import (
	"fmt"
	"os"

	"strconv"

	"github.com/unidoc/unipdf/v3/common/license"
	"github.com/unidoc/unipdf/v3/contentstream"
	"github.com/unidoc/unipdf/v3/model"
)

func init() {
	// Make sure to load your metered License API key prior to using the library.
	// If you need a key, you can sign up and create a free one at https://cloud.unidoc.io
	err := license.SetMeteredKey(os.Getenv(`UNIDOC_LICENSE_API_KEY`))
	if err != nil {
		panic(err)
	}
}

func main() {
	if len(os.Args) < 2 {
		fmt.Printf("Usage: go run pdf_list_content_streams.go input.pdf [page]\n")
		os.Exit(1)
	}

	inputPath := os.Args[1]
	pageNum := -1

	if len(os.Args) >= 3 {
		val, err := strconv.ParseInt(os.Args[2], 10, 64)
		if err != nil {
			fmt.Printf("Error: %v\n", err)
			os.Exit(1)
		}
		pageNum = int(val)
	}

	fmt.Println(inputPath)
	err := listContentStreams(inputPath, pageNum)
	if err != nil {
		fmt.Printf("Error: %v\n", err)
		os.Exit(1)
	}
}

func listContentStreams(inputPath string, targetPageNum int) error {
	pdfReader, f, err := model.NewPdfReaderFromFile(inputPath, nil)
	if err != nil {
		return err
	}
	defer f.Close()

	numPages, err := pdfReader.GetNumPages()
	if err != nil {
		return err
	}

	fmt.Printf("--------------------\n")
	fmt.Printf("Content streams:\n")
	fmt.Printf("--------------------\n")
	for i := 0; i < numPages; i++ {
		pageNum := i + 1
		if pageNum != targetPageNum && targetPageNum != -1 {
			continue
		}
		fmt.Printf("Page %d\n", pageNum)

		page, err := pdfReader.GetPage(pageNum)
		if err != nil {
			return err
		}

		contentStreams, err := page.GetContentStreams()
		if err != nil {
			return err
		}
		fmt.Printf("Page %d has %d content streams:\n", pageNum, len(contentStreams))

		pageContentStr := ""

		// If the value is an array, the effect shall be as if all of the streams in the array were concatenated,
		// in order, to form a single stream.
		for _, cstream := range contentStreams {
			pageContentStr += cstream
		}
		fmt.Printf("%s\n", pageContentStr)

		cstreamParser := contentstream.NewContentStreamParser(pageContentStr)
		operations, err := cstreamParser.Parse()
		if err != nil {
			return err
		}

		fmt.Printf("=== Full list\n")
		for idx, op := range *operations {
			fmt.Printf("Operation %d: %s - Params: %v\n", idx+1, op.Operand, op.Params)
		}
	}

	return nil
}