pdf

package

v0.0.0-...-71ffc55 | Latest | Go to latest | Published: Dec 23, 2025 | License: MIT | Imports: 15 | Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/fjacquet/pdf2md

Links

Open Source Insights

Documentation ¶

Overview ¶

Package pdf provides low-level PDF parsing and manipulation functionality.

Index ¶

Variables
func DecodeStream(data []byte, filter Name, params Dictionary) ([]byte, error)
func DecodeStreamFromDict(data []byte, dict Dictionary) ([]byte, error)
type Array
- func (a Array) String() string
type Boolean
- func (b Boolean) String() string
type ColorSpaceInfo
type Dictionary
- func (d Dictionary) String() string
type Font
- func (f *Font) CalculateWidth(s string) float64
- func (f *Font) DecodeString(s string) string
- func (f *Font) GetWidth(code int) float64
type FontManager
- func NewFontManager(r *Reader) *FontManager
- func (fm *FontManager) CalculateWidth(fontName Name, s string) float64
- func (fm *FontManager) DecodeString(fontName Name, s string) string
- func (fm *FontManager) LoadFonts(resources Dictionary) error
type GraphicsState
type HexString
- func (s HexString) String() string
type IndirectObject
type IndirectRef
- func (r IndirectRef) String() string
type Integer
- func (i Integer) String() string
type Interpreter
- func NewInterpreter(fm *FontManager, resources Dictionary) *Interpreter
- func (in *Interpreter) Process(content []byte) ([]types.TextBlock, []types.Image, []types.VectorGraphic, error)
type Matrix
- func IdentityMatrix() Matrix
- func (m Matrix) Multiply(other Matrix) Matrix
- func (m Matrix) Transform(x, y float64) (float64, float64)
type Name
- func (n Name) String() string
type Null
- func (n Null) String() string
type Object
type Parser
- func NewParser(t *Tokenizer) *Parser
- func (p *Parser) ParseObject() (Object, error)
type Reader
- func NewReader(path string) (*Reader, error)
- func (r *Reader) Close() error
- func (r *Reader) ExtractContent(page Dictionary) ([]byte, error)
- func (r *Reader) ExtractLinks(pageDict Dictionary) ([]types.Link, error)
- func (r *Reader) GetPage(pageIndex int) (Dictionary, error)
- func (r *Reader) GetPageCount() (int, error)
- func (r *Reader) ReadObject(objNum int) (Object, error)
- func (r *Reader) Resolve(obj Object) (Object, error)
type Real
- func (r Real) String() string
type Stream
- func (s Stream) String() string
type StringLiteral
- func (s StringLiteral) String() string
type Token
type TokenType
type Tokenizer
- func NewTokenizer(r io.Reader) *Tokenizer
- func (t *Tokenizer) NextToken() (Token, error)
- func (t *Tokenizer) ReadStream(length int64) ([]byte, error)
- func (t *Tokenizer) ReadUntilEI() ([]byte, error)
- func (t *Tokenizer) UnreadToken(tok Token)
type XrefEntry

Constants ¶

This section is empty.

Variables ¶

View Source

var (
	WinAnsiEncoding = []string{}/* 256 elements not displayed */

	MacRomanEncoding = []string{}/* 256 elements not displayed */

	StandardEncoding = []string{}/* 256 elements not displayed */

)

Standard encodings

View Source

var GlyphToUnicode = map[string]string{}/* 564 elements not displayed */

GlyphToUnicode maps glyph names to Unicode strings

Functions ¶

func DecodeStream ¶

func DecodeStream(data []byte, filter Name, params Dictionary) ([]byte, error)

DecodeStream decodes a stream based on its filter and optional parameters

func DecodeStreamFromDict ¶

func DecodeStreamFromDict(data []byte, dict Dictionary) ([]byte, error)

DecodeStreamFromDict decodes stream data using filter(s) specified in the dictionary. Handles both single filters and arrays of filters.

Types ¶

func (Array) String ¶

func (a Array) String() string

func (Boolean) String ¶

func (b Boolean) String() string

type ColorSpaceInfo ¶

type ColorSpaceInfo struct {
	Components int
	IsIndexed  bool
	Palette    []color.RGBA // RGB palette for indexed colors
}

ColorSpaceInfo holds color space information including palette

type Dictionary ¶

type Dictionary map[Name]Object

Dictionary represents a dictionary object << ... >>

func (Dictionary) String ¶

func (d Dictionary) String() string

func (*Font) CalculateWidth ¶

func (f *Font) CalculateWidth(s string) float64

CalculateWidth calculates the width of the string in text space (1000 units)

func (*Font) DecodeString ¶

func (f *Font) DecodeString(s string) string

DecodeString decodes a PDF string using the font's encoding/CMap

func (*Font) GetWidth ¶

func (f *Font) GetWidth(code int) float64

GetWidth returns the width of the character code

func NewFontManager ¶

func NewFontManager(r *Reader) *FontManager

NewFontManager creates a new font manager

func (*FontManager) CalculateWidth ¶

func (fm *FontManager) CalculateWidth(fontName Name, s string) float64

CalculateWidth calculates the width of the string using the specified font

func (*FontManager) DecodeString ¶

func (fm *FontManager) DecodeString(fontName Name, s string) string

DecodeString decodes a string using the specified font

func (*FontManager) LoadFonts ¶

func (fm *FontManager) LoadFonts(resources Dictionary) error

LoadFonts loads fonts from the page resources

type GraphicsState ¶

type GraphicsState struct {
	CTM Matrix

	// Text State
	Tc    float64 // Character spacing
	Tw    float64 // Word spacing
	Th    float64 // Horizontal scaling
	Tl    float64 // Leading
	Tf    Name    // Font name
	Tfs   float64 // Font size
	Tmode int     // Text rendering mode
	Tr    float64 // Text rise
	Ts    float64 // Text knockout (mostly unused)

	// Path Construction
	CurrentPath []types.PathOperation
	LineWidth   float64
}

GraphicsState holds the current graphics state parameters

type HexString ¶

type HexString string

HexString represents a hex string object <...>

func (HexString) String ¶

func (s HexString) String() string

type IndirectObject ¶

type IndirectObject struct {
	ObjectNumber     int
	GenerationNumber int
	Object           Object
}

IndirectObject represents an indirect object definition (ObjNum GenNum obj ... endobj)

type IndirectRef ¶

type IndirectRef struct {
	ObjectNumber     int
	GenerationNumber int
}

IndirectRef represents an indirect reference (ObjNum GenNum R)

func (IndirectRef) String ¶

func (r IndirectRef) String() string

func (Integer) String ¶

func (i Integer) String() string

func (*Interpreter) Process ¶

func (in *Interpreter) Process(content []byte) ([]types.TextBlock, []types.Image, []types.VectorGraphic, error)

Process interprets the content stream

type Matrix ¶

type Matrix [6]float64

Matrix represents a 3x3 affine transformation matrix stored as the six values [a b c d e f], which correspond to the rows (a b 0), (c d 0), (e f 1).

func (Matrix) Multiply ¶

func (m Matrix) Multiply(other Matrix) Matrix

Multiply multiplies two matrices: m x other

func (Matrix) Transform ¶

func (m Matrix) Transform(x, y float64) (float64, float64)

Transform transforms a point (x, y)

type Name ¶

type Name string

Name represents a name object (e.g. /Type)

func (Name) String ¶

func (n Name) String() string

func (Null) String ¶

func (n Null) String() string

type Parser ¶

type Parser struct {
	// contains filtered or unexported fields
}

Parser parses PDF objects from a tokenizer

func (*Parser) ParseObject ¶

func (p *Parser) ParseObject() (Object, error)

ParseObject parses the next object from the stream

func (*Reader) Close ¶

func (r *Reader) Close() error

Close closes the file

func (*Reader) ExtractContent ¶

func (r *Reader) ExtractContent(page Dictionary) ([]byte, error)

ExtractContent extracts the raw content stream from a page

func (*Reader) ExtractLinks ¶

func (r *Reader) ExtractLinks(pageDict Dictionary) ([]types.Link, error)

ExtractLinks extracts links from a page dictionary

func (*Reader) GetPage ¶

func (r *Reader) GetPage(pageIndex int) (Dictionary, error)

GetPage returns the dictionary for the specified page (1-based index)

func (*Reader) GetPageCount ¶

func (r *Reader) GetPageCount() (int, error)

GetPageCount returns the total number of pages

func (*Reader) ReadObject ¶

func (r *Reader) ReadObject(objNum int) (Object, error)

ReadObject reads an indirect object by number

func (*Reader) Resolve ¶

func (r *Reader) Resolve(obj Object) (Object, error)

Resolve resolves an indirect reference to its actual object. If the object is not a reference, it is returned as is.

func (Real) String ¶

func (r Real) String() string

func (Stream) String ¶

func (s Stream) String() string

type StringLiteral ¶

type StringLiteral string

StringLiteral represents a string object (...)

func (StringLiteral) String ¶

func (s StringLiteral) String() string

func (*Tokenizer) NextToken ¶

func (t *Tokenizer) NextToken() (Token, error)

NextToken returns the next token from the stream

func (*Tokenizer) ReadStream ¶

func (t *Tokenizer) ReadStream(length int64) ([]byte, error)

ReadStream reads stream content from the PDF.

func (*Tokenizer) ReadUntilEI ¶

func (t *Tokenizer) ReadUntilEI() ([]byte, error)

ReadUntilEI reads raw bytes until the EI (end inline image) marker is found. EI must be preceded by whitespace and followed by whitespace/delimiter/EOF. Returns the image data without the trailing whitespace before EI.

func (*Tokenizer) UnreadToken ¶

func (t *Tokenizer) UnreadToken(tok Token)

UnreadToken pushes a token back to the stream

type XrefEntry ¶

type XrefEntry struct {
	Type         int   // 0=free, 1=in-use, 2=compressed
	Offset       int64 // For Type 1
	Gen          int   // For Type 1
	StreamObjNum int   // For Type 2
	StreamIndex  int   // For Type 2
}

XrefEntry represents an entry in the cross-reference table

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL