Documentation
¶
Overview ¶
Package pdf provides low-level PDF parsing and manipulation functionality.
Index ¶
- Variables
- func DecodeStream(data []byte, filter Name, params Dictionary) ([]byte, error)
- func DecodeStreamFromDict(data []byte, dict Dictionary) ([]byte, error)
- type Array
- type Boolean
- type ColorSpaceInfo
- type Dictionary
- type Font
- type FontManager
- type GraphicsState
- type HexString
- type IndirectObject
- type IndirectRef
- type Integer
- type Interpreter
- type Matrix
- type Name
- type Null
- type Object
- type Parser
- type Reader
- func (r *Reader) Close() error
- func (r *Reader) ExtractContent(page Dictionary) ([]byte, error)
- func (r *Reader) ExtractLinks(pageDict Dictionary) ([]types.Link, error)
- func (r *Reader) GetPage(pageIndex int) (Dictionary, error)
- func (r *Reader) GetPageCount() (int, error)
- func (r *Reader) ReadObject(objNum int) (Object, error)
- func (r *Reader) Resolve(obj Object) (Object, error)
- type Real
- type Stream
- type StringLiteral
- type Token
- type TokenType
- type Tokenizer
- type XrefEntry
Constants ¶
This section is empty.
Variables ¶
var (
WinAnsiEncoding = []string{} /* 256 elements not displayed */
MacRomanEncoding = []string{} /* 256 elements not displayed */
StandardEncoding = []string{} /* 256 elements not displayed */
)
Standard encodings
var GlyphToUnicode = map[string]string{}/* 564 elements not displayed */
GlyphToUnicode maps glyph names to Unicode strings
Functions ¶
func DecodeStream ¶
func DecodeStream(data []byte, filter Name, params Dictionary) ([]byte, error)
DecodeStream decodes a stream based on its filter and optional parameters
func DecodeStreamFromDict ¶
func DecodeStreamFromDict(data []byte, dict Dictionary) ([]byte, error)
DecodeStreamFromDict decodes stream data using filter(s) specified in the dictionary. Handles both single filters and arrays of filters.
Types ¶
type ColorSpaceInfo ¶
type ColorSpaceInfo struct {
Components int
IsIndexed bool
Palette []color.RGBA // RGB palette for indexed colors
}
ColorSpaceInfo holds color space information including palette
type Dictionary ¶
Dictionary represents a dictionary object << ... >>
func (Dictionary) String ¶
func (d Dictionary) String() string
type Font ¶
type Font struct {
BaseFont string
Subtype string
ToUnicode map[int]string
Encoding map[int]string // Custom encoding map (code -> glyph name)
Widths []float64
FirstChar int
LastChar int
CIDWidths map[int]float64
DefaultWidth float64
}
Font represents a PDF font
func (*Font) CalculateWidth ¶
CalculateWidth calculates the width of the string in text space (1000 units)
func (*Font) DecodeString ¶
DecodeString decodes a PDF string using the font's encoding/CMap
type FontManager ¶
FontManager manages fonts for a page
func NewFontManager ¶
func NewFontManager(r *Reader) *FontManager
NewFontManager creates a new font manager
func (*FontManager) CalculateWidth ¶
func (fm *FontManager) CalculateWidth(fontName Name, s string) float64
CalculateWidth calculates the width of the string using the specified font
func (*FontManager) DecodeString ¶
func (fm *FontManager) DecodeString(fontName Name, s string) string
DecodeString decodes a string using the specified font
func (*FontManager) LoadFonts ¶
func (fm *FontManager) LoadFonts(resources Dictionary) error
LoadFonts loads fonts from the page resources
type GraphicsState ¶
type GraphicsState struct {
CTM Matrix
// Text State
Tc float64 // Character spacing
Tw float64 // Word spacing
Th float64 // Horizontal scaling
Tl float64 // Leading
Tf Name // Font name
Tfs float64 // Font size
Tmode int // Text rendering mode
Tr float64 // Text rise
Ts float64 // Text knockout (unused mostly)
// Path Construction
CurrentPath []types.PathOperation
LineWidth float64
}
GraphicsState holds the current graphics state parameters
type IndirectObject ¶
IndirectObject represents a definition (ObjNum GenNum obj ... endobj)
type IndirectRef ¶
IndirectRef represents an indirect reference (ObjNum GenNum R)
func (IndirectRef) String ¶
func (r IndirectRef) String() string
type Interpreter ¶
type Interpreter struct {
FontManager *FontManager
Resources Dictionary
Stack []Object
// Graphics State Stack
State GraphicsState
StateStack []GraphicsState
// Text Object State (reset at BT)
Tm Matrix
Tlm Matrix
TextBlocks []types.TextBlock
Images []types.Image
Graphics []types.VectorGraphic
}
Interpreter interprets content streams
func NewInterpreter ¶
func NewInterpreter(fm *FontManager, resources Dictionary) *Interpreter
NewInterpreter creates a new interpreter
type Matrix ¶
type Matrix [6]float64
Matrix represents a 3x3 affine transformation matrix stored as the six values [a b c d e f], which correspond to the matrix rows [a b 0], [c d 0], [e f 1].
type Parser ¶
type Parser struct {
// contains filtered or unexported fields
}
Parser parses PDF objects from a tokenizer
func (*Parser) ParseObject ¶
ParseObject parses the next object from the stream
type Reader ¶
type Reader struct {
Trailer Dictionary
XrefTable map[int]XrefEntry
Root Dictionary
// contains filtered or unexported fields
}
Reader reads a PDF file
func (*Reader) ExtractContent ¶
func (r *Reader) ExtractContent(page Dictionary) ([]byte, error)
ExtractContent extracts the raw content stream from a page
func (*Reader) ExtractLinks ¶
func (r *Reader) ExtractLinks(pageDict Dictionary) ([]types.Link, error)
ExtractLinks extracts links from a page dictionary
func (*Reader) GetPage ¶
func (r *Reader) GetPage(pageIndex int) (Dictionary, error)
GetPage returns the dictionary for the specified page (1-based index)
func (*Reader) GetPageCount ¶
GetPageCount returns the total number of pages
func (*Reader) ReadObject ¶
ReadObject reads an indirect object by number
type Stream ¶
type Stream struct {
Dictionary Dictionary
Data []byte // Raw data (possibly compressed)
}
Stream represents a stream object
type StringLiteral ¶
type StringLiteral string
StringLiteral represents a string object (...)
func (StringLiteral) String ¶
func (s StringLiteral) String() string
type TokenType ¶
type TokenType int
TokenType represents the type of a token
const (
TokenError TokenType = iota
TokenEOF
TokenKeyword // obj, endobj, stream, endstream, xref, trailer, startxref, true, false, null, R
TokenNumeric // 123, -12.34
TokenInteger // 123 (integer only - for inline image parsing)
TokenReal // -12.34 (real only - for inline image parsing)
TokenName // /Name
TokenString // (String)
TokenHexString // <Hex>
TokenArrayStart // [
TokenArrayEnd // ]
TokenDictStart // <<
TokenDictEnd // >>
)
Token types for PDF parsing.
type Tokenizer ¶
type Tokenizer struct {
// contains filtered or unexported fields
}
Tokenizer reads tokens from a reader
func (*Tokenizer) ReadStream ¶
ReadStream reads stream content from the PDF.
func (*Tokenizer) ReadUntilEI ¶
ReadUntilEI reads raw bytes until the EI (end inline image) marker is found. EI must be preceded by whitespace and followed by whitespace/delimiter/EOF. Returns the image data without the trailing whitespace before EI.
func (*Tokenizer) UnreadToken ¶
UnreadToken pushes a token back to the stream