Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ Rake::RDocTask.new("doc") do |rdoc|
rdoc.options << "--inline-source"
end

# a gemspec for packaging this library
# a gemspec for packaging this library
# RSpec files aren't included, as they depend on the PDF files,
# which will make the gem filesize irritatingly large
spec = Gem::Specification.new do |spec|
Expand Down
6 changes: 3 additions & 3 deletions TODO
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ v0.8
- optimise PDF::Reader::Reference#from_buffer
- ruby-prof shows the match() call in this function is a real killer
- add extra callbacks
- list implemented features
- list implemented features
- encrypted? tagged? bookmarks? annotated? optimised?
- Allow more than just page content and metadata to be parsed (see spec section 3.6.1)
- bookmarks?
Expand Down Expand Up @@ -33,12 +33,12 @@ Sometime
- Ship some extra receivers in the standard package, particularly ones that are useful for running
rspec over generated PDF files

- When we encounter Identity-H encoded text with no ToUnicode CMap, render the glyphs and treat them as images, as there's no
- When we encounter Identity-H encoded text with no ToUnicode CMap, render the glyphs and treat them as images, as there's no
sensible way to convert them to unicode

- Add support for additional filters: ASCIIHexDecode, ASCII85Decode, LZWDecode, RunLengthDecode, CCITTFaxDecode, JBIG2Decode, DCTDecode, JPXDecode, Crypt?

- Add support for additional encodings:
- Add support for additional encodings:
- PDFDocEncoding
- Identity-V (I *think* this relates to vertical text. Not sure how we'd support it sensibly)

Expand Down
16 changes: 8 additions & 8 deletions lib/pdf/reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
Expand All @@ -37,26 +37,26 @@ module PDF
# on receivers.
#
# = Parsing a file
#
#
# PDF::Reader.file("somefile.pdf", receiver)
#
# = Parsing a String
#
#
# This is useful for processing a PDF that is already in memory
#
# PDF::Reader.string(pdf_string, receiver)
#
# = Parsing an IO object
#
#
# This can be a useful alternative to the first 2 options in some situations
#
# pdf = PDF::Reader.new
# pdf.parse(File.new("somefile.pdf"), receiver)
#
# = Parsing parts of a file
#
#
# Both PDF::Reader#file and PDF::Reader#string accept a third argument that specifies which
# parts of the file to process. By default, all options are enabled, so this can be useful
# parts of the file to process. By default, all options are enabled, so this can be useful
# to cut down processing time if you're only interested in say, metadata.
#
# As an example, the following call will disable parsing the contents of pages in the file,
Expand All @@ -65,7 +65,7 @@ module PDF
# PDF::Reader.file("somefile.pdf", receiver, {:metadata => true, :pages => false})
#
# Available options are currently:
#
#
# :metadata
# :pages
class Reader
Expand Down
2 changes: 1 addition & 1 deletion lib/pdf/reader/content.rb
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ def walk_pages (page)
contents.each do |content|
obj = @xref.object(content)
content_stream(obj, fonts)
end
end
end

resources.pop if res
Expand Down
6 changes: 3 additions & 3 deletions lib/pdf/reader/glyphlist.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# ###################################################################################
# Copyright (c) 1997,1998,2002,2007 Adobe Systems Incorporated
#
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this documentation file to use, copy, publish, distribute,
# sublicense, and/or sell copies of the documentation, and to permit
Expand All @@ -9,14 +9,14 @@
# allowed; and
# - The above copyright notice and this permission notice shall be
# included in all copies of the documentation.
#
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this documentation file, to create their own derivative works
# from the content of this document to use, copy, publish, distribute,
# sublicense, and/or sell the derivative works, and to permit others to do
# the same, provided that the derived work is not represented as being a
# copy or version of this document.
#
#
# Adobe shall not be liable to any party for any loss of revenue or profit
# or for indirect, incidental, special, consequential, or other similar
# damages, whether based on tort (including without limitation negligence
Expand Down
2 changes: 1 addition & 1 deletion lib/pdf/reader/register_receiver.rb
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def series(*methods)
match = idx and break if count == 0
end

if match
if match
return callbacks[match, methods.size]
else
return nil
Expand Down
6 changes: 3 additions & 3 deletions lib/pdf/reader/text_receiver.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
Expand Down Expand Up @@ -156,7 +156,7 @@ def super_show_text (string)

x = (@tm[2,0]/TS_UNITS_PER_H_CHAR).to_i
y = (ury - (@tm[2,1]/TS_UNITS_PER_V_CHAR)).to_i

#puts "rendering '#{string}' to #{x}x#{y}"

place = (@output[y] ||= (" " * urx.to_i))
Expand Down
2 changes: 1 addition & 1 deletion specs/buffer_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@
buf.token.should eql("James \\(Code Monkey")
buf.token.should eql(")")
end

specify "should correctly return an indirect reference" do
buf = parse_string("aaa 1 0 R bbb")

Expand Down
2 changes: 1 addition & 1 deletion specs/cmap_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class PDF::Reader::CMap
map = PDF::Reader::CMap.new(File.read(filename))
map.decode(0x0100).should eql(0x0100) # mapped with the bfrange operator
end

specify "should correctly load a cmap that uses the beginbfrange operator with the array syntax" do
filename = File.dirname(__FILE__) + "/data/cmap_with_bfrange_three.txt"
map = PDF::Reader::CMap.new(File.read(filename))
Expand Down
8 changes: 4 additions & 4 deletions specs/content_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class PDF::Reader::XRef
# the content class correctly recognises all instructions
receiver = mock("receiver")
receiver.should_receive(:begin_text_object).once # BT
receiver.should_receive(:move_text_position).once # Td
receiver.should_receive(:move_text_position).once # Td
receiver.should_receive(:set_text_font_and_size).once # Tf
receiver.should_receive(:set_text_rendering_mode).once # Tr
receiver.should_receive(:show_text).once # Tj
Expand All @@ -27,7 +27,7 @@ class PDF::Reader::XRef

# process the instructions
content = PDF::Reader::Content.new(receiver, nil)
content.content_stream(instructions)
content.content_stream(instructions)
end

specify "should send the correct callbacks when processing instructions containing 2 text blocks" do
Expand All @@ -36,7 +36,7 @@ class PDF::Reader::XRef
# the content class correctly recognises all instructions
receiver = mock("receiver")
receiver.should_receive(:begin_text_object).twice # BT
receiver.should_receive(:move_text_position).twice # Td
receiver.should_receive(:move_text_position).twice # Td
receiver.should_receive(:set_text_font_and_size).twice # Tf
receiver.should_receive(:set_text_rendering_mode).twice # Tr
receiver.should_receive(:show_text).twice # Tj
Expand All @@ -47,7 +47,7 @@ class PDF::Reader::XRef

# process the instructions
content = PDF::Reader::Content.new(receiver, nil)
content.content_stream(instructions)
content.content_stream(instructions)
end

specify "should send the correct callbacks when processing instructions containing an inline image" do
Expand Down
2 changes: 1 addition & 1 deletion specs/data/cmap_with_bfrange.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/CIDInit /ProcSet findresource begin 12 dict begin begincmap /CIDSystemInfo <<
/CIDInit /ProcSet findresource begin 12 dict begin begincmap /CIDSystemInfo <<
/Registry (TT1+0) /Ordering (T42UV) /Supplement 0 >> def
/CMapName /TT1+0 def
/CMapType 2 def
Expand Down
Loading