PL/SQL: PdfPageCount

Está meio ruim de ler aqui na página por causa do tamanho das letras e do tamanho das linhas. Sugiro copiar e colar para um editor com sintaxe colorida. Obs.: eu testei com vários arquivos PDF, mas é certo que há erros à espreita. Considere-se avisado. Última atualização: 2010-09-19.

-- Public interface of the PDF helper package.
create or replace package pk_pdf as
  -- Returns the number of pages of the PDF document stored in the given BLOB.
  -- The package body returns NULL when the content does not start with
  -- '%PDF-' or when any error is raised while parsing.
  function number_of_pages(pdf in blob) return integer;
end pk_pdf;

create or replace package body pk_pdf as

    -- Indirect reference: object number plus generation number.
    type Pdf_Reference is record (
        obj_num number,
        gen_num number);

    -- A parsed PDF value. "t" is the type tag written by the obj_set_*
    -- procedures ('REFERENCE', 'DICTIONARY', 'STREAM', 'ARRAY', 'NUMBER',
    -- 'STRING', 'HX_STRING', 'BOOLEAN', 'NULL' or '?').
    -- Dictionaries are not stored inline: "dict" holds an index into a
    -- Dictionary_Ref_T table. "array_" is presumably the analogous index into
    -- an Array_Ref_T table -- confirm against obj_set_array.
    type Pdf_Object is record (
        t varchar2(32767),
        num number,
        str varchar2(32767),
        dict pls_integer,
        array_ pls_integer,
        refer Pdf_Reference);

    -- A dictionary maps a name (without the leading '/') to its value.
    type Dictionary_T is table of Pdf_Object index by varchar2(32767);
    -- Side table of dictionaries, addressed by Pdf_Object.dict.
    type Dictionary_Ref_T is table of Dictionary_T index by pls_integer;

    type Pdf_Array is varray(125000) of Pdf_Object;
    -- Side table of arrays, filled by obj_set_array.
    type Array_Ref_T is table of Pdf_Array index by pls_integer;

    -- Trailer dictionaries collected while following the /Prev chain.
    type Trailer_Array is varray(125000) of Dictionary_T;

    -- Lexical token: "t" is the token type, "val" its text (NULL for
    -- keywords and punctuation).
    type Pdf_Token is record (
        t varchar2(32767),
        val varchar2(32767));
    -- Push-back buffer used by unread_token/next_token (at most 20 tokens).
    type Pdf_Token_Array is varray(20) of Pdf_Token;

    -- One cross-reference entry; type_ is read from column 18 of the xref
    -- line and byte_pos is only filled when type_ = 'n'.
    type Pdf_Xref_Item is record (
        type_ varchar2(1),
        id_ number,
        byte_pos number,
        gen_num number);
    -- Keyed by '<object number> <generation number>'.
    type Pdf_Xref_Table is table of Pdf_Xref_Item index by varchar2(20);
    type pdf_xref is record (
        objs Pdf_Xref_Table);

    -- Tokenizer/parser state: the document, the current 1-based read
    -- position, the push-back buffer, the side tables and the xref table.
    type Pdf_Token_Reader is record (
        pdf blob,
        position number,
        tok_buf Pdf_Token_Array,
        dict_refs Dictionary_Ref_T,
        array_refs Array_Ref_T,
        xref pdf_xref);

    -- Turns self into an indirect reference ('REFERENCE') pointing at refer.
    procedure obj_set_ref(self in out nocopy Pdf_Object, refer Pdf_Reference) is
    begin
        self.t := 'REFERENCE';
        self.refer := refer;
    end;

    -- Turns self into a 'DICTIONARY' object. The dictionary is appended to
    -- dict_refs and self.dict receives the slot where it was stored.
    procedure obj_set_dict(self in out nocopy Pdf_Object,
                           dict Dictionary_T, dict_refs in out nocopy Dictionary_Ref_T) is
        -- First free slot: 1 for an empty table, otherwise last + 1.
        next_slot constant pls_integer := nvl(dict_refs.last, 0) + 1;
    begin
        self.t := 'DICTIONARY';
        dict_refs(next_slot) := dict;
        self.dict := next_slot;
    end;

    -- Turns self into a 'STREAM' object. Only the stream dictionary is kept:
    -- it is appended to dict_refs and self.dict receives its slot. The
    -- stream parameter is accepted but not stored for now.
    procedure obj_set_stream(self in out nocopy Pdf_Object, stream blob,
                             dict Dictionary_T, dict_refs in out nocopy Dictionary_Ref_T) is
        -- First free slot: 1 for an empty table, otherwise last + 1.
        next_slot constant pls_integer := nvl(dict_refs.last, 0) + 1;
    begin
        self.t := 'STREAM';
        dict_refs(next_slot) := dict;
        self.dict := next_slot;
        -- self.stream := stream; -- Not used for the time being.
    end;

    -- Turns self into an 'ARRAY' object. The array is appended to array_refs
    -- and self.array_ receives the slot where it was stored.
    procedure obj_set_array(self in out nocopy Pdf_Object, array_ Pdf_Array, array_refs in out nocopy Array_Ref_T) is
        idx pls_integer;
    begin
        self.t := 'ARRAY';
        if array_refs.last is null then
            idx := 1;
        else
            idx := array_refs.last + 1;
        end if;
        array_refs(idx) := array_;
        -- The slot belongs in array_, not dict: writing it to dict made an
        -- array look like it referenced an unrelated dictionary slot.
        self.array_ := idx;
    end;

    -- Fills self from a simple token: numbers, strings, booleans and null.
    -- Any other token type is stored with the tag '?' and the token text.
    -- NOTE(review): to_number without a format uses the session's
    -- NLS_NUMERIC_CHARACTERS; PDF reals always use '.' -- confirm the session
    -- settings under which this package runs.
    procedure obj_set_token(self in out nocopy Pdf_Object, token Pdf_Token) is
    begin
        if token.t = 'NUMBER' then
            self.t := token.t;
            self.num := to_number(token.val); -- no int/float distinction
        elsif token.t in ('STRING', 'HX_STRING') then
            self.t := token.t;
            self.str := token.val;
        elsif token.t in ('true', 'false') then
            self.t := 'BOOLEAN';
            self.str := token.t; -- the keyword itself: 'true' or 'false'
        elsif token.t = 'null' then
            self.t := 'NULL';
        else
            self.t := '?';
            self.str := token.val;
        end if;
    end;

    -- Sets the type and the text of a token.
    procedure pdf_token_init(self in out nocopy Pdf_Token, type_ varchar2, value_ varchar2) is
    begin
        self.t := type_;
        self.val := value_;
    end;

    -- Forward declarations of the tokenizer helpers used by next_token;
    -- their bodies are defined further down in this package body.
    procedure skip_comment(self in out nocopy Pdf_Token_Reader);
    procedure skip_spaces(self in out nocopy Pdf_Token_Reader);
    function read_number(self in out nocopy Pdf_Token_Reader) return varchar2;
    function read_hx_string(self in out nocopy Pdf_Token_Reader) return varchar2;
    function read_string(self in out nocopy Pdf_Token_Reader) return varchar2;
    function read_name(self in out nocopy Pdf_Token_Reader) return varchar2;
    function read_word(self in out nocopy Pdf_Token_Reader) return varchar2;
    function read_code(self in out nocopy Pdf_Token_Reader) return varchar2;
    function is_digit(ch varchar2) return boolean;
    function is_space(ch varchar2) return boolean;
    function is_delimiter(ch varchar2) return boolean;

    -- Points the reader at the given document and position and creates an
    -- empty push-back buffer. The side tables and the xref table of self
    -- are left untouched.
    procedure pdf_token_reader_init(self in out nocopy Pdf_Token_Reader, pdf blob, position number) is
    begin
        self.pdf := pdf;
        self.position := position;
        self.tok_buf := Pdf_Token_Array();
    end;

    -- Returns the next token. A token pushed back with unread_token is
    -- returned first; otherwise comments and white space are skipped and one
    -- token is read at self.position, which is advanced past it.
    -- Raises -20001 when the input is not a recognised keyword. dbms_lob.read
    -- raises NO_DATA_FOUND when reading past the end of the document.
    function next_token(self in out nocopy Pdf_Token_Reader) return Pdf_Token is
        result Pdf_Token;
        byt raw(1);
        ch varchar2(1);
        amount integer := 1;

        -- Reads a bare word and accepts it only if it is a known keyword.
        function trata_word return Pdf_Token is
            word varchar2(32767);
            result Pdf_Token;
        begin
            word := read_word(self);
            if word in ('obj', 'endobj', 'stream', 'endstream', 'true', 'false', 'null', 'xref', 'trailer') then
                pdf_token_init(result, word, null);
                return result;
            end if;
            -- The message must be built with ||. The former "+" attempted a
            -- numeric addition and raised ORA-06502 instead of this error.
            raise_application_error(-20001, 'Erro no PDF no byte ' || self.position || ', lendo: ' || word);
        end;

    begin
        if self.tok_buf.last is not null then
            result := self.tok_buf(self.tok_buf.last);
            self.tok_buf.trim;
            return result;
        end if;

        -- Skip any mix of comments and white space. On exit ch holds the
        -- byte at self.position, so it does not need to be read again.
        loop
            dbms_lob.read(self.pdf, amount, self.position, byt);
            ch := utl_raw.cast_to_varchar2(byt);
            if ch = '%' then
                skip_comment(self);
            elsif is_space(ch) then
                skip_spaces(self);
            else
                exit;
            end if;
        end loop;

        if is_digit(ch) or ch in ('.', '+', '-') then
            pdf_token_init(result, 'NUMBER', read_number(self));
            return result;
        elsif ch = '<' then
            -- '<<' opens a dictionary, a single '<' opens a hex string.
            self.position := self.position + 1;
            dbms_lob.read(self.pdf, amount, self.position, byt);
            ch := utl_raw.cast_to_varchar2(byt);
            if ch = '<' then
                self.position := self.position + 1;
                pdf_token_init(result, '<<', null);
                return result;
            end if;
            pdf_token_init(result, 'HX_STRING', read_hx_string(self));
            return result;
        elsif ch = '>' then
            self.position := self.position + 1;
            dbms_lob.read(self.pdf, amount, self.position, byt);
            ch := utl_raw.cast_to_varchar2(byt);
            if ch = '>' then
                self.position := self.position + 1;
                pdf_token_init(result, '>>', null);
                return result;
            end if;
            -- A lone '>' is only expected as the terminator of a hex string.
            pdf_token_init(result, '>', null);
            return result;
        elsif ch = '(' then
            self.position := self.position + 1;
            pdf_token_init(result, 'STRING', read_string(self));
            return result;
        elsif ch = '/' then
            self.position := self.position + 1;
            pdf_token_init(result, 'NAME', read_name(self));
            return result;
        elsif ch = '[' then
            self.position := self.position + 1;
            pdf_token_init(result, '[', null);
            return result;
        elsif ch = ']' then
            self.position := self.position + 1;
            pdf_token_init(result, ']', null);
            return result;
        elsif ch = '{' then
            self.position := self.position + 1;
            pdf_token_init(result, 'CODE', read_code(self));
            return result;
        elsif ch = 'R' then
            -- 'R' is the reference operator only when it stands alone.
            -- dbms_lob.substr returns NULL at the end of the document.
            byt := dbms_lob.substr(self.pdf, amount, self.position + 1);
            ch := utl_raw.cast_to_varchar2(byt);
            if ch is null or is_delimiter(ch) then
                self.position := self.position + 1;
                pdf_token_init(result, 'R', null);
                return result;
            else
                return trata_word;
            end if;
        else
            return trata_word;
        end if;
    end;

    -- Pushes a token back so that the next call to next_token returns it.
    -- Tokens come back in last-in, first-out order; the buffer type
    -- (Pdf_Token_Array) holds at most 20 tokens.
    procedure unread_token(self in out nocopy Pdf_Token_Reader, token Pdf_Token) is
    begin
        self.tok_buf.extend;
        self.tok_buf(self.tok_buf.last) := token;
    end;

    -- Advances self.position past the current comment, including its line
    -- terminator (LF, CR or CR LF).
    procedure skip_comment(self in out nocopy Pdf_Token_Reader) is
        c varchar2(1);
        n number := 1;
        b raw(1);
    begin
        loop
            dbms_lob.read(self.pdf, n, self.position, b);
            c := utl_raw.cast_to_varchar2(b);
            -- Whatever the byte is, it belongs to the comment or to its
            -- terminator, so it is always consumed.
            self.position := self.position + 1;
            if c = chr(10) then
                return;
            elsif c = chr(13) then
                -- A CR may be followed by an LF that is part of the same
                -- line terminator.
                dbms_lob.read(self.pdf, n, self.position, b);
                c := utl_raw.cast_to_varchar2(b);
                if c = chr(10) then
                    self.position := self.position + 1;
                end if;
                return;
            end if;
        end loop;
    end;

    -- Advances self.position to the first byte that is not white space.
    procedure skip_spaces(self in out nocopy Pdf_Token_Reader) is
        n number := 1;
        c varchar2(1);
        b raw(1);
    begin
        dbms_lob.read(self.pdf, n, self.position, b);
        c := utl_raw.cast_to_varchar2(b);
        while is_space(c) loop
            self.position := self.position + 1;
            dbms_lob.read(self.pdf, n, self.position, b);
            c := utl_raw.cast_to_varchar2(b);
        end loop;
    end;

    -- Collects the run of digits, '.', '+' and '-' that starts at
    -- self.position and returns it as text, leaving self.position on the
    -- first byte that does not belong to the number.
    function read_number(self in out nocopy Pdf_Token_Reader) return varchar2 is
        digits varchar2(32767) := '';
        c varchar2(1);

        -- Byte at the current position, without consuming it.
        function current_char return varchar2 is
            n number := 1;
            b raw(1);
        begin
            dbms_lob.read(self.pdf, n, self.position, b);
            return utl_raw.cast_to_varchar2(b);
        end;
    begin
        loop
            c := current_char;
            exit when not (is_digit(c) or c in ('.', '+', '-'));
            digits := digits || c;
            self.position := self.position + 1;
        end loop;
        return digits;
    end;

    -- Converts to a number the run of digits, '.', '+' and '-' found in str
    -- starting at character pos. Returns NULL when the run is empty.
    function read_number(str varchar2, pos pls_integer) return number is
        span pls_integer := 0;
    begin
        loop
            -- instr yields NULL past the end of str and 0 for any other
            -- character; both end the run.
            exit when nvl(instr('0123456789.+-', substr(str, pos + span, 1)), 0) = 0;
            span := span + 1;
        end loop;
        return to_number(substr(str, pos, span));
    end;

    -- Reads the body of a hexadecimal string. self.position must be just
    -- after the opening '<'. Returns the raw text between the delimiters and
    -- leaves self.position just after the closing '>'.
    function read_hx_string(self in out nocopy Pdf_Token_Reader) return varchar2 is
        amount number := 1;
        ch varchar2(1);
        byt raw(1);
        result varchar2(32767) := '';
    begin
        loop
            dbms_lob.read(self.pdf, amount, self.position, byt);
            ch := utl_raw.cast_to_varchar2(byt);
            exit when ch = '>';
            result := result || ch;
            self.position := self.position + 1;
        end loop;
        -- Consume the closing '>'. Leaving it behind made the tokenizer
        -- report a stray '>' token, or a false '>>' when the string was the
        -- last value of a dictionary.
        self.position := self.position + 1;
        return result;
    end;

    -- Reads the body of a literal string. self.position must be just after
    -- the opening '('. Returns the decoded text and leaves self.position
    -- just after the matching ')'. Balanced parentheses inside the string
    -- are kept; octal escapes (\ddd) are not decoded and are returned as
    -- written.
    function read_string(self in out nocopy Pdf_Token_Reader) return varchar2 is
        par_level pls_integer := 0;
        escaped boolean := false;
        amount number := 1;
        ch varchar2(1);
        byt raw(1);
        result varchar2(32767);
    begin
        loop
            dbms_lob.read(self.pdf, amount, self.position, byt);
            ch := utl_raw.cast_to_varchar2(byt);
            if escaped then
                if ch = 'n' then
                    result := result || chr(10);
                elsif ch = 'r' then
                    result := result || chr(13);
                elsif ch = 't' then
                    result := result || chr(9);
                elsif ch = 'b' then
                    result := result || chr(8);
                elsif ch = 'f' then
                    result := result || chr(12);
                elsif is_digit(ch) then
                    -- Octal escapes are not supported yet.
                    result := result || '\' || ch;
                elsif ch = chr(10) then
                    -- Backslash + end of line: line continuation.
                    null;
                elsif ch = chr(13) then
                    -- Line continuation; a CR LF pair counts as one EOL.
                    dbms_lob.read(self.pdf, amount, self.position + 1, byt);
                    ch := utl_raw.cast_to_varchar2(byt);
                    if ch = chr(10) then
                        self.position := self.position + 1;
                    end if;
                else
                    -- '\(' , '\)' and '\\' stand for the character itself;
                    -- for any other character the backslash is ignored and
                    -- the character is kept.
                    result := result || ch;
                end if;
                escaped := false;
            elsif ch = '\' then
                escaped := true;
            elsif ch = '(' then
                par_level := par_level + 1;
                result := result || '(';
            elsif ch = ')' then
                par_level := par_level - 1;
                if par_level < 0 then
                    self.position := self.position + 1;
                    return result;
                end if;
                -- A nested ')' is part of the string, like its '('.
                result := result || ')';
            else
                result := result || ch;
            end if;
            self.position := self.position + 1;
        end loop;
    end;

    -- Reads a name. self.position must be just after the '/'. The name is
    -- read exactly like a bare word, up to the next delimiter.
    function read_name(self in out nocopy Pdf_Token_Reader) return varchar2 is
    begin
        return read_word(self);
    end;

    -- Returns the characters from self.position up to, but not including,
    -- the next delimiter, leaving self.position on that delimiter.
    function read_word(self in out nocopy Pdf_Token_Reader) return varchar2 is
        word varchar2(32767) := '';
        c varchar2(1);

        -- Byte at the current position, without consuming it.
        function current_char return varchar2 is
            n number := 1;
            b raw(1);
        begin
            dbms_lob.read(self.pdf, n, self.position, b);
            return utl_raw.cast_to_varchar2(b);
        end;
    begin
        loop
            c := current_char;
            exit when is_delimiter(c);
            word := word || c;
            self.position := self.position + 1;
        end loop;
        return word;
    end;

    -- Reads a brace-delimited block. self.position must be just after the
    -- opening '{'. Returns the text between the braces (nested braces
    -- included) and leaves self.position just after the matching '}'.
    function read_code(self in out nocopy Pdf_Token_Reader) return varchar2 is
        amount number := 1;
        ch varchar2(1);
        byt raw(1);
        par_level pls_integer := 0;
        result varchar2(32767) := '';
    begin
        loop
            dbms_lob.read(self.pdf, amount, self.position, byt);
            ch := utl_raw.cast_to_varchar2(byt);
            if ch = '}' then
                if par_level = 0 then
                    -- Matching brace: consume it, otherwise the tokenizer
                    -- would trip over it on the next call.
                    self.position := self.position + 1;
                    return result;
                end if;
                par_level := par_level - 1;
            elsif ch = '{' then
                par_level := par_level + 1;
            end if;
            -- Every byte inside the block is part of the result, not only
            -- the nested braces.
            result := result || ch;
            self.position := self.position + 1;
        end loop;
    end;

    -- Forward declarations of the cross-reference table routines; their
    -- bodies follow immediately below.
    procedure parse_xref(self in out nocopy pdf_xref, token_reader in out nocopy Pdf_Token_Reader);
    procedure add_block(self in out nocopy pdf_xref, start_id number, nr_of_lines number, str_block varchar2);
    procedure add_obj(self in out nocopy pdf_xref, id_ number, str_line varchar2);
    function get_obj(self in out nocopy pdf_xref, refer Pdf_Reference) return Pdf_Xref_Item;

    -- Parses one cross-reference section: the 'xref' keyword followed by one
    -- or more subsections ("<first id> <count>" plus <count> fixed-width
    -- entries of 20 bytes). The entries are added to self. On return the
    -- reader is positioned so that the trailer can be read next.
    -- Raises -20001 when 'xref' or one of the two numbers is missing.
    procedure parse_xref(self in out nocopy pdf_xref, token_reader in out nocopy Pdf_Token_Reader) is
        -- dbms_lob.substr returns NULL for more than 32767 bytes, so entries
        -- are fetched in chunks of at most floor(32767 / 20) lines.
        max_lines_per_read constant pls_integer := 1638;
        token Pdf_Token;
        start_id number;
        nr_of_lines number;
        lines_left number;
        chunk_lines pls_integer;
        str_xref varchar2(32767);
        amount integer;
        ch varchar2(1);
        byt raw(1);

        -- Reads the next token and requires it to be a number.
        function get_number return number is
        begin
            token := next_token(token_reader);
            if token.t <> 'NUMBER' then
                raise_application_error(-20001, 'Não encontrado número no local esperado do PDF');
            end if;
            return to_number(token.val);
        end;

    begin
        token := next_token(token_reader);
        if token.t <> 'xref' then
            raise_application_error(-20001, 'Não encontrado xref no local esperado do PDF');
        end if;

        loop
            start_id := get_number;
            nr_of_lines := get_number;
            skip_spaces(token_reader);

            lines_left := nr_of_lines;
            while lines_left > 0 loop
                chunk_lines := least(lines_left, max_lines_per_read);
                amount := 20 * chunk_lines;
                str_xref := utl_raw.cast_to_varchar2(dbms_lob.substr(token_reader.pdf, amount, token_reader.position));
                token_reader.position := token_reader.position + amount;
                add_block(self, start_id, chunk_lines, str_xref);
                start_id := start_id + chunk_lines;
                lines_left := lines_left - chunk_lines;
            end loop;

            skip_spaces(token_reader);
            amount := 1;
            dbms_lob.read(token_reader.pdf, amount, token_reader.position, byt);
            ch := utl_raw.cast_to_varchar2(byt);
            if not is_digit(ch) then
                return; -- leaves the reader in place to read the trailer
            end if;

        end loop;
    end;

    -- Splits str_block into nr_of_lines entries of 20 characters and adds
    -- them to self, numbering the objects consecutively from start_id.
    procedure add_block(self in out nocopy pdf_xref, start_id number, nr_of_lines number, str_block varchar2) is
        line_width constant pls_integer := 20;
    begin
        for line_no in 1 .. nr_of_lines loop
            add_obj(self,
                    start_id + (line_no - 1),
                    substr(str_block, line_width * (line_no - 1) + 1, line_width));
        end loop;
    end;

    -- Parses one 20-character xref entry and stores it under the key
    -- '<id> <generation>'. An entry whose key is already present is ignored,
    -- so the first definition added wins.
    procedure add_obj(self in out nocopy pdf_xref, id_ number, str_line varchar2) is
        entry_ Pdf_Xref_Item;
    begin
        entry_.id_ := id_;
        entry_.type_ := substr(str_line, 18, 1);
        entry_.gen_num := read_number(str_line, 12);
        -- The byte offset is only recorded for entries of type 'n'.
        if entry_.type_ = 'n' then
            entry_.byte_pos := read_number(str_line, 1);
        end if;
        if not self.objs.exists(id_ || ' ' || entry_.gen_num) then
            self.objs(id_ || ' ' || entry_.gen_num) := entry_;
        end if;
    end;

    -- Returns the xref entry stored under '<obj_num> <gen_num>'.
    -- Indexing a key that is not in the table raises NO_DATA_FOUND.
    function get_obj(self in out nocopy pdf_xref, refer Pdf_Reference) return Pdf_Xref_Item is
    begin
        return self.objs(refer.obj_num || ' ' || refer.gen_num);
    end;

    -- TRUE when ch is a single decimal digit; NULL when ch is NULL.
    function is_digit(ch varchar2) return boolean is
    begin
        -- The length test keeps multi-character input from matching a
        -- substring of the digit list.
        return length(ch) = 1 and instr('0123456789', ch) > 0;
    end;

    -- TRUE when ch is one of NUL, TAB, LF, FF, CR or a blank.
    function is_space(ch varchar2) return boolean is
        blanks constant varchar2(6) := chr(0) || chr(9) || chr(10) || chr(12) || chr(13) || ' ';
    begin
        -- The length test keeps multi-character input from matching a
        -- substring of the list.
        return length(ch) = 1 and instr(blanks, ch) > 0;
    end;

    -- TRUE when ch ends a word: white space or one of ( ) < > [ ] { } / %.
    function is_delimiter(ch varchar2) return boolean is
    begin
        if is_space(ch) then
            return true;
        end if;
        return ch in ('(', ')', '<', '>', '[', ']', '{', '}', '/', '%');
    end;

    -- Raises -20001 unless t1 (what was found) equals t2 (what was
    -- expected). A NULL t1 counts as a mismatch: with a plain "t1 <> t2" the
    -- comparison evaluates to NULL and the check would pass silently.
    procedure expect(t1 varchar2, t2 varchar2) is
    begin
        if t1 is null or t1 <> t2 then
            raise_application_error(-20001,
                'Não encontrado token no local esperado do PDF (' || t1 || '<>' || t2 || ')');
        end if;
    end;

    -- Forward declaration: parse_array and parse_dict call parse_object
    -- recursively; its body is defined further down.
    function parse_object(token_reader in out nocopy Pdf_Token_Reader) return Pdf_Object;

    -- Parses the elements of an array up to and including the closing ']'
    -- (the '[' has already been consumed) and returns the resulting 'ARRAY'
    -- object, registered in array_refs.
    function parse_array(token_reader in out nocopy Pdf_Token_Reader, array_refs in out nocopy Array_Ref_T)
            return Pdf_Object is
        array_obj Pdf_Object;
        tok Pdf_Token;
        elements Pdf_Array := Pdf_Array();
    begin
        loop
            tok := next_token(token_reader);
            exit when tok.t = ']';
            -- Not the end yet: hand the token back and parse a full element.
            unread_token(token_reader, tok);
            elements.extend;
            elements(elements.last) := parse_object(token_reader);
        end loop;
        obj_set_array(array_obj, elements, array_refs);
        return array_obj;
    end;

    -- Parses the key/value pairs of a dictionary up to and including the
    -- closing '>>' (the '<<' has already been consumed). A key that appears
    -- more than once keeps its last value.
    -- Raises -20001 when a key is not a name or when a key has no value.
    function parse_dict(token_reader in out nocopy Pdf_Token_Reader) return Dictionary_T is
        result Dictionary_T;
        token Pdf_Token;
        value_token Pdf_Token;
        obj Pdf_Object;
    begin
        loop
            token := next_token(token_reader);
            if token.t = '>>' then
                return result;
            end if;
            expect(token.t, 'NAME');
            -- Peek at the first token of the value: a '>>' here means the
            -- key has no value. The check has to be made on the token,
            -- because the object built from a '>>' token carries no text.
            value_token := next_token(token_reader);
            if value_token.t = '>>' then
                raise_application_error(-20001, 'Token >> inesperado em pos = ' || token_reader.position);
            end if;
            unread_token(token_reader, value_token);
            obj := parse_object(token_reader);
            result(token.val) := obj;
        end loop;
    end;
    
    -- Forward declaration: read_stream needs it to resolve an indirect
    -- /Length; the body is defined further down.
    function find_indirect_object(token_reader in out nocopy Pdf_Token_Reader,
                                  refer Pdf_Reference)
                           return Pdf_Object;

    -- Skips over the data of a stream whose dictionary has just been parsed
    -- (the 'stream' keyword has already been consumed) and returns a
    -- 'STREAM' object that carries only the dictionary.
    -- Raises -20001 when /Length does not resolve to a number or when
    -- 'endstream' is not found after the data.
    function read_stream(token_reader in out nocopy Pdf_Token_Reader, dict Dictionary_T)
        return Pdf_Object is
        one_byte raw(1);
        first_ch varchar2(1);
        n integer := 1;
        end_token Pdf_Token;
        stream_obj Pdf_Object;
        length_obj Pdf_Object;
        resume_at number;
    begin
        -- Skip the end-of-line after the keyword: two bytes when it starts
        -- with CR (taken to be CR LF), one byte for a lone LF.
        dbms_lob.read(token_reader.pdf, n, token_reader.position, one_byte);
        first_ch := utl_raw.cast_to_varchar2(one_byte);
        token_reader.position := token_reader.position +
            case first_ch
                when chr(13) then 2
                when chr(10) then 1
                else 0
            end;

        length_obj := dict('Length');

        if length_obj.t = 'REFERENCE' then
            -- Resolving the reference moves the reader; come back afterwards.
            resume_at := token_reader.position;
            length_obj := find_indirect_object(token_reader, length_obj.refer);
            token_reader.position := resume_at;
        end if;

        if length_obj.t <> 'NUMBER' then
            raise_application_error(-20001, 'Erro: /Length de stream não é um número. Pos = ' || token_reader.position);
        end if;

        -- The data is only skipped, since nothing here uses it. If it is
        -- ever needed, it could be kept in a blob inside Pdf_Object.
        token_reader.position := token_reader.position + length_obj.num;
        end_token := next_token(token_reader);
        expect(end_token.t, 'endstream');
        obj_set_stream(stream_obj, null, dict, token_reader.dict_refs);
        return stream_obj;
    end;

    -- Builds a Pdf_Reference from an object number and a generation number.
    function new_pdf_reference(obj_num number, gen_num number) return Pdf_Reference is
        result Pdf_Reference;
    begin
        result.obj_num := obj_num;
        result.gen_num := gen_num;
        return result; 
    end;

    -- Parses one object at the reader's current position and returns it:
    -- a number, an indirect object definition ("n g obj ... endobj"), an
    -- indirect reference ("n g R"), an array, a dictionary, a stream, or a
    -- simple token (string, boolean, null, ...).
    function parse_object(token_reader in out nocopy Pdf_Token_Reader) return Pdf_Object is
        token1 Pdf_Token;
        token2 Pdf_Token;
        token3 Pdf_Token;
        token4 Pdf_Token;
        dict Dictionary_T;
        result Pdf_Object;

        -- Parses the content of an indirect object and requires 'endobj'
        -- right after it.
        function obj return Pdf_Object is
            token Pdf_Token;
            result Pdf_Object;
        begin
            result := parse_object(token_reader);
            token := next_token(token_reader);
            expect(token.t, 'endobj');
            return result;
        end;
    begin
        token1 := next_token(token_reader);
        if token1.t = 'NUMBER' then
            -- may be a plain number, an indirect object, or a reference to
            -- an indirect object
            token2 := next_token(token_reader);
            if token2.t = 'NUMBER' then
                token3 := next_token(token_reader);
                if token3.t = 'obj' then
                    return obj();
                elsif token3.t = 'R' then
                    obj_set_ref(result, new_pdf_reference(obj_num=>token1.val, gen_num=>token2.val));
                    return result;
                else
                    -- Just a number followed by other tokens. The look-ahead
                    -- is pushed back in reverse order because the buffer
                    -- returns the most recently pushed token first.
                    unread_token(token_reader, token3);
                    unread_token(token_reader, token2);
                end if;
            else
                unread_token(token_reader, token2);
            end if;
            obj_set_token(result, token1);
            return result;
        elsif token1.t = '[' then
            return parse_array(token_reader, token_reader.array_refs);
        elsif token1.t = '<<' then
            dict := parse_dict(token_reader);
            -- A dictionary followed by 'stream' is a stream dictionary.
            token4 := next_token(token_reader);
            if token4.t = 'stream' then
                return read_stream(token_reader, dict);
            else
                unread_token(token_reader, token4);
            end if;
            obj_set_dict(result, dict, token_reader.dict_refs);
            return result;
        elsif token1.t = 'obj' then
            return obj();
        else
            obj_set_token(result, token1);
            return result; 
        end if;
    end;

    -- Parses "trailer << ... >>" at the reader's current position and
    -- returns the trailer dictionary. expect raises -20001 when either the
    -- keyword or the '<<' is not found.
    function parse_trailer(token_reader in out nocopy Pdf_Token_Reader) return Dictionary_T is
        token Pdf_Token;
    begin
        token := next_token(token_reader);
        expect(token.t, 'trailer');
        token := next_token(token_reader);
        expect(token.t, '<<');
        return parse_dict(token_reader);
    end;

    -- Parses the object an xref entry points to. Only entries of type 'n'
    -- are read; for any other entry NULL is returned. The reader is left
    -- positioned after the parsed object.
    function read_indirect_obj(token_reader in out nocopy Pdf_Token_Reader, xref_item Pdf_Xref_Item)
            return Pdf_Object is
    begin
        if xref_item.type_ = 'n' then
            -- xref offsets count from 0 at the first byte of the file, while
            -- dbms_lob positions start at 1. Without the + 1 the parse began
            -- one byte early and only worked when that byte was white space.
            token_reader.position := xref_item.byte_pos + 1;
            return parse_object(token_reader);
        else
            return null;
        end if;
    end;

    -- Looks up refer in the reader's xref table and parses the object it
    -- points to. The reader's position is moved by the parse; callers that
    -- need their position afterwards must save and restore it.
    function find_indirect_object(token_reader in out nocopy Pdf_Token_Reader,
                                  refer Pdf_Reference)
                           return Pdf_Object is
        xref_item Pdf_Xref_Item;
    begin
        xref_item := get_obj(token_reader.xref, refer);
        return read_indirect_obj(token_reader, xref_item);
    end;


    -- Returns the number of pages of a PDF document.
    --
    -- The count is taken from /Count of the page tree root, found through
    -- startxref -> xref table(s) -> trailer /Root -> catalog /Pages. When the
    -- document has no usable startxref, or startxref does not point at a
    -- classic 'xref' table, the /N entry of a /Linearized first object is
    -- used instead.
    --
    -- Returns NULL when the content does not start with '%PDF-' or when any
    -- error occurs; the error text is written with dbms_output.
    function number_of_pages(pdf in blob) return integer is
        amount integer := 5;
        position number := 1;
        comeco_pdf_raw raw(5);
        comeco_pdf varchar2(5);
        final_pdf_raw raw(400);
        final_pdf varchar2(400);
        startxref_str varchar2(400);
        startxref integer;
        token Pdf_Token;
        token_reader Pdf_Token_Reader;

        trailer Dictionary_T;
        trailers Trailer_Array;

        catalog Pdf_Object;

        len_pdf constant number := dbms_lob.getlength(pdf);

        -- Page count taken from the linearization dictionary, which must be
        -- the first object of the file. Requires an initialised reader.
        function linearized return integer is
            first_obj Pdf_Object;
        begin
            token_reader.position := 1;
            first_obj := parse_object(token_reader);
            if token_reader.dict_refs(first_obj.dict).exists('Linearized') then
                return token_reader.dict_refs(first_obj.dict)('N').num;
            else
                raise_application_error(-20001, 'Não achou o número de páginas (/Linearized)');
            end if;
        end;

    begin
        dbms_lob.read(pdf, amount, position, comeco_pdf_raw);
        comeco_pdf := utl_raw.cast_to_varchar2(comeco_pdf_raw);
        if comeco_pdf <> '%PDF-' then
            return null;
        end if;

        -- Look for "startxref <offset> %%EOF" in the last 400 bytes.
        amount := 400;
        if len_pdf < amount then
            amount := len_pdf;
        end if;
        position := len_pdf - (amount - 1);
        dbms_lob.read(pdf, amount, position, final_pdf_raw);
        final_pdf := utl_raw.cast_to_varchar2(final_pdf_raw);
        declare
            startxref_pattern constant varchar2(50) := '.*startxref\s+(\d+)\s+%%EOF.*$';
        begin
            -- regexp_replace returns its input unchanged when nothing
            -- matches, so only extract the offset when the pattern is there;
            -- otherwise startxref stays NULL and the fallback below is used.
            if regexp_like(final_pdf, startxref_pattern, 'n') then
                startxref_str := regexp_replace(final_pdf, startxref_pattern, '\1', 1, 1, 'n');
            end if;
        end;
        startxref := to_number(startxref_str);

        -- The reader must exist before either branch, because the
        -- linearized fallback parses with it as well.
        pdf_token_reader_init(token_reader, pdf, 1);

        if startxref is null or startxref = 0 then
            return linearized;
        else
            -- File offsets are 0-based, dbms_lob positions are 1-based.
            token_reader.position := startxref + 1;
            token := next_token(token_reader);
            if token.t = 'xref' then
                unread_token(token_reader, token);
                trailers := Trailer_Array();
                -- Read this xref section and every older one reachable
                -- through /Prev. Newer entries are added first and win.
                loop
                    parse_xref(token_reader.xref, token_reader);
                    trailer := parse_trailer(token_reader);
                    trailers.extend;
                    trailers(trailers.last) := trailer;
                    if trailer.exists('Prev') then
                        token_reader.position := trailer('Prev').num + 1;
                    else
                        exit;
                    end if;
                end loop;

                -- The catalog is the /Root of the newest trailer that has one.
                for i in 1 .. trailers.last loop
                    if trailers(i).exists('Root') then
                        declare
                            catalog_num Pdf_Reference;
                            root Pdf_Object;
                        begin
                            root := trailers(i)('Root');
                            expect(root.t, 'REFERENCE');
                            catalog_num := root.refer;
                            catalog := find_indirect_object(token_reader, catalog_num);
                            exit;
                        end;
                    end if;
                end loop;

                declare
                    pages Pdf_Object;
                    pages_num Pdf_Reference;
                    count_ Pdf_Object;
                begin
                    pages := token_reader.dict_refs(catalog.dict)('Pages');
                    expect(pages.t, 'REFERENCE');
                    pages_num := pages.refer;
                    pages := find_indirect_object(token_reader, pages_num);
                    count_ := token_reader.dict_refs(pages.dict)('Count');
                    expect(count_.t, 'NUMBER');
                    return count_.num;
                end;
            else
                return linearized;
            end if;
        end if;
    exception
        when dbms_lob.invalid_argval then
            dbms_output.put_line('INVALID_ARGVAL - ' || SQLERRM);
            return null;
        when no_data_found then
            dbms_output.put_line('NO_DATA_FOUND - ' || SQLERRM);
            return null;
        when others then
            dbms_output.put_line('Outro erro - ' || SQLERRM);
            return null;
    end;
end pk_pdf;

Um pensamento sobre “PL/SQL: PdfPageCount”

  1. Python vs. PL/SQL vs. F# « Visions of hope

Deixe um comentário

Preencha os seus dados abaixo ou clique em um ícone para log in:

Logotipo do WordPress.com

Você está comentando utilizando sua conta WordPress.com. Sair / Alterar )

Imagem do Twitter

Você está comentando utilizando sua conta Twitter. Sair / Alterar )

Foto do Facebook

Você está comentando utilizando sua conta Facebook. Sair / Alterar )

Foto do Google+

Você está comentando utilizando sua conta Google+. Sair / Alterar )

Conectando a %s