Subversion Repositories cheapmusic

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
103 - 1
<?php
//
//  FPDI - Version 1.2
//
//    Copyright 2004-2007 Setasign - Jan Slabon
//
//  Licensed under the Apache License, Version 2.0 (the "License");
//  you may not use this file except in compliance with the License.
//  You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
//  Unless required by applicable law or agreed to in writing, software
//  distributed under the License is distributed on an "AS IS" BASIS,
//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//  See the License for the specific language governing permissions and
//  limitations under the License.
//

// Type tags for parsed PDF values. pdf_parser::pdf_read_value() and
// pdf_parser::pdf_resolve_object() return arrays whose element 0 is one of
// these tags. Each constant is only defined when it does not exist yet, so
// including this file next to another definition of the same names is safe.
if (!defined('PDF_TYPE_NULL')) {
    define('PDF_TYPE_NULL', 0);
}
if (!defined('PDF_TYPE_NUMERIC')) {
    define('PDF_TYPE_NUMERIC', 1);
}
if (!defined('PDF_TYPE_TOKEN')) {
    define('PDF_TYPE_TOKEN', 2);
}
if (!defined('PDF_TYPE_HEX')) {
    define('PDF_TYPE_HEX', 3);
}
if (!defined('PDF_TYPE_STRING')) {
    define('PDF_TYPE_STRING', 4);
}
if (!defined('PDF_TYPE_DICTIONARY')) {
    define('PDF_TYPE_DICTIONARY', 5);
}
if (!defined('PDF_TYPE_ARRAY')) {
    define('PDF_TYPE_ARRAY', 6);
}
if (!defined('PDF_TYPE_OBJDEC')) {
    define('PDF_TYPE_OBJDEC', 7);
}
if (!defined('PDF_TYPE_OBJREF')) {
    define('PDF_TYPE_OBJREF', 8);
}
if (!defined('PDF_TYPE_OBJECT')) {
    define('PDF_TYPE_OBJECT', 9);
}
if (!defined('PDF_TYPE_STREAM')) {
    define('PDF_TYPE_STREAM', 10);
}


/**
 * Low-level reader for the structure of a PDF file.
 *
 * On construction the parser opens the file, locates and reads the
 * cross-reference data (including the trailer), rejects encrypted files and
 * resolves the /Root object. Failures are reported through $success and
 * $errormsg rather than by raising errors.
 *
 * Parsed values are arrays whose element 0 is one of the PDF_TYPE_* tags.
 */
class pdf_parser {

    /**
     * Filename
     * @var string
     */
    var $filename;

    /**
     * File resource
     * @var resource
     */
    var $f;

    /**
     * PDF Context
     * @var object pdf_context-Instance
     */
    var $c;

    /**
     * xref-Data
     * @var array
     */
    var $xref;

    /**
     * root-Object
     * @var array
     */
    var $root;

    // mPDF 4.0 Added flag to show success on loading file
    var $success;
    var $errormsg;

    /**
     * Object currently being read by pdf_resolve_object(); the "stream"
     * branch of pdf_read_value() looks up /Length through it.
     * Declared explicitly so it is not created as a dynamic property.
     * @var array
     */
    var $actual_obj;

    /**
     * Constructor (PHP 5+ name).
     *
     * Delegates to the class-named method below, which is kept because it is
     * the historical constructor and may still be called explicitly.
     *
     * @param string $filename  Source-Filename
     */
    function __construct($filename) {
        $this->pdf_parser($filename);
    }

    /**
     * Legacy constructor: open the file and read xref, encryption flag and root.
     *
     * Sets $success to false and fills $errormsg when any step fails.
     *
     * @param string $filename  Source-Filename
     * @return mixed false when a step failed, nothing otherwise
     */
    function pdf_parser($filename) {
        $this->filename = $filename;
        // mPDF 4.0
        $this->success = true;

        $this->f = @fopen($this->filename, "rb");

        if (!$this->f) {
            $this->success = false;
            $this->errormsg = sprintf("Cannot open %s !", $filename);
            return false;
        }

        // mPDF 5.0 Removed pass by reference =&
        $this->c = new pdf_context($this->f);

        // Read xref-Data
        $offset = $this->pdf_find_xref();
        if ($offset === false) {
            $this->success = false;
            $this->errormsg = sprintf("Cannot open %s !", $filename);
            return false;
        }
        $this->pdf_read_xref($this->xref, $offset);
        if ($this->success == false) {
            return false;
        }

        // Check for Encryption
        $this->getEncryption();
        if ($this->success == false) {
            return false;
        }

        // Read root
        $this->pdf_read_root();
        if ($this->success == false) {
            return false;
        }
    }

    /**
     * Close the opened file
     *
     * Only a real stream resource is passed to fclose(); after a failed
     * open $f holds false and is simply discarded.
     */
    function closeFile() {
        if (isset($this->f)) {
            if (is_resource($this->f)) {
                fclose($this->f);
            }
            unset($this->f);
        }
    }

    /**
     * Print Error and die
     *
     * @param string $msg  Error-Message
     */
    function error($msg) {
        die("<b>PDF-Parser Error:</b> ".$msg);
    }

    /**
     * Check Trailer for Encryption
     *
     * Marks the parser as failed when the trailer dictionary has an
     * /Encrypt entry.
     *
     * @return mixed false when the file is encrypted, nothing otherwise
     */
    function getEncryption() {
        if (isset($this->xref['trailer'][1]['/Encrypt'])) {
            // mPDF 4.0
            $this->success = false;
            $this->errormsg = "File is encrypted!";
            return false;
        }
    }

    /**
     * Find/Return /Root
     *
     * @return mixed the /Root entry of the trailer (an indirect reference),
     *               or false when it is missing or not an indirect reference
     */
    function pdf_find_root() {
        if (!isset($this->xref['trailer'][1]['/Root'][0])
            || $this->xref['trailer'][1]['/Root'][0] != PDF_TYPE_OBJREF
        ) {
            // mPDF 4.0
            $this->success = false;
            $this->errormsg = "Wrong Type of Root-Element! Must be an indirect reference";
            return false;
        }
        return $this->xref['trailer'][1]['/Root'];
    }

    /**
     * Read the /Root
     *
     * Resolves the trailer's /Root reference into $root.
     *
     * @return mixed false when the reference could not be found
     */
    function pdf_read_root() {
        // read root
        $root = $this->pdf_find_root();
        if ($root === false) {
            $this->success = false;
            return false;
        }
        $this->root = $this->pdf_resolve_object($this->c, $root);
    }

    /**
     * Find the xref-Table
     *
     * Reads up to the last 1500 bytes of the file and takes the first
     * number that follows the last "startxref" keyword.
     *
     * @return mixed byte offset (int) of the xref table, or false on failure
     */
    function pdf_find_xref() {
        fseek($this->f, -min(filesize($this->filename), 1500), SEEK_END);
        $data = (string) fread($this->f, 1500);

        // Keep only what follows the last "startxref"; without the keyword
        // nothing is kept, so the match below fails and reports the error.
        $pos = strrpos($data, 'startxref');
        $data = ($pos === false) ? '' : (string) substr($data, $pos + strlen('startxref'));

        if (!preg_match('/\s*(\d+).*$/s', $data, $matches)) {
            // mPDF 4.0
            $this->success = false;
            $this->errormsg = "Unable to find pointer to xref table";
            return false;
        }

        return (int) $matches[1];
    }

    /**
     * Read xref-table
     *
     * Called with only $offset to read a table from its "xref" keyword, or
     * with $start/$end to read a further subsection at the current file
     * position. Entries already present in $result are never overwritten.
     * On failure $success is set to false and $errormsg is filled.
     *
     * @param array $result Array of xref-table
     * @param integer $offset of xref-table
     * @param integer $start start-position in xref-table
     * @param integer $end end-position in xref-table
     */
    function pdf_read_xref(&$result, $offset, $start = null, $end = null) {
        if (is_null($start) || is_null($end)) {
            fseek($this->f, $o_pos = $offset);
            $data = trim(fgets($this->f, 1024));

            if (strlen($data) == 0) {
                $data = trim(fgets($this->f, 1024));
            }

            if ($data !== 'xref') {
                fseek($this->f, $o_pos);
                $data = trim(_fgets($this->f, true));
                if ($data !== 'xref') {
                    if (preg_match('/(.*xref)(.*)/m', $data, $m)) { // xref 0 128 - in one line
                        fseek($this->f, $o_pos + strlen($m[1]));
                    } elseif (preg_match('/(x|r|e|f)+/', $data, $m) && strlen($m[0]) != 4) { // correct invalid xref-pointer
                        // A 4 character match would retry at the very same
                        // offset and recurse forever, so it is excluded and
                        // handled as "not found" below.
                        $tmpOffset = $offset - 4 + strlen($m[0]);
                        $this->pdf_read_xref($result, $tmpOffset, $start, $end);
                        return;
                    } else {
                        // mPDF 4.0
                        $this->success = false;
                        $this->errormsg = "Unable to find xref table - Maybe a Problem with 'auto_detect_line_endings'";
                        return;
                    }
                }
            }

            // Subsection header: "<first object number> <entry count>"
            $o_pos = ftell($this->f);
            $data = explode(' ', trim(fgets($this->f, 1024)));
            if (count($data) != 2) {
                fseek($this->f, $o_pos);
                $data = explode(' ', trim(_fgets($this->f, true)));

                if (count($data) != 2) {
                    if (count($data) > 2) { // no lineending
                        $n_pos = $o_pos + strlen($data[0]) + strlen($data[1]) + 2;
                        fseek($this->f, $n_pos);
                    } else {
                        // mPDF 4.0
                        $this->success = false;
                        $this->errormsg = "Unexpected header in xref table";
                        return;
                    }
                }
            }
            $start = (int) $data[0];
            $end = $start + (int) $data[1];
        }

        if (!isset($result['xref_location'])) {
            $result['xref_location'] = $offset;
        }

        if (!isset($result['max_object']) || $end > $result['max_object']) {
            $result['max_object'] = $end;
        }

        for (; $start < $end; $start++) {
            $entry = ltrim(fread($this->f, 20)); // Spezifications says: 20 bytes including newlines
            $entryOffset = substr($entry, 0, 10);
            $generation = substr($entry, 11, 5);

            if (!isset($result['xref'][$start][(int) $generation])) {
                $result['xref'][$start][(int) $generation] = (int) $entryOffset;
            }
        }

        $o_pos = ftell($this->f);
        $data = fgets($this->f, 1024);
        if (strlen(trim($data)) == 0) {
            $data = fgets($this->f, 1024);
        }

        if (preg_match("/trailer/", $data)) {
            if (preg_match("/(.*trailer[ \n\r]*)/", $data, $m)) {
                fseek($this->f, $o_pos + strlen($m[1]));
            }

            // mPDF 5.0 Removed pass by reference =&
            $c = new pdf_context($this->f);
            $trailer = $this->pdf_read_value($c);

            if (isset($trailer[1]['/Prev'])) {
                $this->pdf_read_xref($result, $trailer[1]['/Prev'][1]);
                if ($this->success == false) {
                    // The previous section could not be read, so there may
                    // be no trailer to merge into.
                    return;
                }
                $result['trailer'][1] = array_merge($result['trailer'][1], $trailer[1]);
            } else {
                $result['trailer'] = $trailer;
            }
        } else {
            // No trailer yet: the line is the header of another subsection.
            $data = explode(' ', trim($data));

            if (count($data) != 2) {
                fseek($this->f, $o_pos);
                $data = explode(' ', trim(_fgets($this->f, true)));

                if (count($data) != 2) {
                    // mPDF 4.0
                    $this->success = false;
                    $this->errormsg = "Unexpected data in xref table";
                    return;
                }
            }

            $this->pdf_read_xref($result, null, (int) $data[0], (int) $data[0] + (int) $data[1]);
        }
    }


    /**
     * Reads an Value
     *
     * Returns an array whose element 0 is a PDF_TYPE_* tag:
     *  - array(PDF_TYPE_HEX, string), array(PDF_TYPE_STRING, string)
     *  - array(PDF_TYPE_DICTIONARY, array key => value)
     *  - array(PDF_TYPE_ARRAY, array of values)
     *  - array(PDF_TYPE_STREAM, string)
     *  - array(PDF_TYPE_OBJDEC, int, int), array(PDF_TYPE_OBJREF, int, int)
     *  - array(PDF_TYPE_NUMERIC, token), array(PDF_TYPE_TOKEN, token)
     *
     * @param object $c pdf_context
     * @param string $token a Token
     * @return mixed the parsed value, or false when no complete value could be read
     */
    function pdf_read_value(&$c, $token = null) {
        if (is_null($token)) {
            $token = $this->pdf_read_token($c);
        }

        if ($token === false) {
            return false;
        }

        switch ($token) {
            case '<':
                // This is a hex string.
                // Read the value, then the terminator

                $pos = $c->offset;

                while (1) {

                    $match = strpos($c->buffer, '>', $pos);

                    // If you can't find it, try
                    // reading more data from the stream

                    if ($match === false) {
                        if (!$c->increase_length()) {
                            return false;
                        } else {
                            continue;
                        }
                    }

                    $result = substr($c->buffer, $c->offset, $match - $c->offset);
                    $c->offset = $match + 1;

                    return array(PDF_TYPE_HEX, $result);
                }

                break;

            case '<<':
                // This is a dictionary.

                $result = array();

                // Recurse into this function until we reach
                // the end of the dictionary.
                while (($key = $this->pdf_read_token($c)) !== '>>') {
                    if ($key === false) {
                        return false;
                    }

                    if (($value = $this->pdf_read_value($c)) === false) {
                        return false;
                    }
                    $result[$key] = $value;
                }

                return array(PDF_TYPE_DICTIONARY, $result);

            case '[':
                // This is an array.

                $result = array();

                // Recurse into this function until we reach
                // the end of the array.
                while (($token = $this->pdf_read_token($c)) !== ']') {
                    if ($token === false) {
                        return false;
                    }

                    if (($value = $this->pdf_read_value($c, $token)) === false) {
                        return false;
                    }

                    $result[] = $value;
                }

                return array(PDF_TYPE_ARRAY, $result);

            case '(':
                // This is a string

                $pos = $c->offset;

                while (1) {

                    // Start by finding the next closed
                    // parenthesis

                    $match = strpos($c->buffer, ')', $pos);

                    // If you can't find it, try
                    // reading more data from the stream

                    if ($match === false) {
                        if (!$c->increase_length()) {
                            return false;
                        } else {
                            continue;
                        }
                    }

                    // Make sure that there is no backslash
                    // before the parenthesis. If there is,
                    // move on. Otherwise, return the string.
                    // (An even number of backslashes escape each other,
                    // so the parenthesis itself is not escaped.)
                    $tmpresult = substr($c->buffer, $c->offset, $match - $c->offset);
                    $esc = preg_match('/([\\\\]+)$/', $tmpresult, $m);

                    if ($esc === 0 || strlen($m[1]) % 2 == 0) {
                        $result = $tmpresult;
                        $c->offset = $match + 1;
                        return array(PDF_TYPE_STRING, $result);
                    } else {
                        $pos = $match + 1;

                        if ($pos > $c->offset + $c->length) {
                            $c->increase_length();
                        }
                    }
                }

            case "stream":
                // Absolute file position of the first byte after "stream"
                $o_pos = ftell($c->file) - strlen($c->buffer);
                $o_offset = $c->offset;

                $c->reset($startpos = $o_pos + $o_offset);

                $e = 0; // ensure line breaks in front of the stream
                if ($c->buffer[0] == chr(10) || $c->buffer[0] == chr(13)) {
                    $e++;
                }
                if ($c->buffer[1] == chr(10) && $c->buffer[0] != chr(10)) {
                    $e++;
                }

                // The stream length comes from the dictionary of the object
                // currently being resolved; it may itself be a reference.
                if ($this->actual_obj[1][1]['/Length'][0] == PDF_TYPE_OBJREF) {
                    // mPDF 5.0 Removed pass by reference =&
                    $tmp_c = new pdf_context($this->f);
                    $tmp_length = $this->pdf_resolve_object($tmp_c, $this->actual_obj[1][1]['/Length']);
                    $length = $tmp_length[1][1];
                } else {
                    $length = $this->actual_obj[1][1]['/Length'][1];
                }

                if ($length > 0) {
                    $c->reset($startpos + $e, $length);
                    $v = $c->buffer;
                } else {
                    $v = '';
                }
                $c->reset($startpos + $e + $length + 9); // 9 = strlen("endstream")

                return array(PDF_TYPE_STREAM, $v);

            default:
                if (is_numeric($token)) {
                    // A numeric token. Make sure that
                    // it is not part of something else.
                    if (($tok2 = $this->pdf_read_token($c)) !== false) {
                        if (is_numeric($tok2)) {

                            // Two numeric tokens in a row.
                            // In this case, we're probably in
                            // front of either an object reference
                            // or an object specification.
                            // Determine the case and return the data
                            if (($tok3 = $this->pdf_read_token($c)) !== false) {
                                switch ($tok3) {
                                    case 'obj':
                                        return array(PDF_TYPE_OBJDEC, (int) $token, (int) $tok2);
                                    case 'R':
                                        return array(PDF_TYPE_OBJREF, (int) $token, (int) $tok2);
                                }
                                // If we get to this point, that numeric value up
                                // there was just a numeric value. Push the extra
                                // tokens back into the stack and return the value.
                                array_push($c->stack, $tok3);
                            }
                        }

                        array_push($c->stack, $tok2);
                    }

                    return array(PDF_TYPE_NUMERIC, $token);
                } else {

                    // Just a token. Return it.
                    return array(PDF_TYPE_TOKEN, $token);
                }

        }
    }

    /**
     * Resolve an object
     *
     * A value that is not an indirect reference is returned unchanged. A
     * reference is looked up in the xref data, read from the file, and
     * returned as an array tagged PDF_TYPE_OBJECT, or PDF_TYPE_STREAM when
     * its second value is a stream.
     *
     * @param object $c pdf_context
     * @param array $obj_spec The object-data
     * @param boolean $encapsulate Must set to true, cause the parsing and fpdi use this method only without this para
     * @return mixed the resolved value; false for invalid input or when the
     *               object is not at its expected location; null when the
     *               reference is not listed in the xref data
     */
    function pdf_resolve_object(&$c, $obj_spec, $encapsulate = true) {
        // Exit if we get invalid data
        if (!is_array($obj_spec)) {
            return false;
        }

        if ($obj_spec[0] == PDF_TYPE_OBJREF) {

            // This is a reference, resolve it
            if (isset($this->xref['xref'][$obj_spec[1]][$obj_spec[2]])) {

                // Save current file position
                // This is needed if you want to resolve
                // references while you're reading another object
                // (e.g.: if you need to determine the length
                // of a stream)

                $old_pos = ftell($c->file);

                // Reposition the file pointer and
                // load the object header.

                $c->reset($this->xref['xref'][$obj_spec[1]][$obj_spec[2]]);

                $header = $this->pdf_read_value($c);

                if ($header[0] != PDF_TYPE_OBJDEC || $header[1] != $obj_spec[1] || $header[2] != $obj_spec[2]) {
                    // mPDF 4.0
                    $this->success = false;
                    $this->errormsg = "Unable to find object ({$obj_spec[1]}, {$obj_spec[2]}) at expected location";
                    return false;
                }

                // If we're being asked to store all the information
                // about the object, we add the object ID and generation
                // number for later use
                // ($actual_obj is bound by reference so the "stream" branch
                // of pdf_read_value() sees the values collected so far.)
                $this->actual_obj =& $result;
                if ($encapsulate) {
                    $result = array(
                        PDF_TYPE_OBJECT,
                        'obj' => $obj_spec[1],
                        'gen' => $obj_spec[2]
                    );
                } else {
                    $result = array();
                }

                // Now simply read the object data until
                // we encounter an end-of-object marker
                while (1) {
                    $value = $this->pdf_read_value($c);
                    if ($value === false || count($result) > 4) {
                        // in this case the parser coudn't find an endobj so we break here
                        break;
                    }

                    if ($value[0] == PDF_TYPE_TOKEN && $value[1] === 'endobj') {
                        break;
                    }

                    $result[] = $value;
                }

                $c->reset($old_pos);

                if (isset($result[2][0]) && $result[2][0] == PDF_TYPE_STREAM) {
                    $result[0] = PDF_TYPE_STREAM;
                }

                return $result;
            }

            // The reference is not listed in the xref data.
            return null;
        } else {
            return $obj_spec;
        }
    }



    /**
     * Reads a token from the file
     *
     * Tokens pushed back onto the context's stack are returned first.
     * Otherwise leading whitespace is skipped and the next delimiter
     * ("[", "]", "(", ")", "<", ">", "<<", ">>") or run of non-delimiter
     * characters is returned.
     *
     * @param object $c pdf_context
     * @return mixed the token (string), or false when no more content is available
     */
    function pdf_read_token(&$c)
    {
        // If there is a token available
        // on the stack, pop it out and
        // return it.

        if (count($c->stack)) {
            return array_pop($c->stack);
        }

        // Strip away any whitespace

        do {
            if (!$c->ensure_content()) {
                return false;
            }
            $c->offset += _strspn($c->buffer, " \n\r\t", $c->offset);
        } while ($c->offset >= $c->length - 1);

        // Get the first character in the stream

        $char = $c->buffer[$c->offset++];

        switch ($char) {

            case '[':
            case ']':
            case '(':
            case ')':

                // This is either an array or literal string
                // delimiter, Return it

                return $char;

            case '<':
            case '>':

                // This could either be a hex string or
                // dictionary delimiter. Determine the
                // appropriate case and return the token

                if ($c->buffer[$c->offset] == $char) {
                    if (!$c->ensure_content()) {
                        return false;
                    }
                    $c->offset++;
                    return $char . $char;
                } else {
                    return $char;
                }

            default:

                // This is "another" type of token (probably
                // a dictionary entry or a numeric value)
                // Find the end and return it.

                if (!$c->ensure_content()) {
                    return false;
                }

                while (1) {

                    // Determine the length of the token

                    $pos = _strcspn($c->buffer, " []<>()\r\n\t/", $c->offset);
                    if ($c->offset + $pos <= $c->length - 1) {
                        break;
                    }

                    // If the script reaches this point,
                    // the token may span beyond the end
                    // of the current buffer. Therefore,
                    // we increase the size of the buffer
                    // and try again--just to be safe.
                    // NOTE(review): assumes increase_length() returns false
                    // when no more data can be read, as its other call sites
                    // in this class treat it; stop then instead of retrying
                    // forever and return what the buffer holds.
                    if (!$c->increase_length()) {
                        break;
                    }
                }

                $result = substr($c->buffer, $c->offset - 1, $pos + 1);

                $c->offset += $pos;
                return $result;
        }
    }


}

?>