PDF - rotate pages / remove pages / join pages
複数のPDFファイルを繋げたり、指定したページの回転や削除を行う。結果はPDFファイルとしてダウンロードできる。
Sample Site
さくらインターネット・レンタルサーバスタンダードプランへ実装した作例。
(動作テスト用のためpdfファイルサイズを約1Mbyteに制限)
https://yanmos.jpn.org/editpdf/editpdf
Local環境ではuvicornで実行し確認、レンタルサーバではcgi経由でa2wsgiを利用した。
Structure
flowchart LR
subgraph fastAPI
cgi[main.py]
end
subgraph uikit
html[main.html]
end
infile((.pdf file))
outfile((.pdf file))
html-->cgi-->outfile
infile-->html
Library, Framework
使用したライブラリ、フレームワークは下記の通り。
UIkit・・・Page appearance
FastAPI・・・Python web framework
Ghostscript・・・PostScript interpreter
pypdf・・・Python PDF library
Directory Tree
デプロイ時のディレクトリ構成。
editpdf/
├── static/
│ ├── css/
│ │ └── uikit.min.css
│ └── js/
│ ├── uikit-icons.min.js
│ └── uikit.min.js
├── templates/
│ └── main.html
├── work/
└── main.py
Source code
ソースコードを下記に示す。実際の運用版とは細部が異なる。
main.html
<!DOCTYPE html>
<html lang="ja">
<head>
<!-- Document metadata. The UIkit stylesheet and scripts are resolved through the "static" route of the application. -->
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link href="{{ url_for('static', path='/css/uikit.min.css') }}" rel="stylesheet">
<script type=text/javascript src="{{ url_for('static', path='/js/uikit.min.js') }}"></script>
<script type=text/javascript src="{{ url_for('static', path='/js/uikit-icons.min.js') }}"></script>
<!-- href="#" points the favicon at the current document instead of a separate icon file. -->
<link rel="shortcut icon" href="#">
<title>Edit PDF Pages - .pdf</title>
<!-- Local overrides: dark red navigation bar with white text. -->
<style>
.uk-navbar {
background-color: darkred;
}
.uk-navbar * {
color: white;
}
</style>
</head>
<body class="text-center">
<header>
<!-- Top navigation bar: shows only the "PDF" logo; the navigation list is left empty. -->
<div class="uk-container uk-width-expand">
<nav class="uk-container uk-navbar">
<div class="uk-navbar-left">
<a class="uk-navbar-item uk-logo" href="#">PDF</a>
<ul class="uk-navbar-nav uk-visible@s"></ul>
</div>
</nav>
</div>
</header>
<main>
<!-- Two editors selected with a UIkit tab bar; each panel of the switcher below corresponds to one tab. -->
<div class="uk-container">
<ul uk-tab>
<li><a href="#">FEW PAGES</a></li>
<li><a href="#">MANY PAGES</a></li>
</ul>
<div class="uk-switcher uk-margin">
<!-- Panel 1 (FEW PAGES): per-page thumbnails with one radio group per page. -->
<div>
<!-- Upload area; the script binds UIkit.upload to the .js-upload element. -->
<form id="work" class="uk-form">
<p class="uk-margin-top">Drag and drop the .pdf file to the area below or click the icon to select it.</p>
<div class="js-upload uk-placeholder uk-text-center uk-background-muted uk-padding-small">
<div class="uk-text-right">
<div uk-form-custom>
<input id="inputpdf" type="file" />
<span class="upload-icon"><span uk-icon='icon: cloud-upload; ratio: 1.2'></span></span>
</div>
</div>
</div>
<progress id="js-progressbar" class="uk-progress" value="0" max="100" hidden></progress>
<div id="downloadlink" class="uk-margin-top"></div>
</form>
<!-- Hidden until display_thum() reveals it; the rows of tbody#thums are generated by that function. -->
<form id="exec" class="uk-form uk-margin-small-bottom" action="/editpdf/editpdf/output" method="POST" hidden="hidden">
<div class="uk-margin-top">
<p id="outputtext" class="uk-margin-top uk-text-small"></p>
<table class="uk-table">
<tbody id="thums">
</tbody>
</table>
<button id="execbutton" class="uk-button uk-button-default" type="submit">DOWNLOAD</button>
</div>
</form>
</div>
<!-- Panel 2 (MANY PAGES): pages are selected by typing page numbers and ranges. -->
<div>
<!-- Upload area; the script binds UIkit.upload to the .js-upload2 element. -->
<form id="work2" class="uk-form">
<p class="uk-margin-top">Drag and drop the .pdf file to the area below or click the icon to select it</p>
<div class="js-upload2 uk-placeholder uk-text-center uk-background-muted uk-padding-small">
<div class="uk-text-right">
<div uk-form-custom>
<input id="inputpdf2" type="file" />
<span class="upload-icon"><span uk-icon='icon: cloud-upload; ratio: 1.2'></span></span>
</div>
</div>
</div>
<progress id="js-progressbar2" class="uk-progress" value="0" max="100" hidden></progress>
<div id="downloadlink2" class="uk-margin-top"></div>
</form>
<!-- Hidden until display_editform() reveals it; that function appends one group of inputs per uploaded file to div#editform. -->
<form id="exec2" class="uk-form-horizontal uk-margin-small-bottom" action="/editpdf/editpdf/output2" method="POST" hidden="hidden">
<!-- The paragraph below tells the user to give pages as a comma-separated list and/or hyphenated ranges, e.g. 1,2,5,10-20,25. -->
<p>ページ指定はカンマ区切りかハイフンでページ範囲を指定。例: 1,2,5,10-20,25</p>
<div class="uk-margin" id="editform">
</div>
<div class="uk-margin-top uk-text-center">
<button id="execbutton2" class="uk-button uk-button-default" type="submit">DOWNLOAD</button>
</div>
</form>
</div>
</div>
</div>
</main>
<footer>
<!-- Right-aligned contact address. -->
<div class="uk-container uk-text-right">
yam.ktm@gmail.com
</div>
</footer>
<script>
// Expire the session cookie every time the page is (re)loaded, so the next
// upload is not attached to files from an earlier visit.
window.addEventListener("load", () => {
    document.cookie = "editpdftmpdir=; path=/; max-age=0"
});

// Suppress the browser's default handling of files dragged over or dropped
// onto the window.
window.addEventListener('dragover', (dragEvent) => {
    dragEvent.preventDefault();
}, false);

window.addEventListener('drop', (dropEvent) => {
    dropEvent.preventDefault();
    dropEvent.stopPropagation();
}, false);
// Display thumbnails
//
// Reveals the #exec form and appends one table row per page of the file that
// was just uploaded: a thumbnail image plus a radio group that selects what to
// do with the page (keep, rotate or remove).
//
// c: JSON text of the upload response. It must contain "files" with
//    "nfile" (1-based number of the latest file), "thums" (thumbnail file
//    names per file) and "thumsp" (thumbnail URLs per file).
function display_thum(c) {
    document.getElementById("exec").removeAttribute('hidden');
    const tbody = document.getElementById("thums");
    const files = JSON.parse(c)["files"];
    const nfile = files["nfile"];
    // Only the newest file is rendered; rows of earlier files are already in the table.
    const thum = files["thums"][nfile - 1];
    const thump = files["thumsp"][nfile - 1];
    const tds = `<div class="uk-margin uk-grid-small uk-child-width-auto uk-grid">`;
    const tde = `</div>`;
    // All locals are declared explicitly (the loop index and the HTML
    // fragments used to leak into the global scope).
    for (let i = 0; i < thum.length; i++) {
        // The thumbnail name without its extension prefixes the radio group
        // name "<bname>_r" that is submitted with the form.
        const bname = thum[i].substring(0, thum[i].lastIndexOf("."));
        const tr = document.createElement("tr");
        const tdi = document.createElement("td");
        const tdf = document.createElement("td");
        const imghtml = `<img class="uk-box-shadow-small" src="${thump[i]}">`;
        const radiohtml = `
<label><input id="${bname}_nc" class="uk-radio" type="radio" name="${bname}_r" value="nc" checked> N.C.</label>
<label><input id="${bname}_cw" class="uk-radio" type="radio" name="${bname}_r" value="cw"> CW 90 deg.</label>
<label><input id="${bname}_ccw" class="uk-radio" type="radio" name="${bname}_r" value="ccw"> CCW 90 deg.</label>
<label><input id="${bname}_180" class="uk-radio" type="radio" name="${bname}_r" value="180"> Turn 180 deg.</label>
<label><input id="${bname}_del" class="uk-radio" type="radio" name="${bname}_r" value="del"> Remove</label>
`;
        tdi.innerHTML = tds + imghtml + tde;
        tdf.innerHTML = tds + radiohtml + tde;
        tr.appendChild(tdi);
        tr.appendChild(tdf);
        tbody.appendChild(tr);
    }
}
// Upload widget of the "FEW PAGES" tab.
// The progress bar is held in a variable used by this widget only: the
// callbacks run long after the script has finished, so a single `bar` variable
// shared by both upload widgets would make every callback drive whichever
// element was assigned to it last.
const barFew = document.getElementById('js-progressbar');
UIkit.upload('.js-upload', {
    url: '/editpdf/editpdf/upload',
    method: 'post',
    multiple: false,
    allow: '*.pdf',
    mime: 'application/pdf',
    name: 'file',
    error: function (e) {
        console.log('Error', arguments, e);
    },
    beforeSend: function (e) {
        return true; // true:continue upload
    },
    loadStart: function (e) {
        barFew.removeAttribute('hidden');
        barFew.max = e.total;
        barFew.value = e.loaded;
    },
    progress: function (e) {
        barFew.max = e.total;
        barFew.value = e.loaded;
    },
    loadEnd: function (e) {
        barFew.max = e.total;
        barFew.value = e.loaded;
        // Only a successful reply carries the JSON that display_thum() parses;
        // an error page would make JSON.parse throw.
        if (e.target.status === 200) {
            display_thum(e.target.response);
        } else {
            console.log('Error', e.target.status, e.target.statusText);
        }
    },
    completeAll: function (e) {
        // Leave the completed bar visible for a moment before hiding it.
        setTimeout(function () {
            barFew.setAttribute('hidden', 'hidden');
        }, 1000);
    }
});
// Upload widget of the "MANY PAGES" tab.
// The progress bar is held in a variable used by this widget only: the
// callbacks run long after the script has finished, so a single `bar` variable
// shared by both upload widgets would make every callback drive whichever
// element was assigned to it last.
const barMany = document.getElementById('js-progressbar2');
UIkit.upload('.js-upload2', {
    url: '/editpdf/editpdf/upload2',
    method: 'post',
    multiple: false,
    allow: '*.pdf',
    mime: 'application/pdf',
    name: 'file',
    error: function (e) {
        console.log('Error', arguments, e);
    },
    beforeSend: function (e) {
        return true; // true:continue upload
    },
    loadStart: function (e) {
        barMany.removeAttribute('hidden');
        barMany.max = e.total;
        barMany.value = e.loaded;
    },
    progress: function (e) {
        barMany.max = e.total;
        barMany.value = e.loaded;
    },
    loadEnd: function (e) {
        barMany.max = e.total;
        barMany.value = e.loaded;
        // Only a successful reply carries the JSON that display_editform()
        // parses; an error page would make JSON.parse throw.
        if (e.target.status === 200) {
            display_editform(e.target.response);
        } else {
            console.log('Error', e.target.status, e.target.statusText);
        }
    },
    completeAll: function (e) {
        // Leave the completed bar visible for a moment before hiding it.
        setTimeout(function () {
            barMany.setAttribute('hidden', 'hidden');
        }, 1000);
    }
});
// Once the page has loaded, make each DOWNLOAD button submit its own form
// programmatically instead of through the button's default action.
window.addEventListener("load", () => {
    const buttonToForm = [
        ["execbutton", "exec"],
        ["execbutton2", "exec2"],
    ];
    buttonToForm.forEach(([buttonId, formId]) => {
        const button = document.getElementById(buttonId);
        button.addEventListener("click", (clickEvent) => {
            clickEvent.preventDefault();
            document.getElementById(formId).submit();
        });
    });
});
// Reveals the #exec2 form and appends, for the file that was just uploaded, a
// heading with its name and five text inputs in which the user lists the pages
// to rotate, extract or delete.
//
// c: JSON text of the upload response. It must contain "files" with
//    "nfile" (1-based number of the latest file) and "filenames" (original
//    names of all uploaded files).
function display_editform(c) {
    document.getElementById("exec2").removeAttribute('hidden');
    const divform = document.getElementById("editform");
    const files = JSON.parse(c)["files"];
    const i = files["nfile"] - 1;
    // Zero-padded file index; the input names "<key>-<istr>" are the form
    // fields submitted to the server.
    const istr = String(i).padStart(3, '0');

    const wrapper = document.createElement("div");
    wrapper.className = "uk-margin";

    // The file name comes from the user's file system, so it is inserted as
    // text rather than as HTML.
    const heading = document.createElement("h3");
    heading.className = "uk-header";
    heading.textContent = files["filenames"][i];
    wrapper.appendChild(heading);

    const fields = [
        ["cw", "CW 90 degree pages"],
        ["ccw", "CCW 90 degree pages"],
        ["turn", "Turn 180 degree pages"],
        ["ext", "Extract pages"],
        ["del", "Delete pages"],
    ];
    for (const [key, caption] of fields) {
        // The label's "for" must equal the input's id, otherwise the label is
        // not associated with the field.
        const inputId = `${key}-pages-${istr}`;
        const group = document.createElement("div");
        group.className = "uk-margin-top";
        group.innerHTML =
            `<label class="uk-form-label" for="${inputId}">${caption}</label>` +
            `<div class="uk-form-controls">` +
            `<input id="${inputId}" class="uk-input" type="text" placeholder="Page numbers" aria-label="" name="${key}-${istr}">` +
            `</div>`;
        wrapper.appendChild(group);
    }

    const div = document.createElement("div");
    div.appendChild(wrapper);
    divform.appendChild(div);
}
</script>
</body>
</html>
main.py
import os
import socket
import platform
import pypdf
import subprocess
import glob
import json
import tempfile
from fastapi import FastAPI, Request, Response
from fastapi.responses import HTMLResponse, FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
# --- Module configuration ---------------------------------------------------
PORT = 8000
# Directory (relative to BASE_DIR) in which one temporary sub-directory per
# editing session is created.
WORK_DIR = 'work'
# Per-session JSON file holding the state of the uploads.
INFO_FILE = 'info.json'
# Name of the generated PDF inside the session directory.
OUT_FILE = 'output.pdf'
# Cookie that carries the name of the session directory.
COOKIE_KEY = "editpdftmpdir"
# Ghostscript output device and file extension used for thumbnails.
# Alternative setting: {'code': 'png256', 'ext': 'png'}
THUM_IMG = {'code': 'jpeg', 'ext': 'jpg'}
# Image served when a requested file does not have the thumbnail extension.
NULL_IMG = 'nullimg.png'
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Ghostscript executable and path separator for the host operating system.
PLATFORM = platform.system()
if PLATFORM == 'Windows':
    GS_CMD = 'C:\\Program Files\\gs\\gs10.04.0\\bin\\gswin64c.exe'
    DIR_SEP = '\\'
elif PLATFORM in ('Darwin', 'FreeBSD', 'Linux'):
    GS_CMD = 'gs'
    DIR_SEP = '/'
else:
    # A plain string cannot be raised (that would be a TypeError hiding the
    # message), so raise a real exception.
    raise RuntimeError('Error: Unknown platform.')
# Application object. Both directories below are given as relative paths, so
# they are looked up relative to the process's current working directory.
app = FastAPI()
# Static assets (UIkit CSS/JS); main.html refers to this mount by its name
# "static" through url_for().
app.mount("/static", StaticFiles(directory="static"), name="static")
# Jinja2 environment used to render templates/main.html.
templates = Jinja2Templates(directory="templates")
@app.get("/editpdf", response_class=HTMLResponse)
async def start_editpdf(request: Request):
    """Serve the editor page and discard any previous editing session.

    Args:
        request: Incoming request; handed to the template and inspected for
            the session cookie.

    Returns:
        The rendered ``main.html``. When the request carried the session
        cookie, the response also tells the browser to delete it.
    """
    # Other handlers change the working directory to a session directory;
    # return to the application directory before rendering the template.
    os.chdir(BASE_DIR)
    page = templates.TemplateResponse("main.html", {"request": request})
    if COOKIE_KEY in request.cookies:
        # Assigning to request.cookies only alters the parsed request data.
        # The cookie has to be expired on the response to reach the browser.
        page.delete_cookie(key=COOKIE_KEY)
    return page
@app.post("/editpdf/upload", response_class=JSONResponse)
async def upload_file_pdf(request: Request, response: Response):
    """Store an uploaded PDF for the "few pages" editor and render thumbnails.

    The session is identified by the ``COOKIE_KEY`` cookie, whose value names
    a directory under ``BASE_DIR/WORK_DIR``. A new directory (and cookie) is
    created when the request has no usable session or when the stored session
    was started in another mode; otherwise the file is added to the current
    session.

    Args:
        request: Incoming request; the PDF is read from the multipart form
            field ``file``.
        response: Used to set the session cookie on the reply.

    Returns:
        ``{"files": infor}``, where ``infor`` is the session state that is
        also written to ``INFO_FILE`` (``mode``, ``nfile``, ``infiles``,
        ``thums`` and ``thumsp``).
    """
    # The uploaded file must be taken from the form; ``uploadf`` was
    # previously used below without ever being assigned.
    form = await request.form()
    uploadf = form['file']

    workroot = os.path.abspath(os.path.join(BASE_DIR, WORK_DIR))
    infor = dict()
    start_new = True
    tmpdir = request.cookies.get(COOKIE_KEY)
    if tmpdir:
        workdirpath = os.path.abspath(os.path.join(workroot, tmpdir))
        infopath = os.path.join(workdirpath, INFO_FILE)
        # The cookie is client-controlled: accept it only when it names a
        # direct child of the work directory that holds a session file.
        if os.path.dirname(workdirpath) == workroot and os.path.isfile(infopath):
            with open(infopath, "r") as finfo:
                infor = json.load(finfo)
            if infor['mode'] == 'fewpages':
                infor['nfile'] += 1
                start_new = False
    if start_new:
        workdirpath = tempfile.mkdtemp(dir=workroot)
        tmpdir = os.path.basename(workdirpath)
        infopath = os.path.join(workdirpath, INFO_FILE)
        infor['mode'] = 'fewpages'
        infor['nfile'] = 1
        infor['infiles'] = list()
        infor['thums'] = list()
        infor['thumsp'] = list()
        response.set_cookie(key=COOKIE_KEY, value=tmpdir)

    os.chdir(workdirpath)
    # Uploaded files are stored as 1.pdf, 2.pdf, ... in upload order.
    infile = str(infor['nfile']) + '.pdf'
    infor['infiles'].append(infile)
    with open(os.path.join(workdirpath, infile), 'wb') as fp:
        fp.write(uploadf.file.read())

    thums = get_thums(workdirpath, infile, infor['nfile'])
    infor['thums'].append(thums)
    # URLs under which the page can fetch the thumbnails.
    thumsp = [f"/editpdf/editpdf/img/{tmpdir}/" + th for th in thums]
    infor['thumsp'].append(thumsp)

    with open(infopath, "w") as finfo:
        json.dump(infor, finfo)
    return {"files": infor}
@app.get("/editpdf/img/{dname}/{fname}", response_class=FileResponse)
async def download_img_file(dname, fname, request: Request):
    """Serve one thumbnail image of an editing session.

    Args:
        dname: Name of the session directory under ``BASE_DIR/WORK_DIR``.
        fname: Thumbnail file name inside that directory.
        request: Incoming request (not used).

    Returns:
        The requested file when its extension is the thumbnail extension and
        the path stays inside the session directory; otherwise the
        placeholder image ``NULL_IMG``.
    """
    os.chdir(BASE_DIR)
    placeholder = os.path.join(BASE_DIR, NULL_IMG)
    if fname.split('.')[-1] != THUM_IMG['ext']:
        return FileResponse(placeholder)

    workroot = os.path.abspath(os.path.join(BASE_DIR, WORK_DIR))
    fpath = os.path.abspath(os.path.join(workroot, dname, fname))
    # Both path parameters are client-controlled (for example ".."): the
    # resolved file must sit exactly two levels below the work directory.
    if os.path.dirname(os.path.dirname(fpath)) != workroot:
        return FileResponse(placeholder)
    return FileResponse(fpath)
@app.post("/editpdf/output", response_class=FileResponse)
async def output_page(request: Request):
    """Build the output PDF for the "few pages" editor.

    Every page of every uploaded file has a radio group named
    ``<thumbnail name without extension>_r`` in the submitted form. Its value
    selects the action: ``nc`` (keep), ``cw`` (rotate 90), ``ccw`` (rotate
    270), ``180`` (rotate 180) or ``del`` (drop the page).

    Args:
        request: Incoming request carrying the session cookie and the form.

    Returns:
        The generated ``OUT_FILE`` as a file response, or a plain-text 400
        response when the session cookie is missing or does not refer to a
        session directory.
    """
    # NOTE(review): the /editpdf/output2 handler in this module is also named
    # ``output_page``; both routes are registered, but the module-level name
    # ends up bound to whichever function is defined last.
    workroot = os.path.abspath(os.path.join(BASE_DIR, WORK_DIR))
    tmpdir = request.cookies.get(COOKIE_KEY) or ''
    workdirpath = os.path.abspath(os.path.join(workroot, tmpdir))
    infopath = os.path.join(workdirpath, INFO_FILE)
    # The cookie is client-controlled: it must name a direct child of the
    # work directory that holds a session file.
    if os.path.dirname(workdirpath) != workroot or not os.path.isfile(infopath):
        return Response(content="Invalid or expired session.",
                        status_code=400, media_type="text/plain")

    form = await request.form()
    os.chdir(workdirpath)
    with open(infopath, "r") as finfo:
        infor = json.load(finfo)

    # Rotation angle per radio value; any other value leaves the page as is.
    rotations = {'cw': 90, 'ccw': 270, '180': 180}
    pdfw = pypdf.PdfWriter()
    for j in range(infor['nfile']):
        with open(os.path.join(workdirpath, infor['infiles'][j]), "rb") as inpdf:
            pdfr = pypdf.PdfReader(inpdf)
            for i in range(pdfr.get_num_pages()):
                page = pdfr.get_page(i)
                rname = os.path.splitext(infor['thums'][j][i])[0] + "_r"
                # A missing field is treated like "no change".
                choice = form.get(rname, 'nc')
                if choice == 'del':
                    continue
                # Looking the angle up with a default keeps it from being
                # unbound, or left over from the previous page, when the
                # value is not one of the expected ones.
                angle = rotations.get(choice, 0)
                if angle != 0:
                    page.rotate(angle)
                pdfw.add_page(page)

    outfile = os.path.join(workdirpath, OUT_FILE)
    with open(outfile, "wb") as outpdf:
        pdfw.write(outpdf)
    return FileResponse(outfile)
def get_thums(workdirpath, infile, nfile):
    """Render one thumbnail per page of a PDF with Ghostscript.

    The images are written into ``workdirpath`` as ``<nfile>_<NNN>.<ext>``,
    with the device and extension taken from ``THUM_IMG``. The working
    directory of the process is changed to ``workdirpath``.

    Args:
        workdirpath: Session directory holding the PDF.
        infile: File name of the PDF inside ``workdirpath``.
        nfile: Number of the file within the session; prefixes the image
            names.

    Returns:
        Sorted list of the thumbnail file names found for this file.
    """
    os.chdir(workdirpath)
    device = THUM_IMG['code']
    extension = THUM_IMG['ext']
    gs_args = [GS_CMD, '-q', '-dSAFER', '-dBATCH', '-dNOPAUSE']
    gs_args.append(f'-sDEVICE={device}')
    gs_args.append('-r16')
    # "%03d" is left for Ghostscript to replace with the page number.
    gs_args.append(f'-sOutputFile={nfile}_%03d.{extension}')
    gs_args.append(os.path.join(workdirpath, infile))
    subprocess.run(gs_args)
    thumbnails = glob.glob(f'{nfile}_*.{extension}')
    thumbnails.sort()
    return thumbnails
@app.post("/editpdf/upload2", response_class=JSONResponse)
async def upload_file_pdf2(request: Request, response: Response):
    """Store an uploaded PDF for the "many pages" editor.

    The session is identified by the ``COOKIE_KEY`` cookie, whose value names
    a directory under ``BASE_DIR/WORK_DIR``. A new directory (and cookie) is
    created when the request has no usable session or when the stored session
    was started in another mode; otherwise the file is added to the current
    session.

    Args:
        request: Incoming request; the PDF is read from the multipart form
            field ``file``.
        response: Used to set the session cookie on the reply.

    Returns:
        ``{"files": infor}``, where ``infor`` is the session state that is
        also written to ``INFO_FILE`` (``mode``, ``nfile``, ``infiles`` and
        ``filenames``).
    """
    form = await request.form()
    uploadf = form['file']

    workroot = os.path.abspath(os.path.join(BASE_DIR, WORK_DIR))
    infor = dict()
    start_new = True
    tmpdir = request.cookies.get(COOKIE_KEY)
    if tmpdir:
        workdirpath = os.path.abspath(os.path.join(workroot, tmpdir))
        infopath = os.path.join(workdirpath, INFO_FILE)
        # The cookie is client-controlled: accept it only when it names a
        # direct child of the work directory that holds a session file.
        if os.path.dirname(workdirpath) == workroot and os.path.isfile(infopath):
            with open(infopath, "r") as finfo:
                infor = json.load(finfo)
            if infor['mode'] == 'manypages':
                infor['nfile'] += 1
                start_new = False
    if start_new:
        workdirpath = tempfile.mkdtemp(dir=workroot)
        tmpdir = os.path.basename(workdirpath)
        infopath = os.path.join(workdirpath, INFO_FILE)
        infor['mode'] = 'manypages'
        infor['nfile'] = 1
        infor['infiles'] = list()
        infor['filenames'] = list()
        response.set_cookie(key=COOKIE_KEY, value=tmpdir)

    os.chdir(workdirpath)
    # Uploaded files are stored as 1.pdf, 2.pdf, ... in upload order; the
    # original name is kept only for display.
    infile = str(infor['nfile']) + '.pdf'
    infor['infiles'].append(infile)
    infor['filenames'].append(uploadf.filename)
    with open(os.path.join(workdirpath, infile), 'wb') as fp:
        fp.write(uploadf.file.read())

    with open(infopath, "w") as finfo:
        json.dump(infor, finfo)
    return {"files": infor}
@app.post("/editpdf/output2", response_class=FileResponse)
async def output_page(request: Request):
    """Build the output PDF for the "many pages" editor.

    For the file with zero-based index ``j`` the form carries the fields
    ``cw-JJJ``, ``ccw-JJJ``, ``turn-JJJ``, ``ext-JJJ`` and ``del-JJJ``
    (``JJJ`` is ``j`` zero-padded to three digits). Each holds a page list
    such as ``1,2,5,10-20`` that is parsed with ``parse_ranges``. Pages are
    rotated by 90 / 270 / 180 degrees according to the first three fields.
    When a delete list is given, all pages except those are kept; otherwise
    only the pages of the extract list are kept, or every page when that list
    is empty too.

    Args:
        request: Incoming request carrying the session cookie and the form.

    Returns:
        The generated ``OUT_FILE`` as a file response, or a plain-text 400
        response when the session cookie is unusable or a page list cannot be
        parsed.
    """
    # NOTE(review): the /editpdf/output handler in this module is also named
    # ``output_page``; both routes are registered, but the module-level name
    # ends up bound to whichever function is defined last.
    workroot = os.path.abspath(os.path.join(BASE_DIR, WORK_DIR))
    tmpdir = request.cookies.get(COOKIE_KEY) or ''
    workdirpath = os.path.abspath(os.path.join(workroot, tmpdir))
    infopath = os.path.join(workdirpath, INFO_FILE)
    # The cookie is client-controlled: it must name a direct child of the
    # work directory that holds a session file.
    if os.path.dirname(workdirpath) != workroot or not os.path.isfile(infopath):
        return Response(content="Invalid or expired session.",
                        status_code=400, media_type="text/plain")

    form = await request.form()
    os.chdir(workdirpath)
    with open(infopath, "r") as finfo:
        infor = json.load(finfo)

    pdfw = pypdf.PdfWriter()
    for j in range(infor['nfile']):
        jstr = f"{j:03}"
        try:
            # Sets make the per-page membership tests below constant-time.
            cwpages = set(parse_ranges(form.get('cw-' + jstr, "")))
            ccwpages = set(parse_ranges(form.get('ccw-' + jstr, "")))
            turnpages = set(parse_ranges(form.get('turn-' + jstr, "")))
            extpages = set(parse_ranges(form.get('ext-' + jstr, "")))
            delpages = set(parse_ranges(form.get('del-' + jstr, "")))
        except ValueError:
            # A mistyped page list is a client error, not a server failure.
            return Response(content="Invalid page specification.",
                            status_code=400, media_type="text/plain")

        fpath = os.path.join(workdirpath, infor['infiles'][j])
        with open(fpath, "rb") as inpdf:
            pdfr = pypdf.PdfReader(inpdf)
            for i in range(pdfr.get_num_pages()):
                page = pdfr.get_page(i)
                if i in cwpages:
                    angle = 90
                elif i in ccwpages:
                    angle = 270
                elif i in turnpages:
                    angle = 180
                else:
                    angle = 0
                if angle != 0:
                    page.rotate(angle)
                # A delete list takes precedence over the extract list.
                if delpages:
                    keep = i not in delpages
                else:
                    keep = not extpages or i in extpages
                if keep:
                    pdfw.add_page(page)

    outfile = os.path.join(workdirpath, OUT_FILE)
    with open(outfile, "wb") as outpdf:
        pdfw.write(outpdf)
    return FileResponse(outfile)
def parse_ranges(rstr):
    """Convert a page specification into zero-based page indices.

    The specification is a comma-separated list of one-based page numbers
    and inclusive ``start-end`` ranges, e.g. ``"1,2,5,10-20,25"``. Whitespace
    is ignored and empty items (such as the one after a trailing comma) are
    skipped.

    Args:
        rstr: The specification; ``""`` or ``None`` means "no pages".

    Returns:
        List of zero-based indices in the order given (duplicates are kept).

    Raises:
        ValueError: If an item is neither a number nor a ``start-end`` pair
            of numbers.
    """
    result = []
    if not rstr:
        return result
    for item in rstr.split(","):  # split on commas
        item = "".join(item.split())  # drop every kind of whitespace
        if not item:
            continue  # tolerate "1,2," and "1,,2"
        if "-" in item:
            # Split on the hyphen and convert both ends to numbers.
            start, end = map(int, item.split("-"))
            # One-based inclusive start..end is zero-based start-1..end-1.
            result.extend(range(start - 1, end))
        else:
            result.append(int(item) - 1)  # single page number
    return result
# Running this file directly does nothing: the module only defines ``app``.
# The accompanying notes say it is served by uvicorn locally and through CGI
# with a2wsgi on the rental server.
if __name__ == "__main__":
    pass
2025/01/12 ページ指定で編集するタブを追加
2024/06/11 サムネールをpng256からjpegへ変更
2024/06/08 複数PDFファイルの結合機能を追加
2024/05/06 Windows環境での機能不全を修正
2024/05/04 初版