PDF - rotate pages / remove pages / join pages
複数のPDFファイルを結合し、指定したページの回転や削除を行う。結果をPDFファイルとしてダウンロードする。機能を限定し、最小構成で実現することを試みる。
Sample Site
さくらインターネット・レンタルサーバスタンダードプランへ実装した作例。
(動作テスト用のためpdfファイルサイズを約1Mbyteに制限)
https://yanmos.jpn.org/editpdf/editpdf
Local環境ではuvicornで実行し確認、レンタルサーバではcgi経由でa2wsgiを利用した。
Structure
flowchart LR
subgraph fastAPI
cgi[main.py]
end
subgraph uikit
html[main.html]
end
infile((.pdf file))
outfile((.pdf file))
html-->cgi-->outfile
infile-->html
Library, Framework
使用したライブラリ、フレームワークは下記の通り。
UIkit・・・Page appearance
FastAPI・・・Python web framework
Ghostscript・・・PostScript interpreter
pypdf・・・PDF python library
Directory Tree
デプロイ時のディレクトリ構成。
editpdf/
├── static/
│ ├── css/
│ │ └── uikit.min.css
│ └── js/
│ ├── uikit-icons.min.js
│ └── uikit.min.js
├── templates/
│ └── main.html
├── work/
└── main.py
Source code
ソースコードを下記に示す。実際の運用版とは細部が異なる。
main.html
<!DOCTYPE html>
<html lang="ja">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <!-- UIkit stylesheet and scripts, resolved through the app's "static" mount. -->
  <link href="{{ url_for('static', path='/css/uikit.min.css') }}" rel="stylesheet">
  <script type="text/javascript" src="{{ url_for('static', path='/js/uikit.min.js') }}"></script>
  <script type="text/javascript" src="{{ url_for('static', path='/js/uikit-icons.min.js') }}"></script>
  <script type="text/javascript" src="{{ url_for('static', path='/js/util.js') }}"></script>
  <!-- NOTE(review): href="#" presumably suppresses the default favicon request; confirm. -->
  <link rel="shortcut icon" href="#">
  <title>Edit PDF Pages - .pdf</title>
  <!-- Navbar colour overrides: dark red bar with white content. -->
  <style>
    .uk-navbar {
      background-color: darkred;
    }

    .uk-navbar * {
      color: white;
    }
  </style>
</head>
<body class="text-center">
<!-- Page header: a navbar that shows only the "PDF" logo link. -->
<header>
  <div class="uk-container uk-width-expand">
    <nav class="uk-container uk-navbar">
      <div class="uk-navbar-left">
        <a class="uk-navbar-item uk-logo" href="#">PDF</a>
        <ul class="uk-navbar-nav uk-visible@s"></ul>
      </div>
    </nav>
  </div>
</header>
<main>
  <div class="uk-container">
    <!-- Upload form: the .js-upload area is wired to UIkit.upload by the inline script. -->
    <form id="work" class="uk-form">
      <p class="uk-margin-top">Drag and drop the .pdf file to the area below or click the icon to select it</p>
      <div class="js-upload uk-placeholder uk-text-center uk-background-muted uk-padding-small">
        <div class="uk-text-right">
          <div uk-form-custom>
            <input id="inputpdf" type="file" />
            <span class="upload-icon"><span uk-icon='icon: cloud-upload; ratio: 1.2'></span></span>
          </div>
        </div>
      </div>
      <progress id="js-progressbar" class="uk-progress" value="0" max="100" hidden></progress>
      <div id="downloadlink" class="uk-margin-top"></div>
    </form>
    <!-- Edit form: hidden until display_thum() fills #thums with one row per page. -->
    <form id="exec" class="uk-form uk-margin-small-bottom" action="/editpdf/editpdf/output" method="POST" hidden="hidden">
      <div class="uk-margin-top">
        <p id="outputtext" class="uk-margin-top uk-text-small"></p>
        <table class="uk-table">
          <tbody id="thums">
          </tbody>
        </table>
        <button id="execbutton" class="uk-button uk-button-default" type="submit">DOWNLOAD</button>
      </div>
    </form>
  </div>
</main>
<!-- Page footer: right-aligned contact address. -->
<footer>
  <div class="uk-container uk-text-right">
    yam.ktm@gmail.com
  </div>
</footer>
<script>
// Expire the session cookie whenever the page is (re)loaded.
window.addEventListener("load", () => {
  document.cookie = "editpdftmpdir=; path=/; max-age=0";
});

// Cancel the browser's default handling of drag-over and drop events that
// reach the window.
window.addEventListener('dragover', (event) => {
  event.preventDefault();
}, false);
window.addEventListener('drop', (event) => {
  event.preventDefault();
  event.stopPropagation();
}, false);
/**
 * Append one table row per page of the most recently uploaded PDF.
 *
 * Each row holds the page thumbnail and a radio group named
 * "<thumbnail basename>_r" with the values nc / cw / ccw / 180 / del.
 *
 * @param {string} c - Response body of the upload request: JSON with a
 *   "files" object carrying "thums", "thumsp" and "nfile".
 */
function display_thum(c) {
  let files;
  try {
    files = JSON.parse(c)["files"];
  } catch (err) {
    // The body was not JSON (e.g. an error page); leave the page untouched.
    console.log('Error', err);
    return;
  }
  if (!files) {
    return;
  }

  const nfile = files["nfile"];
  // Only the entries of the latest file are new; earlier ones are already shown.
  const thum = (files["thums"] || [])[nfile - 1];
  const thumsp = (files["thumsp"] || [])[nfile - 1];
  if (!Array.isArray(thum) || !Array.isArray(thumsp)) {
    return;
  }

  document.getElementById("exec").removeAttribute('hidden');
  const tbody = document.getElementById("thums");
  const tds = `<div class="uk-margin uk-grid-small uk-child-width-auto uk-grid">`;
  const tde = `</div>`;
  // [radio value, visible label]; the input id suffix equals the value.
  const choices = [
    ["nc", "N.C."],
    ["cw", "CW 90 deg."],
    ["ccw", "CCW 90 deg."],
    ["180", "Turn 180 deg."],
    ["del", "Remove"],
  ];

  // All loop variables are block-scoped; none leak into the global object.
  for (let i = 0; i < thum.length; i++) {
    const bname = thum[i].substring(0, thum[i].lastIndexOf("."));
    const imghtml = `<img class="uk-box-shadow-small" src="${thumsp[i]}">`;
    const labels = choices.map(function (choice) {
      const value = choice[0];
      const text = choice[1];
      // "nc" (no change) is the preselected option.
      const checked = value === "nc" ? " checked" : "";
      return `<label><input id="${bname}_${value}" class="uk-radio" type="radio" name="${bname}_r" value="${value}"${checked}> ${text}</label>`;
    });
    const radiohtml = "\n" + labels.join("\n") + "\n";

    const tr = document.createElement("tr");
    const tdi = document.createElement("td");
    const tdf = document.createElement("td");
    tdi.innerHTML = tds + imghtml + tde;
    tdf.innerHTML = tds + radiohtml + tde;
    tr.appendChild(tdi);
    tr.appendChild(tdf);
    tbody.appendChild(tr);
  }
}
var bar = document.getElementById('js-progressbar');

// Mirror the transfer state of a progress event onto the progress bar.
function show_progress(e) {
  bar.max = e.total;
  bar.value = e.loaded;
}

// Upload a single PDF to the server and render its thumbnails on success.
UIkit.upload('.js-upload', {
  url: '/editpdf/editpdf/upload',
  method: 'post',
  multiple: false,
  allow: '*.pdf',
  mime: 'application/pdf',
  name: 'file',
  error: function (e) {
    console.log('Error', arguments, e);
  },
  beforeSend: function (e) {
    return true; // true:continue upload
  },
  loadStart: function (e) {
    bar.removeAttribute('hidden');
    show_progress(e);
  },
  progress: function (e) {
    show_progress(e);
  },
  loadEnd: function (e) {
    show_progress(e);
    // loadEnd also fires for failed requests; only a successful response
    // carries the JSON thumbnail list that display_thum() expects.
    var xhr = e.target;
    if (xhr.status >= 200 && xhr.status < 300) {
      display_thum(xhr.response);
    }
  },
  completeAll: function (e) {
    // Keep the completed bar visible for a moment before hiding it.
    setTimeout(function () {
      bar.setAttribute('hidden', 'hidden');
    }, 1000);
  }
});
</script>
</body>
</html>
main.py
import os
import socket
import platform
import pypdf
import subprocess
import glob
import json
import tempfile
from fastapi import FastAPI, Request, Response
from fastapi.responses import HTMLResponse, FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
# Directory (relative to BASE_DIR) that holds one sub-directory per session.
WORK_DIR = 'work'
# Per-session JSON file that records the uploaded files and their thumbnails.
INFO_FILE = 'info.json'
# Name of the generated PDF inside the session directory.
OUT_FILE = 'output.pdf'
# Cookie that carries the session directory name between requests.
COOKIE_KEY = "editpdftmpdir"
BASE_DIR = os.path.dirname(__file__)
PLATFORM = platform.system()

# Ghostscript executable and path separator for each supported platform.
if PLATFORM == 'Windows':
    GS_CMD = 'C:\\Program Files\\gs\\gs9.52\\bin\\gswin64c.exe'
    DIR_SEP = '\\'
elif PLATFORM in ('Darwin', 'FreeBSD', 'Linux'):
    GS_CMD = 'gs'
    DIR_SEP = '/'
else:
    # Raising a bare string is itself a TypeError; raise a real exception so
    # the message actually reaches the operator.
    raise RuntimeError('Error: Unknown platform.')
app = FastAPI()
# Anchor both directories at BASE_DIR. The request handlers call os.chdir(),
# and a relative directory would be resolved against whichever directory was
# entered last, not against the application directory.
app.mount(
    "/static",
    StaticFiles(directory=os.path.join(BASE_DIR, "static")),
    name="static",
)
templates = Jinja2Templates(directory=os.path.join(BASE_DIR, "templates"))
@app.get("/editpdf", response_class=HTMLResponse)
async def start_editpdf(request: Request):
    """Serve the editor page and start a fresh editing session.

    Args:
        request: Incoming request; passed to the template context.

    Returns:
        The rendered ``main.html`` template. If the request carried a session
        cookie, the response also tells the browser to drop it.
    """
    os.chdir(BASE_DIR)
    page = templates.TemplateResponse("main.html", {"request": request})
    if COOKIE_KEY in request.cookies:
        # Mutating request.cookies never reaches the browser; the cookie has
        # to be expired on the response.
        page.delete_cookie(COOKIE_KEY)
    return page
@app.post("/editpdf/upload", response_class=JSONResponse)
async def upload_file_pdf(request: Request, response: Response):
    """Store an uploaded PDF in the session directory and build thumbnails.

    The session directory name travels in the ``COOKIE_KEY`` cookie. A request
    without a usable cookie starts a new session in a fresh temporary
    directory under ``WORK_DIR`` and sets the cookie on the response.

    Args:
        request: Multipart request whose ``file`` field holds the PDF.
        response: Response object used to set the session cookie.

    Returns:
        ``{"files": info}`` where ``info`` has the keys ``nfile`` (number of
        files uploaded so far), ``infiles`` (stored file names), ``thums``
        (thumbnail file names per file) and ``thumsp`` (thumbnail URLs per
        file).
    """
    workroot = os.path.join(BASE_DIR, WORK_DIR)
    form = await request.form()
    uploadf = form['file']

    # The cookie is client-controlled: keep only its last path component so it
    # can never address anything outside the work directory.
    tmpdir = os.path.basename(request.cookies.get(COOKIE_KEY) or '')
    infor = None
    if tmpdir not in ('', '.', '..'):
        workdirpath = os.path.join(workroot, tmpdir)
        infopath = os.path.join(workdirpath, INFO_FILE)
        try:
            with open(infopath, "r") as finfo:
                infor = json.load(finfo)
        except (OSError, ValueError):
            # Stale or forged cookie: fall through and start a new session.
            infor = None

    if infor is None:
        workdirpath = tempfile.mkdtemp(dir=workroot)
        tmpdir = os.path.basename(workdirpath)
        infopath = os.path.join(workdirpath, INFO_FILE)
        infor = {'nfile': 0, 'infiles': [], 'thums': [], 'thumsp': []}
        response.set_cookie(key=COOKIE_KEY, value=tmpdir)

    infor['nfile'] += 1
    os.chdir(workdirpath)

    # Uploaded files are stored as 1.pdf, 2.pdf, ... in upload order.
    infile = str(infor['nfile']) + '.pdf'
    infor['infiles'].append(infile)
    with open(os.path.join(workdirpath, infile), 'wb') as fp:
        fp.write(await uploadf.read())

    thums = get_thums(workdirpath, infile, infor['nfile'])
    infor['thums'].append(thums)
    thumsp = [f"/editpdf/editpdf/img/{tmpdir}/" + th for th in thums]
    infor['thumsp'].append(thumsp)

    with open(infopath, "w") as finfo:
        json.dump(infor, finfo)
    return {"files": infor}
@app.get("/editpdf/img/{dname}/{fname}", response_class=FileResponse)
async def download_img_file(dname, fname, request: Request):
    """Serve a file from a session directory under ``WORK_DIR``.

    Args:
        dname: Session directory name.
        fname: File name inside that directory.
        request: Incoming request (unused).

    Returns:
        The file as a ``FileResponse``, or an empty 404 response when the
        resolved path is not an existing file inside the work directory.
    """
    os.chdir(BASE_DIR)
    workroot = os.path.realpath(os.path.join(BASE_DIR, WORK_DIR))
    fpath = os.path.realpath(os.path.join(workroot, dname, fname))
    # Both parameters come from the URL: reject anything (e.g. "..") that
    # resolves to a location outside the work directory.
    if not fpath.startswith(workroot + os.sep) or not os.path.isfile(fpath):
        return Response(status_code=404)
    return FileResponse(fpath)
@app.post("/editpdf/output", response_class=FileResponse)
async def output_page(request: Request):
    """Build the edited PDF for the current session and return it.

    Every page of every uploaded file is looked up in the submitted form under
    ``<thumbnail basename>_r``. The value selects the action: ``del`` drops
    the page, ``cw`` / ``ccw`` / ``180`` rotate it by 90 / 270 / 180 degrees,
    and anything else (including a missing field) keeps it unchanged.

    Args:
        request: Form POST carrying the per-page choices and the session
            cookie.

    Returns:
        The generated ``OUT_FILE`` as a ``FileResponse``; an empty 400
        response when no usable session cookie was sent, or an empty 404
        response when the session has no info file.
    """
    # The cookie is client-controlled: keep only its last path component so it
    # can never address anything outside the work directory.
    tmpdir = os.path.basename(request.cookies.get(COOKIE_KEY) or '')
    if tmpdir in ('', '.', '..'):
        return Response(status_code=400)
    workdirpath = os.path.join(BASE_DIR, WORK_DIR, tmpdir)
    infopath = os.path.join(workdirpath, INFO_FILE)
    if not os.path.isfile(infopath):
        return Response(status_code=404)

    form = await request.form()
    os.chdir(workdirpath)
    with open(infopath, "r") as finfo:
        infor = json.load(finfo)

    # Form value -> rotation angle in degrees, as passed to page.rotate().
    rotations = {'cw': 90, 'ccw': 270, '180': 180}
    pdfw = pypdf.PdfWriter()
    for j in range(infor['nfile']):
        thums = infor['thums'][j]
        with open(os.path.join(workdirpath, infor['infiles'][j]), "rb") as inpdf:
            pdfr = pypdf.PdfReader(inpdf)
            for i in range(pdfr.get_num_pages()):
                action = 'nc'
                # A page without a thumbnail has no form field; keep it as is.
                if i < len(thums):
                    rname = os.path.splitext(thums[i])[0] + "_r"
                    action = form.get(rname, 'nc')
                if action == 'del':
                    continue
                page = pdfr.get_page(i)
                angle = rotations.get(action, 0)
                if angle != 0:
                    page.rotate(angle)
                pdfw.add_page(page)

    outfile = os.path.join(workdirpath, OUT_FILE)
    with open(outfile, "wb") as outpdf:
        pdfw.write(outpdf)
    return FileResponse(outfile)
def get_thums(workdirpath, infile, nfile):
    """Render one JPEG thumbnail per page of ``infile`` with Ghostscript.

    Args:
        workdirpath: Session directory; it becomes the current working
            directory and receives the thumbnails.
        infile: Name of the PDF inside ``workdirpath``.
        nfile: Sequence number of the file, used as the thumbnail prefix.

    Returns:
        Thumbnail file names (``<nfile>_<page>.jpg``, relative to
        ``workdirpath``) in page order. The list is empty when Ghostscript
        wrote no images; its exit status is not checked.
    """
    # The relative output pattern and the glob below both rely on the cwd.
    os.chdir(workdirpath)
    outopt = '-sOutputFile={}_%03d.jpg'.format(nfile)
    cmd = [
        GS_CMD,
        '-q', '-dSAFER', '-dBATCH', '-dNOPAUSE', '-sDEVICE=jpeg', '-r16',
        outopt,
        os.path.join(workdirpath, infile)
    ]
    subprocess.run(cmd)
    fs = glob.glob('{}_*.jpg'.format(nfile))
    # %03d grows to four digits from page 1000 on, so a plain string sort
    # would put page 1000 before page 101. Ordering by length first keeps the
    # list in page order.
    fs.sort(key=lambda name: (len(name), name))
    return fs
# Running this module as a script does nothing; the module only defines `app`.
if __name__ == "__main__":
    pass
2024/06/11 サムネールをpng256からjpegへ変更
2024/06/08 複数PDFファイルの結合機能を追加
2024/05/06 Windows環境での機能不全を修正
2024/05/04 初版