Module src.file

Expand source code
import threading
import fitz
import pymongo
from flask_restful import Resource
import globals
import os
from src import auth
from pathlib import Path
from webargs.flaskparser import use_kwargs, parser, abort
from webargs import fields
from bson import json_util, ObjectId
import json
import shutil
from PIL import Image
import datetime
from zoneinfo import ZoneInfo
import pytz



# Short module-level aliases for the collection handles exposed by the
# project's `globals` module (used below with find_one / insert_many /
# update_one style calls).
filesDB = globals.filesDB
linesDB = globals.linesDB


def area(a, b):
    """Classify where span ``b`` sits relative to span ``a``.

    Both arguments are dicts with a ``"bbox"`` entry ``[x0, y0, x1, y1]``;
    ``a`` must also carry ``"text"`` (its length gives the average character
    width).

    Returns:
        ``"new-line"``  - the vertical extents do not overlap, or the overlap
                          is at most 70% of each span's height.
        ``"same-line"`` - the overlap exceeds 70% of either height and ``b``
                          starts between 0 and 3 average character widths to
                          the right of ``a``'s right edge.
        ``"new-box"``   - the overlap exceeds 70% but ``b`` starts left of
                          ``a``'s right edge or more than 3 character widths
                          beyond it.

    Zero-height boxes and empty text are treated as a ratio / character width
    of 0 instead of raising ZeroDivisionError.
    """
    a_box = a["bbox"]
    b_box = b["bbox"]

    # Height of the vertical intersection; min/max are symmetric, so one
    # computation covers both directions.
    overlap = min(a_box[3], b_box[3]) - max(a_box[1], b_box[1])
    if overlap < 0:
        return "new-line"

    def _score(height):
        # Overlap as a 0..10 score of the given height; degenerate heights score 0.
        return (overlap / height) * 10 if height > 0 else 0

    prob = _score(a_box[3] - a_box[1])
    reverse_prob = _score(b_box[3] - b_box[1])
    if not (prob > 7 or reverse_prob > 7):
        return "new-line"

    char_count = len(a["text"])
    letter_spacing = (a_box[2] - a_box[0]) / char_count if char_count else 0
    gap = b_box[0] - a_box[2]
    if gap < 0 or gap > (3 * letter_spacing):
        return "new-box"
    return "same-line"


def line_builder(x):
    """Return the spans of one line ordered left to right.

    Args:
        x: iterable of span dicts, each with a ``"bbox"`` whose first element
           is the left x coordinate.

    Returns:
        A new list with the same dict objects sorted by ``bbox[0]``. The sort
        is stable, so spans sharing the same left edge keep their input order.

    The previous hand-rolled insertion appended a span at the end whenever the
    scan stopped on the last element, which left e.g. x0 values 0, 10, 5
    unsorted.
    """
    return sorted(x, key=lambda span: span["bbox"][0])


def string_builder(x, w_multiplier, h_multiplier):
    """Turn the spans of one line into a list of styled text boxes.

    Args:
        x: sequence of span dicts. The code reads/writes the keys ``color``,
           ``size``, ``font``, ``text``, ``bbox`` and deletes ``flags``,
           ``ascender``, ``descender`` and ``origin``.
        w_multiplier: factor applied to the x coordinates of ``bbox`` when
           building ``bboxScaled``.
        h_multiplier: factor applied to the y coordinates of ``bbox`` and to
           the font size.

    Returns:
        A list of dict copies, each with ``color`` as a ``#rrggbb`` string,
        ``size`` as an ``"<n>px"`` string and an integer ``bboxScaled``.

    Side effects:
        The dicts in ``x`` are modified in place (the accumulator ``xx``
        aliases them), and each finished span dict is emptied with
        ``clear()`` after its copy has been appended to the result.

    Raises:
        KeyError: if ``x`` is empty, because the final ``bboxScaled`` step
        indexes the still-empty accumulator.
    """
    xxx = []  # finished boxes
    xx = {}   # box currently being accumulated (aliases a dict from x)
    for y in x:
        if not xx:
            # First span: it becomes the accumulator and is normalised in place.
            xx = y
            # Render the color as a zero-padded 6-digit hex string.
            hex_val = hex(xx["color"]).split("0x")[-1]
            while len(hex_val) < 6:
                hex_val = "0" + hex_val
            xx["color"] = "#" + hex_val
            xx["size"] = str(int(int(xx["size"]) * h_multiplier)) + "px"
            del xx["flags"]
            del xx["ascender"]
            del xx["descender"]
            del xx["origin"]
        else:
            # style = '<span style="'
            # Size is converted before the whitespace check, so it is already
            # in "px" form if this span later becomes the accumulator.
            y["size"] = str(int(int(y["size"]) * h_multiplier)) + "px"
            # Whitespace-only spans are skipped entirely (no else branch).
            if not str(y["text"]).isspace():
                # NOTE(review): xx["color"] is already a "#rrggbb" string here
                # while y["color"] has not been converted yet (hex() above
                # implies an int), so this inequality looks always true and
                # the merge branch below appears unreachable - confirm.
                if xx["size"] != y["size"] or xx["font"] != y["font"] or xx["color"] != y["color"]:
                    # Style changed: finish the current box and start a new one.
                    xx["bboxScaled"] = [
                        int(xx["bbox"][0] * w_multiplier),
                        int(xx["bbox"][1] * h_multiplier),
                        int(xx["bbox"][2] * w_multiplier),
                        int(xx["bbox"][3] * h_multiplier)
                    ]
                    # xx["text"] = str(xx["text"]).strip()
                    xxx.append(xx.copy())
                    # clear() empties the span dict that xx aliased (the copy
                    # appended above is unaffected).
                    xx.clear()
                    xx = y
                    hex_val = hex(xx["color"]).split("0x")[-1]
                    while len(hex_val) < 6:
                        hex_val = "0" + hex_val
                    xx["color"] = "#" + hex_val
                    del xx["flags"]
                    del xx["ascender"]
                    del xx["descender"]
                    del xx["origin"]
                else:
                    # Same style: widen the box horizontally and append the text.
                    # NOTE(review): vertically this keeps max(top) / min(bottom),
                    # i.e. the intersection rather than the union - confirm intent.
                    [x1, y1, w1, h1] = y["bbox"]
                    [x2, y2, w2, h2] = xx["bbox"]
                    xx["bbox"] = [
                        min(x1, x2),
                        max(y1, y2),
                        max(w1, w2),
                        min(h1, h2)]
                    xx["text"] += y["text"]
    # Flush the last accumulated box.
    xx["bboxScaled"] = [
        int(xx["bbox"][0] * w_multiplier),
        int(xx["bbox"][1] * h_multiplier),
        int(xx["bbox"][2] * w_multiplier),
        int(xx["bbox"][3] * h_multiplier)
    ]
    # xx["text"] = str(xx["text"]).strip()
    xxx.append(xx.copy())
    return xxx

def checkSameLine(line, newLines):
    """Return the index of the first entry of ``newLines`` that ``area``
    classifies as "same-line" or "new-box" relative to ``line``, or -1 when
    no entry qualifies.
    """
    for position, candidate in enumerate(newLines):
        if area(candidate, line) in ("same-line", "new-box"):
            return position
    return -1

def sort(oriLines):
    """Order line dicts into reading order (top to bottom, left to right).

    Each line is inserted into the result one at a time:

    * if some already-placed line shares its row (``checkSameLine``), the
      insertion point starts at that line and moves right past every
      row-mate whose left edge is smaller than the new line's;
    * otherwise the line is placed before the first entry whose bottom edge
      (``bbox[3]``) is not above its own.

    Args:
        oriLines: iterable of dicts with ``"bbox"`` (and ``"text"``, needed
            by ``area``).

    Returns:
        A new list containing the same dict objects in sorted order.
    """
    row_labels = ("same-line", "new-box")
    newLines = []
    for line in oriLines:
        if not newLines:
            newLines.append(line)
            continue

        bbox = line["bbox"]
        # Look the row up once; the original scanned the list twice per line.
        i = checkSameLine(line, newLines)
        if i >= 0:
            while i < len(newLines) and area(newLines[i], line) in row_labels:
                if bbox[0] > newLines[i]["bbox"][0]:
                    i += 1
                else:
                    break
        else:
            i = 0
            while i < len(newLines) and bbox[3] > newLines[i]["bbox"][3]:
                i += 1

        newLines.insert(i, line)

    return newLines


def process_pages(localFile, file):
    """Extract the text spans of every page of a PDF and store them as lines.

    For each page the text dictionary is read, the spans of all text blocks
    are ordered, grouped into rows, merged into styled boxes by
    ``string_builder`` and written to ``linesDB`` twice: once as the original
    record (``current`` = 0) and once as an editable copy (``current`` = 1,
    ``cursor`` = 1, ``parent`` = id of the original, fresh ``_id``).

    Args:
        localFile: path of the PDF, handed to ``fitz.open``.
        file: the file record. Reads ``file["_id"]`` and, per page,
            ``file["Pages"][j]`` (``PageID``, ``Image.Height``,
            ``Image.Width``); writes ``Height-Multipler`` and
            ``Width-Multipler`` into each page entry.
    """
    pdf = localFile
    doc = fitz.open(pdf)
    file_id = str(file["_id"])
    for j in range(0, int(len(doc))):
        page = doc[j]
        n = page.get_text("dict")
        sorted_blocks = []
        file_page = file["Pages"][j]
        page_id = file_page["PageID"]
        minimum = 0

        # Scale factors from PDF page units to the rendered image's pixels.
        img_height = file_page["Image"]["Height"]
        img_width = file_page["Image"]["Width"]
        h_multiplier = img_height / n["height"]
        w_multiplier = img_width / n["width"]
        file_page["Height-Multipler"] = h_multiplier
        file_page["Width-Multipler"] = w_multiplier

        for block in n["blocks"]:
            # Only text blocks (type 0) that start inside the page area.
            if block["type"] == 0 and 0 < block["bbox"][0] < n["width"] and block["bbox"][1] < n["height"]:
                page.add_redact_annot(block["bbox"])
                for lines in block["lines"]:
                    for span in lines["spans"]:
                        baseline = int(span["bbox"][1])
                        if int(len(sorted_blocks)) == 0:
                            sorted_blocks.append(span)
                            minimum = baseline
                        elif baseline < minimum:
                            # Higher than everything seen so far: goes first.
                            sorted_blocks.insert(0, span)
                            minimum = baseline
                        else:
                            # Scan for a span on the same row, then advance
                            # past row-mates that start further left.
                            # NOTE(review): prob is tested before indx is
                            # advanced, so each loop exits one position past
                            # the span it last compared - confirm intent.
                            indx = 0
                            prob = area(sorted_blocks[indx], span)
                            while indx < len(sorted_blocks) and prob != "same-line" and prob != "new-box":
                                if indx != len(sorted_blocks):
                                    prob = area(sorted_blocks[indx], span)
                                indx += 1
                            while indx < len(sorted_blocks) and (prob == "same-line" or prob == "new-box") and \
                                    sorted_blocks[indx]["bbox"][0] < span["bbox"][0]:
                                if indx != len(sorted_blocks):
                                    prob = area(sorted_blocks[indx], span)
                                indx += 1
                            sorted_blocks.insert(indx, span)

        # Group consecutive spans: a new group starts whenever the span is not
        # on the "same-line" as its predecessor.
        line_builder_blocks = {}
        lines_to_be_merged = []
        index = 0
        for i in range(0, int(len(sorted_blocks))):
            length = int(len(lines_to_be_merged))
            if int(length) > 0:
                prob = area(sorted_blocks[i - 1], sorted_blocks[i])
                if prob == "new-line" or prob == "new-box":
                    line_builder_blocks[index] = lines_to_be_merged.copy()
                    index = index + 1
                    lines_to_be_merged.clear()
                    sorted_blocks[i]["PROB"] = prob
                    lines_to_be_merged.append(sorted_blocks[i])
                else:
                    sorted_blocks[i]["PROB"] = prob
                    lines_to_be_merged.append(sorted_blocks[i])
            else:
                lines_to_be_merged.append(sorted_blocks[i])

        if int(len(lines_to_be_merged)) > 0:
            line_builder_blocks[index] = lines_to_be_merged.copy()

        # Order the spans of every group from left to right.
        built_lines = {}
        for f in range(0, int(len(line_builder_blocks))):
            xx = line_builder(line_builder_blocks[f])
            built_lines[f] = xx.copy()

        # Merge spans into styled boxes and attach the default line attributes.
        final_lines = []
        page_line_index = 0
        for f in range(0, int(len(built_lines))):
            xxx = string_builder(built_lines[f], w_multiplier, h_multiplier)
            for xx in xxx:
                if not xx["text"].isspace() and len(xx["text"]) > 0:
                    if page_line_index == 0:
                        xx["PROB"] = "new-line"
                    xx["pageId"] = page_id
                    xx["fileId"] = file_id
                    xx["current"] = 0
                    xx["ocr"] = ""
                    xx["parent"] = ""
                    xx["cursor"] = 0
                    xx["pageLineIndex"] = page_line_index
                    xx["wordSpacing"] = "normal"
                    xx["letterSpacing"] = "normal"
                    xx["fontStyle"] = "normal"
                    xx["textDecoration"] = "none"
                    xx["lineHeight"] = "normal"
                    xx["textRotate"] = 0
                    xx["fontWeight"] = "400"
                    xx["customCSS"] = ""
                    xx["_id"] = ObjectId()
                    xx["whiteSpace"] = "pre"
                    final_lines.append(xx.copy())
                    page_line_index += 1
        if len(final_lines) > 0:
            sorted_lines = sort(final_lines)
            # Renumber every line, including the last one (the previous
            # range(0, len - 1) left the final line with its pre-sort index).
            for position, sorted_line in enumerate(sorted_lines):
                sorted_line["pageLineIndex"] = position
            linesDB.insert_many(sorted_lines)

            # Second pass: the same dicts are re-keyed and stored again as the
            # editable copies that point back at the originals.
            for line in final_lines:
                line["current"] = 1
                line["cursor"] = 1
                line["parent"] = str(line["_id"])
                line["_id"] = ObjectId()

            linesDB.insert_many(final_lines)
        page.apply_redactions(fitz.PDF_REDACT_IMAGE_NONE)
    # Release the file handle held by the document.
    doc.close()


def pdf2img(fileFolder, filePath, fileID):
    """Render every page of a PDF to JPEG (original and text-redacted), record
    the pages on the file document and hand over to ``process_pages``.

    All pages are resized to one common size derived from the most frequent
    crop-box dimensions in the document, scaled in 100 px height steps towards
    (but not across) a 4,000,000 pixel area.

    Args:
        fileFolder: destination folder for the PDF copy and the page images.
        filePath: path of the uploaded PDF.
        fileID: ``_id`` of the file document in ``filesDB``.

    On any failure the file document, its boxes and lines, and the file's
    folder are removed; the error is printed rather than raised because this
    function runs in a background thread.
    """
    doc = None
    try:
        doc = fitz.open(filePath)
        shutil.copy2(filePath, os.path.join(fileFolder, str(fileID)+".pdf"))
        file_folder_redacted = os.path.join(fileFolder, "redacted")
        Path(file_folder_redacted).mkdir(parents=True, exist_ok=True)
        file = filesDB.find_one({"_id": fileID})
        total_pages = int(len(doc))
        file["TotalPages"] = total_pages
        mat = fitz.Matrix(300 / 72, 300 / 72)

        # Most frequent page size. Stored as "width, height" so that the
        # unpacking below matches; the earlier "height, width" order
        # transposed the two values.
        dimensions = []
        for pageNum in range(0, total_pages):
            page = doc[pageNum]
            dimensions.append(f'{page.cropbox.width}, {page.cropbox.height}')

        result = max(set(dimensions), key=dimensions.count)
        width_size, height_size = map(int, map(float, result.split(",")))

        tmp_height_size = height_size

        # Grow in 100 px height steps while the area stays below 4,000,000 px.
        while width_size * height_size < 4000000:
            tmp_height_size += 100
            tmp_height_percent = (tmp_height_size / float(height_size))
            tmp_width_size = int(round((float(width_size) * float(tmp_height_percent)), 0))
            if tmp_height_size * tmp_width_size < 4000000:
                width_size = tmp_width_size
                height_size = tmp_height_size
            else:
                break

        # Shrink in 100 px height steps while the area stays above 4,000,000 px.
        while width_size * height_size > 4000000:
            tmp_height_size -= 100
            tmp_height_percent = (tmp_height_size / float(height_size))
            tmp_width_size = int((float(width_size) * float(tmp_height_percent)))
            if tmp_height_size * tmp_width_size > 4000000:
                width_size = tmp_width_size
                height_size = tmp_height_size
            else:
                break

        for j in range(0, total_pages):
            page = doc[j]
            n = page.get_text("dict")
            img_path_redacted = os.path.join(file_folder_redacted, f"Page{j + 1}.jpeg")
            img_path = os.path.join(fileFolder, f"Page{j + 1}.jpeg")

            # Original page image.
            pix = page.get_pixmap(matrix=mat)
            image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            image.resize((width_size, height_size), resample=Image.BILINEAR).save(img_path, dpi=(300, 300), quality=80)

            # Redact every text block that starts inside the page, then render again.
            for block in n["blocks"]:
                if block["type"] == 0 and 0 < block["bbox"][0] < n["width"] and block["bbox"][1] < n["height"]:
                    page.add_redact_annot(block["bbox"])

            page.apply_redactions(images=fitz.PDF_REDACT_IMAGE_NONE)
            pix = page.get_pixmap(matrix=mat)
            image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            image.resize((width_size, height_size), resample=Image.BILINEAR).save(img_path_redacted, dpi=(300, 300),
                                                                                  quality=100)
            file["Pages"].append({
                "PageID": str(ObjectId()),
                "PageNum": j + 1,
                "Image": {
                    "Path": f"static/files/{str(fileID)}/Page{j + 1}.jpeg",
                    "Width": width_size,
                    "Height": height_size
                },
                "Redacted_Image": {
                    "Path": f"static/files/{str(fileID)}/redacted/Page{j + 1}.jpeg",
                    "Width": width_size,
                    "Height": height_size
                },
            })
        print("IMAGE EXTRACTED")
        process_pages(filePath, file)
        file["Status"] = "Ready"
        filesDB.update_one({"_id": fileID}, {"$set": {"Status": "Ready",
                                                      "TotalPages": total_pages, "Pages": file["Pages"]}})
    except Exception as e:
        # Best-effort rollback; report the cause instead of dropping it silently.
        print(f"PDF processing failed for {fileID}: {e!r}")
        filesDB.delete_one({"_id": ObjectId(fileID)})
        globals.boxesDB.delete_many({"fileId": fileID})
        globals.linesDB.delete_many({"fileId": fileID})
        fileFolder = os.path.join(globals.FILES_FOLDER, str(fileID))
        if os.path.exists(fileFolder):
            shutil.rmtree(fileFolder)
    finally:
        # Release the document handle whether or not processing succeeded.
        if doc is not None:
            doc.close()


def pdf_process(filePath, overwrite):
    """Register an uploaded PDF and start its conversion in a background thread.

    Args:
        filePath: path of the saved upload.
        overwrite: when a file with the same name already exists, True renames
            the existing record to ``<name>_OVERWRITTEN`` and continues;
            False aborts.

    Returns:
        The string ``"EXISTS"`` when the name is taken and ``overwrite`` is
        false, otherwise ``{"Id": <new file id>, "Name": <file name>}``.
    """
    file_name = os.path.basename(filePath)
    # Strip only the final extension. split(".")[0] cut names such as
    # "report.v2.pdf" down to "report", making unrelated uploads collide.
    file_without_ext = file_name.rsplit(".", 1)[0]
    file_exists = filesDB.find_one({"filename": file_without_ext})
    if file_exists:
        if not overwrite:
            return "EXISTS"
        file_exists["filename"] = f"{file_without_ext}_OVERWRITTEN"
        filesDB.update_one({"_id": file_exists["_id"]}, {"$set": {"filename": file_exists["filename"]}})

    # Every new file starts with the same set of empty, required properties.
    property_keys = (
        "IBOOK_VERSION",
        "FILE_TITLE",
        "CREATOR",
        "CREATOR_FILE_AS",
        "CREATOR_ROLE",
        "ISBN",
        "DC_TERMS_MODIFIED",
        "PUBLISHER",
        "RIGHTS",
        "LANGUAGE",
        "COVERPAGE_VALUE",
        "TITLEPAGE_VALUE",
        "CHAPTER1_VALUE",
        "COPYRIGHT_VALUE",
        "COVERPAGE_NO",
        "TITLEPAGE_NO",
        "CHAPTER1_NO",
        "COPYRIGHT_NO",
        "TOC_TITLE",
    )
    doc = {
        "filename": file_without_ext,
        "TotalPages": 0,
        "Fonts": [],
        "Properties": [
            {"key": property_key, "value": "", "isRequired": True}
            for property_key in property_keys
        ],
        "Pages": [],
        "Status": "Uploaded",
        "created_at": datetime.datetime.now().astimezone(pytz.timezone('Asia/Kolkata'))
    }
    file_id = filesDB.insert_one(doc).inserted_id
    file_folder = os.path.join(globals.FILES_FOLDER, str(file_id))
    Path(file_folder).mkdir(parents=True, exist_ok=True)
    # Conversion runs in the background; the caller gets the id immediately.
    threading.Thread(target=pdf2img, args=(file_folder, filePath, file_id)).start()
    print("Extracted")
    return {"Id": str(file_id), "Name": file_without_ext}


# webargs schema shared by every resource below: a required `Authorization`
# header, parsed with location="headers".
auth_args = {"Authorization": fields.Str(required=True)}


class Upload(Resource):
    """Endpoint that accepts a PDF upload and starts its processing."""

    # The uploaded file must have a ".pdf" extension (case-insensitive).
    files_args = {
        "file": fields.Field(validate=lambda file: "pdf" == file.filename.split(".")[-1].lower(), required=True)}
    upload_args = {'overwrite': fields.Str(required=True)}

    @use_kwargs(auth_args, location="headers")
    @use_kwargs(files_args, location="files")
    @use_kwargs(upload_args, location="form")
    def post(self, overwrite, file, Authorization):
        """Save the uploaded PDF and register it via ``pdf_process``.

        ``overwrite`` is a form string; only the exact value "FALSE" disables
        overwriting. Returns the new file's id and name with 200, or a 401
        with a message when the caller is unauthorized or the file exists.
        """
        overwrite = overwrite != "FALSE"

        if not auth.verify(str(Authorization).split(" ")[1]):
            return {"msg": "Unauthorized! Access Denied"}, 401

        # The filename is client-controlled: keep only its last path component
        # (for both separator styles) so it cannot escape UPLOAD_FOLDER.
        safe_name = os.path.basename(file.filename.replace("\\", "/"))
        file_path = os.path.join(globals.UPLOAD_FOLDER, safe_name)
        file.save(file_path)
        response = pdf_process(file_path, overwrite)

        if response == "EXISTS":
            return {"msg": "File Already Exists"}, 401
        return response, 200


class GetFiles(Resource):
    """Endpoint that lists processed files whose name matches a search phrase."""

    search_arg = {"searchPhase": fields.Str(required=True)}

    @use_kwargs(auth_args, location="headers")
    @use_kwargs(search_arg, location="query")
    def get(self, Authorization, searchPhase):
        """Return up to 50 "Ready" files (``_id`` and ``filename`` only),
        newest first, whose filename matches ``searchPhase``
        case-insensitively. ``searchPhase`` is used as the regex pattern as-is.
        """
        token = str(Authorization).split(" ")[1]
        if not auth.verify(token):
            return "Unauthorized! Access Denied", 401

        name_filter = {"filename": {'$regex': searchPhase, "$options": 'i'}}
        ready_filter = {"Status": "Ready"}
        projection = {"_id": 1, "filename": 1}

        matches = filesDB.find({"$and": [name_filter, ready_filter]}, projection)
        newest_first = matches.sort("created_at", pymongo.DESCENDING).limit(50)
        files = list(newest_first)
        return json.loads(json_util.dumps(files)), 200


class FileStatus(Resource):
    """Endpoint that reports whether a file has finished processing."""

    fileid_arg = {"fileId": fields.Str(required=True)}

    @use_kwargs(auth_args, location="headers")
    @use_kwargs(fileid_arg, location="query")
    def get(self, Authorization, fileId):
        """Return ``{"msg": "Ready"}`` with 200 once the file's status is
        "Ready", a 404 when the file does not exist and a 401 for an invalid
        token. For any other status nothing is returned (``None``).
        """
        token = str(Authorization).split(" ")[1]
        if not auth.verify(token):
            return {"msg": "Unauthorized! Access Denied"}, 401

        record = filesDB.find_one({'_id': ObjectId(fileId)})
        if not record:
            return "File Not Found", 404

        if record["Status"] == "Ready":
            return {"msg": record["Status"]}, 200
        # Not ready yet: the handler deliberately yields no body.
        return None


class FilePropertiesFont(Resource):
    """Endpoint that returns the fonts registered on a file."""

    fileid_arg = {"fileId": fields.Str(required=True)}

    @use_kwargs(auth_args, location="headers")
    @use_kwargs(fileid_arg, location="query")
    def get(self, Authorization, fileId):
        """Return ``{"Fonts": [...]}`` for the file with 200, a 404 when no
        such file exists, or a 401 when the token is rejected.
        """
        if not auth.verify(str(Authorization).split(" ")[1]):
            return {"msg": "Unauthorized! Access Denied"}, 401

        result = filesDB.find_one(
            {'_id': ObjectId(fileId)},
            {"Fonts": 1, "_id": 0}
        )
        # find_one yields None for an unknown id; answer 404 instead of
        # failing on the subscript below.
        if result is None:
            return {"msg": "File Not Found"}, 404

        return {"Fonts": json.loads(json_util.dumps(result["Fonts"]))}, 200


class FileProperties(Resource):
    """Endpoint that returns the metadata properties of a file."""

    fileid_arg = {"fileId": fields.Str(required=True)}

    @use_kwargs(auth_args, location="headers")
    @use_kwargs(fileid_arg, location="query")
    def get(self, Authorization, fileId):
        """Return ``{"Properties": [...]}`` for the file with 200, a 404 when
        no such file exists, or a 401 when the token is rejected.
        """
        if not auth.verify(str(Authorization).split(" ")[1]):
            return {"msg": "Unauthorized! Access Denied"}, 401

        result = filesDB.find_one(
            {'_id': ObjectId(fileId)},
            {"Properties": 1, "_id": 0}
        )
        # find_one yields None for an unknown id; answer 404 instead of
        # failing on the subscript below.
        if result is None:
            return {"msg": "File Not Found"}, 404

        return {"Properties": json.loads(json_util.dumps(result["Properties"]))}, 200


class FilePropertiesFontUpdate(Resource):
    """Endpoint that overwrites the fields of one font entry of a file."""

    fileid_arg = {
        "fileId": fields.Str(required=True),
        "fontId": fields.Str(required=True),
        "cssFontName": fields.Str(required=True),
        "fontPath": fields.Str(required=True),
        "ori_fontId": fields.Str(required=True),
        "ori_fontPath": fields.Str(required=True),
        "fontName": fields.Str(required=True),
        "ori_fontName": fields.Str(required=True)
    }

    @use_kwargs(auth_args, location="headers")
    @use_kwargs(fileid_arg)
    def put(self, Authorization, fileId, fontId, cssFontName, fontPath, ori_fontPath, ori_fontId, fontName,
            ori_fontName):
        """Update the font whose ``cssFontName`` matches on the given file and
        return the number of modified documents as ``msg``.
        """
        token = str(Authorization).split(" ")[1]
        if not auth.verify(token):
            return {"msg": "Unauthorized! Access Denied"}, 401

        # The positional "$" targets the array element matched by the filter.
        selector = {"_id": ObjectId(fileId), "Fonts.cssFontName": cssFontName}
        new_values = {
            "Fonts.$.fontId": fontId,
            "Fonts.$.ori_fontId": ori_fontId,
            "Fonts.$.cssFontName": cssFontName,
            "Fonts.$.fontPath": fontPath,
            "Fonts.$.ori_fontPath": ori_fontPath,
            "Fonts.$.ori_fontName": ori_fontName,
            "Fonts.$.fontName": fontName
        }
        result = filesDB.update_one(selector, {"$set": new_values})
        return {"msg": result.modified_count}, 200


class FilePropertiesFontAdd(Resource):
    """Endpoint that appends a font entry to a file."""

    fileid_arg = {
        "fileId": fields.Str(required=True),
        "fontId": fields.Str(required=True),
        "cssFontName": fields.Str(required=True),
        "fontPath": fields.Str(required=True),
        "ori_fontId": fields.Str(required=True),
        "ori_fontPath": fields.Str(required=True),
        "fontName": fields.Str(required=True),
        "ori_fontName": fields.Str(required=True)
    }

    @use_kwargs(auth_args, location="headers")
    @use_kwargs(fileid_arg)
    def post(self, Authorization, fileId, fontId, cssFontName, fontPath, ori_fontPath, ori_fontId, fontName,
             ori_fontName):
        """Push a new font document onto the file's ``Fonts`` array and return
        the number of modified documents as ``msg``.
        """
        token = str(Authorization).split(" ")[1]
        if not auth.verify(token):
            return {"msg": "Unauthorized! Access Denied"}, 401

        font_entry = {
            "fontId": fontId,
            "ori_fontId": ori_fontId,
            "cssFontName": cssFontName,
            "fontPath": fontPath,
            "ori_fontPath": ori_fontPath,
            "ori_fontName": ori_fontName,
            "fontName": fontName
        }
        result = filesDB.update_one(
            {'_id': ObjectId(fileId)},
            {"$push": {"Fonts": font_entry}}
        )
        return {"msg": result.modified_count}, 200


class FilePropertiesFontDelete(Resource):
    """Endpoint that removes a font entry from a file."""

    fileid_arg = {
        "fileId": fields.Str(required=True),
        "cssFontName": fields.Str(required=True)
    }

    @use_kwargs(auth_args, location="headers")
    @use_kwargs(fileid_arg, location="query")
    def delete(self, Authorization, fileId, cssFontName):
        """Remove every font whose ``cssFontName`` matches from the file's
        ``Fonts`` array and return the number of modified documents as ``msg``.
        """
        if not auth.verify(str(Authorization).split(" ")[1]):
            return {"msg": "Unauthorized! Access Denied"}, 401

        # $pull applies its condition to each array element directly, so the
        # field match is given without $elemMatch (which targets nested
        # arrays and would not match these plain sub-documents). This mirrors
        # how FilePropertiesDelete pulls from "Properties".
        result = filesDB.update_one(
            {"_id": ObjectId(fileId)},
            {"$pull": {"Fonts": {"cssFontName": cssFontName}}}
        )
        return {"msg": result.modified_count}, 200


class FilePropertiesAdd(Resource):
    """Endpoint that appends a metadata property to a file."""

    fileid_arg = {
        "fileId": fields.Str(required=True),
        "key": fields.Str(required=True),
        "value": fields.Str(required=True),
        "isRequired": fields.Boolean(required=True)
    }

    @use_kwargs(auth_args, location="headers")
    @use_kwargs(fileid_arg)
    def post(self, Authorization, fileId, key, value, isRequired):
        """Push ``{"key", "value", "isRequired"}`` onto the file's
        ``Properties`` array and return the number of modified documents as
        ``msg``.
        """
        if not auth.verify(str(Authorization).split(" ")[1]):
            return {"msg": "Unauthorized! Access Denied"}, 401

        result = filesDB.update_one(
            {'_id': ObjectId(fileId)},
            {
                "$push": {
                    "Properties": {
                        "key": key,
                        "value": value,
                        # Same field name as the default properties created
                        # on upload (was stored as "Required").
                        "isRequired": isRequired
                    }
                }
            }
        )
        return {"msg": result.modified_count}, 200


class FilePropertiesUpdate(Resource):
    """Endpoint that changes the value of an existing metadata property."""

    fileid_arg = {
        "fileId": fields.Str(required=True),
        "key": fields.Str(required=True),
        "value": fields.Str(required=True),
        "isRequired": fields.Str(required=True)
    }

    @use_kwargs(auth_args, location="headers")
    @use_kwargs(fileid_arg)
    def put(self, Authorization, fileId, key, value, isRequired):
        """Set the ``value`` of the property identified by ``key`` and return
        the number of modified documents as ``msg``. ``isRequired`` is
        accepted by the schema but not written by this handler.
        """
        token = str(Authorization).split(" ")[1]
        if not auth.verify(token):
            return {"msg": "Unauthorized! Access Denied"}, 401

        # The positional "$" targets the property matched by "Properties.key".
        selector = {'_id': ObjectId(fileId), "Properties.key": key}
        change = {"$set": {"Properties.$.value": value}}
        result = filesDB.update_one(selector, change)
        return {"msg": result.modified_count}, 200


class FilePropertiesDelete(Resource):
    """Endpoint that removes a metadata property from a file."""

    fileid_arg = {
        "fileId": fields.Str(required=True),
        "key": fields.Str(required=True),
    }

    @use_kwargs(auth_args, location="headers")
    @use_kwargs(fileid_arg, location="query")
    def delete(self, Authorization, fileId, key):
        """Pull every property whose ``key`` matches from the file's
        ``Properties`` array and return the number of modified documents as
        ``msg``.
        """
        token = str(Authorization).split(" ")[1]
        if not auth.verify(token):
            return {"msg": "Unauthorized! Access Denied"}, 401

        selector = {"_id": ObjectId(fileId)}
        removal = {"$pull": {"Properties": {"key": key}}}
        result = filesDB.update_one(selector, removal)
        return {"msg": result.modified_count}, 200


class DeleteFile(Resource):
    """Endpoint that deletes a file together with its derived data."""

    fileid_arg = {"fileId": fields.Str(required=True)}

    @use_kwargs(auth_args, location="headers")
    @use_kwargs(fileid_arg, location="query")
    def delete(self, Authorization, fileId):
        """Remove the file document, its boxes and lines, and the file's
        folder under ``FILES_FOLDER`` if it exists.
        """
        token = str(Authorization).split(" ")[1]
        if not auth.verify(token):
            return {"msg": "Unauthorized! Access Denied"}, 401

        filesDB.delete_one({"_id": ObjectId(fileId)})
        # Boxes and lines reference the file by its string id.
        globals.boxesDB.delete_many({"fileId": fileId})
        globals.linesDB.delete_many({"fileId": fileId})

        file_folder = os.path.join(globals.FILES_FOLDER, str(fileId))
        folder_present = os.path.exists(file_folder)
        if folder_present:
            shutil.rmtree(file_folder)
        return {"msg": "Deleted Successfully"}


@parser.error_handler
def handle_request_parsing_error(err, req, schema, *, error_status_code, error_headers):
    """webargs error handler: abort the request with the validation messages
    attached as ``errors``, using ``error_status_code`` when one is supplied
    and 400 otherwise. ``abort`` is the one imported from
    ``webargs.flaskparser``.
    """
    status = error_status_code if error_status_code else 400
    abort(status, errors=err.messages)

Functions

def area(a, b)
Expand source code
def area(a, b):
    """Classify where span ``b`` sits relative to span ``a``.

    Both arguments are dicts with a ``"bbox"`` entry ``[x0, y0, x1, y1]``;
    ``a`` must also carry ``"text"`` (its length gives the average character
    width).

    Returns:
        ``"new-line"``  - the vertical extents do not overlap, or the overlap
                          is at most 70% of each span's height.
        ``"same-line"`` - the overlap exceeds 70% of either height and ``b``
                          starts between 0 and 3 average character widths to
                          the right of ``a``'s right edge.
        ``"new-box"``   - the overlap exceeds 70% but ``b`` starts left of
                          ``a``'s right edge or more than 3 character widths
                          beyond it.

    Zero-height boxes and empty text are treated as a ratio / character width
    of 0 instead of raising ZeroDivisionError.
    """
    a_box = a["bbox"]
    b_box = b["bbox"]

    # Height of the vertical intersection; min/max are symmetric, so one
    # computation covers both directions.
    overlap = min(a_box[3], b_box[3]) - max(a_box[1], b_box[1])
    if overlap < 0:
        return "new-line"

    def _score(height):
        # Overlap as a 0..10 score of the given height; degenerate heights score 0.
        return (overlap / height) * 10 if height > 0 else 0

    prob = _score(a_box[3] - a_box[1])
    reverse_prob = _score(b_box[3] - b_box[1])
    if not (prob > 7 or reverse_prob > 7):
        return "new-line"

    char_count = len(a["text"])
    letter_spacing = (a_box[2] - a_box[0]) / char_count if char_count else 0
    gap = b_box[0] - a_box[2]
    if gap < 0 or gap > (3 * letter_spacing):
        return "new-box"
    return "same-line"
def checkSameLine(line, newLines)
Expand source code
def checkSameLine(line, newLines):
    """Return the index of the first entry of ``newLines`` that ``area``
    classifies as "same-line" or "new-box" relative to ``line``, or -1 when
    no entry qualifies.
    """
    for position, candidate in enumerate(newLines):
        if area(candidate, line) in ("same-line", "new-box"):
            return position
    return -1
def handle_request_parsing_error(err, req, schema, *, error_status_code, error_headers)

webargs error handler that uses Flask-RESTful's abort function to return a JSON error response to the client.

Expand source code
@parser.error_handler
def handle_request_parsing_error(err, req, schema, *, error_status_code, error_headers):
    """Abort the request when webargs fails to parse or validate its arguments.

    Registered as the error handler of the webargs ``parser``.  The
    validation messages carried by ``err`` are forwarded to ``abort`` under
    the ``errors`` keyword.

    :param err: the validation error raised by webargs; only ``err.messages``
        is read.
    :param req: the incoming request (unused).
    :param schema: the schema that failed (unused).
    :param error_status_code: status code requested by webargs; any falsy
        value (``None``, ``0``) falls back to 400.
    :param error_headers: extra headers suggested by webargs (unused).
    """
    # The previous version also read err.status_code / err.message into
    # locals that were never used, and duplicated the abort call per branch.
    abort(error_status_code or 400, errors=err.messages)
def line_builder(x)
Expand source code
def line_builder(x):
    """Return the spans of one text line ordered from left to right.

    :param x: iterable of span dicts, each with a ``"bbox"`` sequence whose
        first element is the span's left edge.
    :returns: a new list with the same span objects sorted by ``bbox[0]``.
        Spans with an equal left edge keep their input order.

    The previous hand-written insertion sort appended the new span whenever
    its insertion point was the last index, so an input ordered
    ``[0, 10, 5]`` came back unchanged instead of ``[0, 5, 10]``.  A stable
    ``sorted`` gives the ordering the loop was aiming for.
    """
    return sorted(x, key=lambda span: span["bbox"][0])
def pdf2img(fileFolder, filePath, fileID)
Expand source code
def pdf2img(fileFolder, filePath, fileID):
    """Render every page of a PDF to JPEG images and register them on the file.

    Started on a background thread by ``pdf_process``.  For each page two
    images are written below ``fileFolder``: ``Page<N>.jpeg`` (the page as
    is) and ``redacted/Page<N>.jpeg`` (the page after its text blocks have
    been redacted).  The page list is then stored on the ``filesDB`` document
    and ``process_pages`` extracts the text lines.

    :param fileFolder: folder that receives the PDF copy and the images.
    :param filePath: path of the uploaded PDF.
    :param fileID: ``_id`` of the ``filesDB`` document created for the upload.

    On any failure the exception is reported on stderr and everything
    created for the file (database records and folder) is removed.
    """
    # Function-scope imports keep this block self-contained.
    import traceback
    from collections import Counter

    doc = None
    try:
        doc = fitz.open(filePath)
        shutil.copy2(filePath, os.path.join(fileFolder, str(fileID)+".pdf"))
        file_folder_redacted = os.path.join(fileFolder, "redacted")
        Path(file_folder_redacted).mkdir(parents=True, exist_ok=True)
        file = filesDB.find_one({"_id": fileID})
        total_pages = int(len(doc))
        file["TotalPages"] = total_pages
        # 300 dpi rendering matrix (PDF user space is 72 units per inch).
        mat = fitz.Matrix(300 / 72, 300 / 72)

        # Use the most common crop-box size as the size of every image.
        # Counter keeps first-seen order on ties, so the choice no longer
        # depends on set/hash ordering.
        page_sizes = Counter()
        for page_num in range(total_pages):
            crop_box = doc[page_num].cropbox
            page_sizes[(crop_box.width, crop_box.height)] += 1
        (common_width, common_height), _ = page_sizes.most_common(1)[0]
        # The original built a "height, width" string and unpacked it as
        # (width, height), which transposed the two dimensions.
        width_size = int(common_width)
        height_size = int(common_height)

        tmp_height_size = height_size

        # Grow in 100 px height steps, keeping the aspect ratio, while the
        # image stays below 4,000,000 pixels.
        while width_size * height_size < 4000000:
            tmp_height_size += 100
            tmp_height_percent = (tmp_height_size / float(height_size))
            tmp_width_size = int(round((float(width_size) * float(tmp_height_percent)), 0))
            if tmp_height_size * tmp_width_size < 4000000:
                width_size = tmp_width_size
                height_size = tmp_height_size
            else:
                break

        # Shrink in 100 px height steps while the next step would still be
        # above 4,000,000 pixels.
        while width_size * height_size > 4000000:
            tmp_height_size -= 100
            tmp_height_percent = (tmp_height_size / float(height_size))
            tmp_width_size = int((float(width_size) * float(tmp_height_percent)))
            if tmp_height_size * tmp_width_size > 4000000:
                width_size = tmp_width_size
                height_size = tmp_height_size
            else:
                break

        for j in range(0, total_pages):
            page = doc[j]
            n = page.get_text("dict")
            img_path_redacted = os.path.join(file_folder_redacted, f"Page{j + 1}.jpeg")
            img_path = os.path.join(fileFolder, f"Page{j + 1}.jpeg")

            # Plain rendering of the page.
            pix = page.get_pixmap(matrix=mat)
            image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            image.resize((width_size, height_size), resample=Image.BILINEAR).save(img_path, dpi=(300, 300), quality=80)

            # Mark every text block (type 0) that starts inside the page for
            # redaction, then render the page again without that text.
            for block in n["blocks"]:
                if block["type"] == 0 and 0 < block["bbox"][0] < n["width"] and block["bbox"][1] < n["height"]:
                    page.add_redact_annot(block["bbox"])

            page.apply_redactions(images=fitz.PDF_REDACT_IMAGE_NONE)
            pix = page.get_pixmap(matrix=mat)
            image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            image.resize((width_size, height_size), resample=Image.BILINEAR).save(img_path_redacted, dpi=(300, 300),
                                                                                  quality=100)
            file["Pages"].append({
                "PageID": str(ObjectId()),
                "PageNum": j + 1,
                "Image": {
                    "Path": f"static/files/{str(fileID)}/Page{j + 1}.jpeg",
                    "Width": width_size,
                    "Height": height_size
                },
                "Redacted_Image": {
                    "Path": f"static/files/{str(fileID)}/redacted/Page{j + 1}.jpeg",
                    "Width": width_size,
                    "Height": height_size
                },
            })
        print("IMAGE EXTRACTED")
        process_pages(filePath, file)
        file["Status"] = "Ready"
        filesDB.update_one({"_id": fileID}, {"$set": {"Status": "Ready",
                                                      "TotalPages": total_pages, "Pages": file["Pages"]}})
    except Exception:
        # Best-effort rollback, but no longer silent: report what went wrong
        # before removing everything created for this file.
        traceback.print_exc()
        filesDB.delete_one({"_id": ObjectId(fileID)})
        globals.boxesDB.delete_many({"fileId": fileID})
        globals.linesDB.delete_many({"fileId": fileID})
        fileFolder = os.path.join(globals.FILES_FOLDER, str(fileID))
        if os.path.exists(fileFolder):
            shutil.rmtree(fileFolder)
    finally:
        # Release the PDF handle whether or not processing succeeded.
        if doc is not None:
            doc.close()
def pdf_process(filePath, overwrite)
Expand source code
def pdf_process(filePath, overwrite):
    """Register an uploaded PDF and start converting it in the background.

    The file name up to its first ``"."`` is used as the record's
    ``filename``.  If a record with that name already exists it is renamed
    to ``<name>_OVERWRITTEN`` when ``overwrite`` is truthy; otherwise nothing
    is created.

    :param filePath: path of the uploaded PDF.
    :param overwrite: whether an existing record with the same name may be
        superseded.
    :returns: the string ``"EXISTS"`` when the name is taken and
        ``overwrite`` is falsy, else ``{"Id": <new id as str>, "Name": <name>}``.
    """
    base_name = os.path.basename(filePath).split(".")[0]

    existing = filesDB.find_one({"filename": base_name})
    if existing:
        if not overwrite:
            return "EXISTS"
        existing["filename"] = f"{base_name}_OVERWRITTEN"
        filesDB.update_one({"_id": existing["_id"]}, {"$set": {"filename": existing["filename"]}})

    # Every new file starts with the same empty, required properties.
    property_keys = (
        "IBOOK_VERSION",
        "FILE_TITLE",
        "CREATOR",
        "CREATOR_FILE_AS",
        "CREATOR_ROLE",
        "ISBN",
        "DC_TERMS_MODIFIED",
        "PUBLISHER",
        "RIGHTS",
        "LANGUAGE",
        "COVERPAGE_VALUE",
        "TITLEPAGE_VALUE",
        "CHAPTER1_VALUE",
        "COPYRIGHT_VALUE",
        "COVERPAGE_NO",
        "TITLEPAGE_NO",
        "CHAPTER1_NO",
        "COPYRIGHT_NO",
        "TOC_TITLE",
    )
    record = {
        "filename": base_name,
        "TotalPages": 0,
        "Fonts": [],
        "Properties": [
            {"key": property_key, "value": "", "isRequired": True}
            for property_key in property_keys
        ],
        "Pages": [],
        "Status": "Uploaded",
        "created_at": datetime.datetime.now().astimezone(pytz.timezone('Asia/Kolkata')),
    }
    new_id = filesDB.insert_one(record).inserted_id

    target_folder = os.path.join(globals.FILES_FOLDER, str(new_id))
    Path(target_folder).mkdir(parents=True, exist_ok=True)

    # Rendering is slow, so it runs on its own thread while the caller gets
    # the new id straight away.
    worker = threading.Thread(target=pdf2img, args=(target_folder, filePath, new_id))
    worker.start()
    print("Extracted")
    return {"Id": str(new_id), "Name": base_name}
def process_pages(localFile, file)
Expand source code
def process_pages(localFile, file):
    """Extract the text lines of every page and store them in ``linesDB``.

    For each page the text spans are ordered, grouped into runs that belong
    together (``area``), ordered left to right (``line_builder``) and turned
    into line records (``string_builder``).  Every record is inserted twice:
    once with ``current == 0`` and once more with ``current == 1`` and
    ``parent`` set to the ``_id`` of the first copy.

    :param localFile: path of the PDF to read.
    :param file: the file document; ``file["Pages"][j]`` must already hold
        the image size of page ``j`` and receives the two multiplier keys.
    """
    pdf = localFile
    doc = fitz.open(pdf)
    file_id = str(file["_id"])
    for j in range(0, int(len(doc))):
        page = doc[j]
        n = page.get_text("dict")
        sorted_blocks = []
        file_page = file["Pages"][j]
        page_id = file_page["PageID"]
        minimum = 0

        # Scale factors from page units to the pixels of the rendered image.
        img_height = file_page["Image"]["Height"]
        img_width = file_page["Image"]["Width"]
        h_multiplier = img_height / n["height"]
        w_multiplier = img_width / n["width"]
        file_page["Height-Multipler"] = h_multiplier
        file_page["Width-Multipler"] = w_multiplier

        for block in n["blocks"]:
            # Only text blocks (type 0) that start inside the page.
            if block["type"] == 0 and 0 < block["bbox"][0] < n["width"] and block["bbox"][1] < n["height"]:
                page.add_redact_annot(block["bbox"])
                for lines in block["lines"]:
                    for span in lines["spans"]:
                        baseline = int(span["bbox"][1])
                        if int(len(sorted_blocks)) == 0:
                            sorted_blocks.append(span)
                            minimum = baseline
                        elif baseline < minimum:
                            sorted_blocks.insert(0, span)
                            minimum = baseline
                        else:
                            # Walk forward until a span on the same line is
                            # found, then on past same-line spans that start
                            # further left.  The guards that used to wrap
                            # these area() calls (`indx is not len(...)`)
                            # were always true inside the loops and compared
                            # ints by identity, so they were dropped.
                            indx = 0
                            prob = area(sorted_blocks[indx], span)
                            while indx < len(sorted_blocks) and prob != "same-line" and prob != "new-box":
                                prob = area(sorted_blocks[indx], span)
                                indx += 1
                            while indx < len(sorted_blocks) and (prob == "same-line" or prob == "new-box") and \
                                    sorted_blocks[indx]["bbox"][0] < span["bbox"][0]:
                                prob = area(sorted_blocks[indx], span)
                                indx += 1
                            sorted_blocks.insert(indx, span)

        # Split the ordered spans into runs: a run ends whenever the next
        # span is classified as "new-line" or "new-box" against its
        # predecessor.
        line_builder_blocks = {}
        lines_to_be_merged = []
        index = 0
        for i in range(0, int(len(sorted_blocks))):
            length = int(len(lines_to_be_merged))
            if int(length) > 0:
                prob = area(sorted_blocks[i - 1], sorted_blocks[i])
                if prob == "new-line" or prob == "new-box":
                    line_builder_blocks[index] = lines_to_be_merged.copy()
                    index = index + 1
                    lines_to_be_merged.clear()
                    sorted_blocks[i]["PROB"] = prob
                    lines_to_be_merged.append(sorted_blocks[i])
                else:
                    sorted_blocks[i]["PROB"] = prob
                    lines_to_be_merged.append(sorted_blocks[i])
            else:
                lines_to_be_merged.append(sorted_blocks[i])

        if int(len(lines_to_be_merged)) > 0:
            line_builder_blocks[index] = lines_to_be_merged.copy()

        built_lines = {}
        for f in range(0, int(len(line_builder_blocks))):
            xx = line_builder(line_builder_blocks[f])
            built_lines[f] = xx.copy()

        final_lines = []
        page_line_index = 0
        for f in range(0, int(len(built_lines))):
            xxx = string_builder(built_lines[f], w_multiplier, h_multiplier)
            for xx in xxx:
                if not xx["text"].isspace() and len(xx["text"]) > 0:
                    if page_line_index == 0:
                        xx["PROB"] = "new-line"
                    xx["pageId"] = page_id
                    xx["fileId"] = file_id
                    xx["current"] = 0
                    xx["ocr"] = ""
                    xx["parent"] = ""
                    xx["cursor"] = 0
                    xx["pageLineIndex"] = page_line_index
                    xx["wordSpacing"] = "normal"
                    xx["letterSpacing"] = "normal"
                    xx["fontStyle"] = "normal"
                    xx["textDecoration"] = "none"
                    xx["lineHeight"] = "normal"
                    xx["textRotate"] = 0
                    xx["fontWeight"] = "400"
                    xx["customCSS"] = ""
                    xx["_id"] = ObjectId()
                    xx["whiteSpace"] = "pre"
                    final_lines.append(xx.copy())
                    page_line_index += 1
        if len(final_lines) > 0:
            sorted_lines = sort(final_lines)
            # Renumber every line after sorting.  The original loop ran over
            # range(0, len - 1) and therefore left the last line with its
            # pre-sort index.
            for position, sorted_line in enumerate(sorted_lines):
                sorted_line["pageLineIndex"] = position
            linesDB.insert_many(sorted_lines)

            # sorted_lines holds the same dict objects as final_lines, so
            # this turns each record into its "current" copy that points
            # back at the record inserted above.
            for line in final_lines:
                line["current"] = 1
                line["cursor"] = 1
                line["parent"] = str(line["_id"])
                line["_id"] = ObjectId()

            linesDB.insert_many(final_lines)
        page.apply_redactions(fitz.PDF_REDACT_IMAGE_NONE)
    # The document was previously left open.
    doc.close()
def sort(oriLines)
Expand source code
def sort(oriLines):
    """Order line records top to bottom and, within a text line, left to right.

    Each record is inserted into the result one at a time:

    * if ``checkSameLine`` finds a record on the same text line, insertion
      starts there and moves right past same-line records whose left edge
      (``bbox[0]``) is smaller;
    * otherwise the record goes before the first record whose bottom edge
      (``bbox[3]``) is not above its own.

    :param oriLines: iterable of line dicts with ``"bbox"`` and ``"text"``.
    :returns: a new list containing the same dict objects in sorted order.

    Compared with the previous version, ``checkSameLine`` is called once per
    record instead of twice, and the parallel list of texts that was built
    but never read has been removed.
    """
    newLines = []
    for line in oriLines:
        i = 0
        bbox = line["bbox"]
        if newLines:
            index = checkSameLine(line, newLines)
            if index >= 0:
                i = index
                while i < len(newLines) and area(newLines[i], line) in ("same-line", "new-box"):
                    if bbox[0] > newLines[i]["bbox"][0]:
                        i += 1
                    else:
                        break
            else:
                while i < len(newLines) and bbox[3] > newLines[i]["bbox"][3]:
                    i += 1
        # insert(0, ...) on an empty list is the same as append.
        newLines.insert(i, line)

    return newLines
def string_builder(x, w_multiplier, h_multiplier)
Expand source code
def string_builder(x, w_multiplier, h_multiplier):
    """Merge consecutive spans that share a style into line records.

    Every span is normalised first: ``color`` (an int) becomes a ``"#rrggbb"``
    string, ``size`` becomes ``"<pixels>px"`` scaled by ``h_multiplier``, and
    the ``flags``, ``ascender``, ``descender`` and ``origin`` keys are
    dropped.  A span is merged into the current record when its size, font
    and colour are equal; otherwise the current record is emitted and the
    span starts a new one.  Whitespace-only spans after the first are
    skipped.

    When merging, the text is concatenated and the bbox becomes
    ``[min x0, max y0, max x1, min y1]`` of the two boxes.  Each emitted
    record gets ``bboxScaled``: its bbox multiplied by ``w_multiplier``
    (x values) and ``h_multiplier`` (y values), truncated to ints.

    :param x: iterable of span dicts; they are no longer modified.
    :returns: list of record dicts, empty when ``x`` is empty.

    Fixes: the original compared the already converted ``"#rrggbb"`` string
    with the next span's int colour, which is never equal, so the merge
    branch could not run; and an empty ``x`` raised ``KeyError``.
    """
    def normalise(span):
        # Work on a copy so the caller's span dicts stay untouched.
        styled = dict(span)
        styled["color"] = "#" + format(styled["color"], "06x")
        styled["size"] = str(int(int(styled["size"]) * h_multiplier)) + "px"
        for dropped_key in ("flags", "ascender", "descender", "origin"):
            styled.pop(dropped_key, None)
        return styled

    def scaled(bbox):
        return [
            int(bbox[0] * w_multiplier),
            int(bbox[1] * h_multiplier),
            int(bbox[2] * w_multiplier),
            int(bbox[3] * h_multiplier),
        ]

    records = []
    current = None
    for raw_span in x:
        span = normalise(raw_span)
        if current is None:
            current = span
            continue
        if str(span["text"]).isspace():
            continue
        same_style = (
            current["size"] == span["size"]
            and current["font"] == span["font"]
            and current["color"] == span["color"]
        )
        if same_style:
            x1, y1, w1, h1 = span["bbox"]
            x2, y2, w2, h2 = current["bbox"]
            current["bbox"] = [min(x1, x2), max(y1, y2), max(w1, w2), min(h1, h2)]
            current["text"] += span["text"]
        else:
            current["bboxScaled"] = scaled(current["bbox"])
            records.append(current)
            current = span

    if current is None:
        return records
    current["bboxScaled"] = scaled(current["bbox"])
    records.append(current)
    return records

Classes

class DeleteFile

Represents an abstract RESTful resource. Concrete resources should extend from this class and expose methods for each supported HTTP method. If a resource is invoked with an unsupported HTTP method, the API will return a response with status 405 Method Not Allowed. Otherwise the appropriate method is called and passed all arguments from the url rule used when adding the resource to an Api instance. See :meth:~flask_restful.Api.add_resource for details.

Expand source code
class DeleteFile(Resource):
    """REST resource that deletes a file together with its stored data."""

    # Query-string schema: the id of the file to delete.
    fileid_arg = {"fileId": fields.Str(required=True)}

    @use_kwargs(auth_args, location="headers")
    @use_kwargs(fileid_arg, location="query")
    def delete(self, Authorization, fileId):
        """Delete the file record, its boxes and lines, and its folder.

        :param Authorization: header value; the part after the first space
            is passed to ``auth.verify``.
        :param fileId: id of the file, as a string.
        :returns: a success message, or a 401 response when the header has
            no second part or verification fails.
        """
        # A header without a space used to raise IndexError (HTTP 500);
        # treat it as a failed authentication instead.
        credentials = str(Authorization).split(" ")
        if len(credentials) < 2 or not auth.verify(credentials[1]):
            return {"msg": "Unauthorized! Access Denied"}, 401

        filesDB.delete_one({"_id": ObjectId(fileId)})
        globals.boxesDB.delete_many({"fileId": fileId})
        globals.linesDB.delete_many({"fileId": fileId})
        file_folder = os.path.join(globals.FILES_FOLDER, str(fileId))
        if os.path.exists(file_folder):
            shutil.rmtree(file_folder)
        return {"msg": "Deleted Successfully"}

Ancestors

  • flask_restful.Resource
  • flask.views.MethodView
  • flask.views.View

Class variables

var fileid_arg
var methods : Optional[List[str]]

Methods

def delete(self, Authorization, fileId)
Expand source code
@use_kwargs(auth_args, location="headers")
@use_kwargs(fileid_arg, location="query")
def delete(self, Authorization, fileId):
    """Delete the file record, its boxes and lines, and its folder.

    :param Authorization: header value; the part after the first space is
        passed to ``auth.verify``.
    :param fileId: id of the file, as a string.
    :returns: a success message, or a 401 response when the header has no
        second part or verification fails.
    """
    # A header without a space used to raise IndexError (HTTP 500); treat
    # it as a failed authentication instead.
    credentials = str(Authorization).split(" ")
    if len(credentials) < 2 or not auth.verify(credentials[1]):
        return {"msg": "Unauthorized! Access Denied"}, 401

    filesDB.delete_one({"_id": ObjectId(fileId)})
    globals.boxesDB.delete_many({"fileId": fileId})
    globals.linesDB.delete_many({"fileId": fileId})
    file_folder = os.path.join(globals.FILES_FOLDER, str(fileId))
    if os.path.exists(file_folder):
        shutil.rmtree(file_folder)
    return {"msg": "Deleted Successfully"}
class FileProperties

Represents an abstract RESTful resource. Concrete resources should extend from this class and expose methods for each supported HTTP method. If a resource is invoked with an unsupported HTTP method, the API will return a response with status 405 Method Not Allowed. Otherwise the appropriate method is called and passed all arguments from the url rule used when adding the resource to an Api instance. See :meth:~flask_restful.Api.add_resource for details.

Expand source code
class FileProperties(Resource):
    """REST resource that returns the ``Properties`` list of a file."""

    # Query-string schema: the id of the file to read.
    fileid_arg = {"fileId": fields.Str(required=True)}

    @use_kwargs(auth_args, location="headers")
    @use_kwargs(fileid_arg, location="query")
    def get(self, Authorization, fileId):
        """Return the file's properties.

        :param Authorization: header value; the part after the first space
            is passed to ``auth.verify``.
        :param fileId: id of the file, as a string.
        :returns: ``{"Properties": [...]}`` with status 200, a 401 response
            when authentication fails, or a 404 response when no file has
            that id.
        """
        # A header without a space used to raise IndexError (HTTP 500).
        credentials = str(Authorization).split(" ")
        if len(credentials) < 2 or not auth.verify(credentials[1]):
            return {"msg": "Unauthorized! Access Denied"}, 401

        result = filesDB.find_one(
            {
                '_id': ObjectId(fileId)
            },
            {
                "Properties": 1,
                "_id": 0
            }
        )
        # find_one returns None for an unknown id; subscripting it used to
        # fail with TypeError.
        if result is None:
            return {"msg": "File not found"}, 404

        return {"Properties": json.loads(json_util.dumps(result["Properties"]))}, 200

Ancestors

  • flask_restful.Resource
  • flask.views.MethodView
  • flask.views.View

Class variables

var fileid_arg
var methods : Optional[List[str]]

Methods

def get(self, Authorization, fileId)
Expand source code
@use_kwargs(auth_args, location="headers")
@use_kwargs(fileid_arg, location="query")
def get(self, Authorization, fileId):
    """Return the file's properties.

    :param Authorization: header value; the part after the first space is
        passed to ``auth.verify``.
    :param fileId: id of the file, as a string.
    :returns: ``{"Properties": [...]}`` with status 200, a 401 response when
        authentication fails, or a 404 response when no file has that id.
    """
    # A header without a space used to raise IndexError (HTTP 500).
    credentials = str(Authorization).split(" ")
    if len(credentials) < 2 or not auth.verify(credentials[1]):
        return {"msg": "Unauthorized! Access Denied"}, 401

    result = filesDB.find_one(
        {
            '_id': ObjectId(fileId)
        },
        {
            "Properties": 1,
            "_id": 0
        }
    )
    # find_one returns None for an unknown id; subscripting it used to fail
    # with TypeError.
    if result is None:
        return {"msg": "File not found"}, 404

    return {"Properties": json.loads(json_util.dumps(result["Properties"]))}, 200
class FilePropertiesAdd

Represents an abstract RESTful resource. Concrete resources should extend from this class and expose methods for each supported HTTP method. If a resource is invoked with an unsupported HTTP method, the API will return a response with status 405 Method Not Allowed. Otherwise the appropriate method is called and passed all arguments from the url rule used when adding the resource to an Api instance. See :meth:~flask_restful.Api.add_resource for details.

Expand source code
class FilePropertiesAdd(Resource):
    """REST resource that appends one property to a file."""

    # Request schema for the new property.
    fileid_arg = {
        "fileId": fields.Str(required=True),
        "key": fields.Str(required=True),
        "value": fields.Str(required=True),
        "isRequired": fields.Boolean(required=True)
    }

    @use_kwargs(auth_args, location="headers")
    @use_kwargs(fileid_arg)
    def post(self, Authorization, fileId, key, value, isRequired):
        """Push a ``{key, value, isRequired}`` entry onto ``Properties``.

        :param Authorization: header value; the part after the first space
            is passed to ``auth.verify``.
        :param fileId: id of the file, as a string.
        :param key: property name.
        :param value: property value.
        :param isRequired: whether the property is mandatory.
        :returns: ``{"msg": <modified_count>}`` with status 200, or a 401
            response when authentication fails.
        """
        # A header without a space used to raise IndexError (HTTP 500).
        credentials = str(Authorization).split(" ")
        if len(credentials) < 2 or not auth.verify(credentials[1]):
            return {"msg": "Unauthorized! Access Denied"}, 401

        result = filesDB.update_one(
            {
                '_id': ObjectId(fileId)
            },
            {
                "$push": {
                    "Properties": {
                        "key": key,
                        "value": value,
                        # Was stored as "Required"; the properties created
                        # by pdf_process use "isRequired".
                        "isRequired": isRequired
                    }
                }
            }
        )
        return {"msg": result.modified_count}, 200

Ancestors

  • flask_restful.Resource
  • flask.views.MethodView
  • flask.views.View

Class variables

var fileid_arg
var methods : Optional[List[str]]

Methods

def post(self, Authorization, fileId, key, value, isRequired)
Expand source code
@use_kwargs(auth_args, location="headers")
@use_kwargs(fileid_arg)
def post(self, Authorization, fileId, key, value, isRequired):
    """Push a ``{key, value, isRequired}`` entry onto ``Properties``.

    :param Authorization: header value; the part after the first space is
        passed to ``auth.verify``.
    :param fileId: id of the file, as a string.
    :param key: property name.
    :param value: property value.
    :param isRequired: whether the property is mandatory.
    :returns: ``{"msg": <modified_count>}`` with status 200, or a 401
        response when authentication fails.
    """
    # A header without a space used to raise IndexError (HTTP 500).
    credentials = str(Authorization).split(" ")
    if len(credentials) < 2 or not auth.verify(credentials[1]):
        return {"msg": "Unauthorized! Access Denied"}, 401

    result = filesDB.update_one(
        {
            '_id': ObjectId(fileId)
        },
        {
            "$push": {
                "Properties": {
                    "key": key,
                    "value": value,
                    # Was stored as "Required"; the properties created by
                    # pdf_process use "isRequired".
                    "isRequired": isRequired
                }
            }
        }
    )
    return {"msg": result.modified_count}, 200
class FilePropertiesDelete

Represents an abstract RESTful resource. Concrete resources should extend from this class and expose methods for each supported HTTP method. If a resource is invoked with an unsupported HTTP method, the API will return a response with status 405 Method Not Allowed. Otherwise the appropriate method is called and passed all arguments from the url rule used when adding the resource to an Api instance. See :meth:~flask_restful.Api.add_resource for details.

Expand source code
class FilePropertiesDelete(Resource):
    """REST resource that removes properties of a file by key."""

    # Query-string schema: the file and the property key to remove.
    fileid_arg = {
        "fileId": fields.Str(required=True),
        "key": fields.Str(required=True),
    }

    @use_kwargs(auth_args, location="headers")
    @use_kwargs(fileid_arg, location="query")
    def delete(self, Authorization, fileId, key):
        """Pull every ``Properties`` entry whose ``key`` equals ``key``.

        :param Authorization: header value; the part after the first space
            is passed to ``auth.verify``.
        :param fileId: id of the file, as a string.
        :param key: property name to remove.
        :returns: ``{"msg": <modified_count>}`` with status 200, or a 401
            response when authentication fails.
        """
        # A header without a space used to raise IndexError (HTTP 500).
        credentials = str(Authorization).split(" ")
        if len(credentials) < 2 or not auth.verify(credentials[1]):
            return {"msg": "Unauthorized! Access Denied"}, 401

        result = filesDB.update_one(
            {"_id": ObjectId(fileId)},
            {"$pull": {"Properties": {"key": key}}}
        )
        return {"msg": result.modified_count}, 200

Ancestors

  • flask_restful.Resource
  • flask.views.MethodView
  • flask.views.View

Class variables

var fileid_arg
var methods : Optional[List[str]]

Methods

def delete(self, Authorization, fileId, key)
Expand source code
@use_kwargs(auth_args, location="headers")
@use_kwargs(fileid_arg, location="query")
def delete(self, Authorization, fileId, key):
    """Pull every ``Properties`` entry whose ``key`` equals ``key``.

    :param Authorization: header value; the part after the first space is
        passed to ``auth.verify``.
    :param fileId: id of the file, as a string.
    :param key: property name to remove.
    :returns: ``{"msg": <modified_count>}`` with status 200, or a 401
        response when authentication fails.
    """
    # A header without a space used to raise IndexError (HTTP 500).
    credentials = str(Authorization).split(" ")
    if len(credentials) < 2 or not auth.verify(credentials[1]):
        return {"msg": "Unauthorized! Access Denied"}, 401

    result = filesDB.update_one(
        {"_id": ObjectId(fileId)},
        {"$pull": {"Properties": {"key": key}}}
    )
    return {"msg": result.modified_count}, 200
class FilePropertiesFont

Represents an abstract RESTful resource. Concrete resources should extend from this class and expose methods for each supported HTTP method. If a resource is invoked with an unsupported HTTP method, the API will return a response with status 405 Method Not Allowed. Otherwise the appropriate method is called and passed all arguments from the url rule used when adding the resource to an Api instance. See :meth:~flask_restful.Api.add_resource for details.

Expand source code
class FilePropertiesFont(Resource):
    """REST resource that returns the ``Fonts`` list of a file."""

    # Query-string schema: the id of the file to read.
    fileid_arg = {"fileId": fields.Str(required=True)}

    @use_kwargs(auth_args, location="headers")
    @use_kwargs(fileid_arg, location="query")
    def get(self, Authorization, fileId):
        """Return the file's fonts.

        :param Authorization: header value; the part after the first space
            is passed to ``auth.verify``.
        :param fileId: id of the file, as a string.
        :returns: ``{"Fonts": [...]}`` with status 200, a 401 response when
            authentication fails, or a 404 response when no file has that
            id.
        """
        # A header without a space used to raise IndexError (HTTP 500).
        credentials = str(Authorization).split(" ")
        if len(credentials) < 2 or not auth.verify(credentials[1]):
            return {"msg": "Unauthorized! Access Denied"}, 401

        result = filesDB.find_one(
            {
                '_id': ObjectId(fileId)
            },
            {
                "Fonts": 1,
                "_id": 0
            }
        )
        # find_one returns None for an unknown id; subscripting it used to
        # fail with TypeError.
        if result is None:
            return {"msg": "File not found"}, 404

        return {"Fonts": json.loads(json_util.dumps(result["Fonts"]))}, 200

Ancestors

  • flask_restful.Resource
  • flask.views.MethodView
  • flask.views.View

Class variables

var fileid_arg
var methods : Optional[List[str]]

Methods

def get(self, Authorization, fileId)
Expand source code
@use_kwargs(auth_args, location="headers")
@use_kwargs(fileid_arg, location="query")
def get(self, Authorization, fileId):
    """Return the file's fonts.

    :param Authorization: header value; the part after the first space is
        passed to ``auth.verify``.
    :param fileId: id of the file, as a string.
    :returns: ``{"Fonts": [...]}`` with status 200, a 401 response when
        authentication fails, or a 404 response when no file has that id.
    """
    # A header without a space used to raise IndexError (HTTP 500).
    credentials = str(Authorization).split(" ")
    if len(credentials) < 2 or not auth.verify(credentials[1]):
        return {"msg": "Unauthorized! Access Denied"}, 401

    result = filesDB.find_one(
        {
            '_id': ObjectId(fileId)
        },
        {
            "Fonts": 1,
            "_id": 0
        }
    )
    # find_one returns None for an unknown id; subscripting it used to fail
    # with TypeError.
    if result is None:
        return {"msg": "File not found"}, 404

    return {"Fonts": json.loads(json_util.dumps(result["Fonts"]))}, 200
class FilePropertiesFontAdd

Represents an abstract RESTful resource. Concrete resources should extend from this class and expose methods for each supported HTTP method. If a resource is invoked with an unsupported HTTP method, the API will return a response with status 405 Method Not Allowed. Otherwise the appropriate method is called and passed all arguments from the url rule used when adding the resource to an Api instance. See :meth:~flask_restful.Api.add_resource for details.

Expand source code
class FilePropertiesFontAdd(Resource):
    """REST resource that appends one font entry to a file."""

    # Request schema for the new font entry.
    fileid_arg = {
        "fileId": fields.Str(required=True),
        "fontId": fields.Str(required=True),
        "cssFontName": fields.Str(required=True),
        "fontPath": fields.Str(required=True),
        "ori_fontId": fields.Str(required=True),
        "ori_fontPath": fields.Str(required=True),
        "fontName": fields.Str(required=True),
        "ori_fontName": fields.Str(required=True)
    }

    @use_kwargs(auth_args, location="headers")
    @use_kwargs(fileid_arg)
    def post(self, Authorization, fileId, fontId, cssFontName, fontPath, ori_fontPath, ori_fontId, fontName,
             ori_fontName):
        """Push a font entry built from the request fields onto ``Fonts``.

        :param Authorization: header value; the part after the first space
            is passed to ``auth.verify``.
        :param fileId: id of the file, as a string.
        :returns: ``{"msg": <modified_count>}`` with status 200, or a 401
            response when authentication fails.

        The remaining parameters are stored unchanged under keys of the
        same name.
        """
        # A header without a space used to raise IndexError (HTTP 500).
        credentials = str(Authorization).split(" ")
        if len(credentials) < 2 or not auth.verify(credentials[1]):
            return {"msg": "Unauthorized! Access Denied"}, 401

        result = filesDB.update_one(
            {
                '_id': ObjectId(fileId)
            },
            {
                "$push": {
                    "Fonts": {
                        "fontId": fontId,
                        "ori_fontId": ori_fontId,
                        "cssFontName": cssFontName,
                        "fontPath": fontPath,
                        "ori_fontPath": ori_fontPath,
                        "ori_fontName": ori_fontName,
                        "fontName": fontName
                    }
                }
            }
        )
        return {"msg": result.modified_count}, 200

Ancestors

  • flask_restful.Resource
  • flask.views.MethodView
  • flask.views.View

Class variables

var fileid_arg
var methods : Optional[List[str]]

Methods

def post(self, Authorization, fileId, fontId, cssFontName, fontPath, ori_fontPath, ori_fontId, fontName, ori_fontName)
Expand source code
@use_kwargs(auth_args, location="headers")
@use_kwargs(fileid_arg)
def post(self, Authorization, fileId, fontId, cssFontName, fontPath, ori_fontPath, ori_fontId, fontName,
         ori_fontName):
    """Push a font entry built from the request fields onto ``Fonts``.

    :param Authorization: header value; the part after the first space is
        passed to ``auth.verify``.
    :param fileId: id of the file, as a string.
    :returns: ``{"msg": <modified_count>}`` with status 200, or a 401
        response when authentication fails.

    The remaining parameters are stored unchanged under keys of the same
    name.
    """
    # A header without a space used to raise IndexError (HTTP 500).
    credentials = str(Authorization).split(" ")
    if len(credentials) < 2 or not auth.verify(credentials[1]):
        return {"msg": "Unauthorized! Access Denied"}, 401

    result = filesDB.update_one(
        {
            '_id': ObjectId(fileId)
        },
        {
            "$push": {
                "Fonts": {
                    "fontId": fontId,
                    "ori_fontId": ori_fontId,
                    "cssFontName": cssFontName,
                    "fontPath": fontPath,
                    "ori_fontPath": ori_fontPath,
                    "ori_fontName": ori_fontName,
                    "fontName": fontName
                }
            }
        }
    )
    return {"msg": result.modified_count}, 200
class FilePropertiesFontDelete

Represents an abstract RESTful resource. Concrete resources should extend from this class and expose methods for each supported HTTP method. If a resource is invoked with an unsupported HTTP method, the API will return a response with status 405 Method Not Allowed. Otherwise the appropriate method is called and passed all arguments from the url rule used when adding the resource to an Api instance. See :meth:~flask_restful.Api.add_resource for details.

Expand source code
class FilePropertiesFontDelete(Resource):
    """REST resource that removes a font entry from a file's ``Fonts`` array."""

    # Query-string arguments: the target file id and the CSS name of the font to remove.
    fileid_arg = {
        "fileId": fields.Str(required=True),
        "cssFontName": fields.Str(required=True)
    }

    @use_kwargs(auth_args, location="headers")
    @use_kwargs(fileid_arg, location="query")
    def delete(self, Authorization, fileId, cssFontName):
        """Remove every ``Fonts`` entry whose ``cssFontName`` matches.

        The second space-separated part of ``Authorization`` is handed to
        ``auth.verify``.

        Returns:
            ``({"msg": <modified_count>}, 200)`` when verification succeeds,
            otherwise ``({"msg": "Unauthorized! Access Denied"}, 401)``.
        """
        if auth.verify(str(Authorization).split(" ")[1]):
            result = filesDB.update_one(
                {"_id": ObjectId(fileId)},
                {
                    # $pull evaluates its condition against each array element as
                    # if it were a top-level document, so the condition is given
                    # directly. Wrapping it in $elemMatch only matches elements
                    # that are themselves arrays, which never removed a font.
                    "$pull": {
                        "Fonts": {
                            "cssFontName": cssFontName
                        }
                    }
                }
            )
            return {"msg": result.modified_count}, 200
        else:
            return {"msg": "Unauthorized! Access Denied"}, 401

Ancestors

  • flask_restful.Resource
  • flask.views.MethodView
  • flask.views.View

Class variables

var fileid_arg
var methods : Optional[List[str]]

Methods

def delete(self, Authorization, fileId, cssFontName)
Expand source code
@use_kwargs(auth_args, location="headers")
@use_kwargs(fileid_arg, location="query")
def delete(self, Authorization, fileId, cssFontName):
    """Remove every ``Fonts`` entry whose ``cssFontName`` matches.

    The second space-separated part of ``Authorization`` is handed to
    ``auth.verify``.

    Returns:
        ``({"msg": <modified_count>}, 200)`` when verification succeeds,
        otherwise ``({"msg": "Unauthorized! Access Denied"}, 401)``.
    """
    if auth.verify(str(Authorization).split(" ")[1]):
        result = filesDB.update_one(
            {"_id": ObjectId(fileId)},
            {
                # $pull evaluates its condition against each array element as
                # if it were a top-level document, so the condition is given
                # directly. Wrapping it in $elemMatch only matches elements
                # that are themselves arrays, which never removed a font.
                "$pull": {
                    "Fonts": {
                        "cssFontName": cssFontName
                    }
                }
            }
        )
        return {"msg": result.modified_count}, 200
    else:
        return {"msg": "Unauthorized! Access Denied"}, 401
class FilePropertiesFontUpdate

Represents an abstract RESTful resource. Concrete resources should extend from this class and expose methods for each supported HTTP method. If a resource is invoked with an unsupported HTTP method, the API will return a response with status 405 Method Not Allowed. Otherwise the appropriate method is called and passed all arguments from the url rule used when adding the resource to an Api instance. See :meth:~flask_restful.Api.add_resource for details.

Expand source code
class FilePropertiesFontUpdate(Resource):
    """REST resource that overwrites an existing entry of a file's ``Fonts`` array."""

    # Every field of the font entry is required; cssFontName also selects the entry to update.
    fileid_arg = {
        "fileId": fields.Str(required=True),
        "fontId": fields.Str(required=True),
        "cssFontName": fields.Str(required=True),
        "fontPath": fields.Str(required=True),
        "ori_fontId": fields.Str(required=True),
        "ori_fontPath": fields.Str(required=True),
        "fontName": fields.Str(required=True),
        "ori_fontName": fields.Str(required=True)
    }

    @use_kwargs(auth_args, location="headers")
    @use_kwargs(fileid_arg)
    def put(self, Authorization, fileId, fontId, cssFontName, fontPath, ori_fontPath, ori_fontId, fontName,
            ori_fontName):
        """Rewrite the ``Fonts`` entry matched by ``cssFontName`` with the given values.

        The second space-separated part of ``Authorization`` is handed to
        ``auth.verify``.

        Returns:
            ``({"msg": <modified_count>}, 200)`` when verification succeeds,
            otherwise ``({"msg": "Unauthorized! Access Denied"}, 401)``.
        """
        token = str(Authorization).split(" ")[1]
        if not auth.verify(token):
            return {"msg": "Unauthorized! Access Denied"}, 401

        # The positional "$" refers to the array element matched by the selector.
        selector = {
            "_id": ObjectId(fileId),
            "Fonts.cssFontName": cssFontName
        }
        new_values = {
            "Fonts.$.fontId": fontId,
            "Fonts.$.ori_fontId": ori_fontId,
            "Fonts.$.cssFontName": cssFontName,
            "Fonts.$.fontPath": fontPath,
            "Fonts.$.ori_fontPath": ori_fontPath,
            "Fonts.$.ori_fontName": ori_fontName,
            "Fonts.$.fontName": fontName
        }
        outcome = filesDB.update_one(selector, {"$set": new_values})
        return {"msg": outcome.modified_count}, 200

Ancestors

  • flask_restful.Resource
  • flask.views.MethodView
  • flask.views.View

Class variables

var fileid_arg
var methods : Optional[List[str]]

Methods

def put(self, Authorization, fileId, fontId, cssFontName, fontPath, ori_fontPath, ori_fontId, fontName, ori_fontName)
Expand source code
@use_kwargs(auth_args, location="headers")
@use_kwargs(fileid_arg)
def put(self, Authorization, fileId, fontId, cssFontName, fontPath, ori_fontPath, ori_fontId, fontName,
        ori_fontName):
    """Rewrite the ``Fonts`` entry matched by ``cssFontName`` with the given values.

    The second space-separated part of ``Authorization`` is handed to
    ``auth.verify``.

    Returns:
        ``({"msg": <modified_count>}, 200)`` when verification succeeds,
        otherwise ``({"msg": "Unauthorized! Access Denied"}, 401)``.
    """
    token = str(Authorization).split(" ")[1]
    if not auth.verify(token):
        return {"msg": "Unauthorized! Access Denied"}, 401

    # The positional "$" refers to the array element matched by the selector.
    selector = {
        "_id": ObjectId(fileId),
        "Fonts.cssFontName": cssFontName
    }
    new_values = {
        "Fonts.$.fontId": fontId,
        "Fonts.$.ori_fontId": ori_fontId,
        "Fonts.$.cssFontName": cssFontName,
        "Fonts.$.fontPath": fontPath,
        "Fonts.$.ori_fontPath": ori_fontPath,
        "Fonts.$.ori_fontName": ori_fontName,
        "Fonts.$.fontName": fontName
    }
    outcome = filesDB.update_one(selector, {"$set": new_values})
    return {"msg": outcome.modified_count}, 200
class FilePropertiesUpdate

Represents an abstract RESTful resource. Concrete resources should extend from this class and expose methods for each supported HTTP method. If a resource is invoked with an unsupported HTTP method, the API will return a response with status 405 Method Not Allowed. Otherwise the appropriate method is called and passed all arguments from the url rule used when adding the resource to an Api instance. See :meth:~flask_restful.Api.add_resource for details.

Expand source code
class FilePropertiesUpdate(Resource):
    """REST resource that changes the value of one entry in a file's ``Properties`` array."""

    # "isRequired" must be supplied by the client but is not used by put().
    fileid_arg = {
        "fileId": fields.Str(required=True),
        "key": fields.Str(required=True),
        "value": fields.Str(required=True),
        "isRequired": fields.Str(required=True)
    }

    @use_kwargs(auth_args, location="headers")
    @use_kwargs(fileid_arg)
    def put(self, Authorization, fileId, key, value, isRequired):
        """Set ``value`` on the ``Properties`` entry whose ``key`` matches.

        The second space-separated part of ``Authorization`` is handed to
        ``auth.verify``. ``isRequired`` is accepted but not written.

        Returns:
            ``({"msg": <modified_count>}, 200)`` when verification succeeds,
            otherwise ``({"msg": "Unauthorized! Access Denied"}, 401)``.
        """
        token = str(Authorization).split(" ")[1]
        if not auth.verify(token):
            return {"msg": "Unauthorized! Access Denied"}, 401

        # The positional "$" refers to the Properties element matched by the selector.
        selector = {'_id': ObjectId(fileId), "Properties.key": key}
        change = {"$set": {"Properties.$.value": value}}
        outcome = filesDB.update_one(selector, change)
        return {"msg": outcome.modified_count}, 200

Ancestors

  • flask_restful.Resource
  • flask.views.MethodView
  • flask.views.View

Class variables

var fileid_arg
var methods : Optional[List[str]]

Methods

def put(self, Authorization, fileId, key, value, isRequired)
Expand source code
@use_kwargs(auth_args, location="headers")
@use_kwargs(fileid_arg)
def put(self, Authorization, fileId, key, value, isRequired):
    """Set ``value`` on the ``Properties`` entry whose ``key`` matches.

    The second space-separated part of ``Authorization`` is handed to
    ``auth.verify``. ``isRequired`` is accepted but not written.

    Returns:
        ``({"msg": <modified_count>}, 200)`` when verification succeeds,
        otherwise ``({"msg": "Unauthorized! Access Denied"}, 401)``.
    """
    token = str(Authorization).split(" ")[1]
    if not auth.verify(token):
        return {"msg": "Unauthorized! Access Denied"}, 401

    # The positional "$" refers to the Properties element matched by the selector.
    selector = {'_id': ObjectId(fileId), "Properties.key": key}
    change = {"$set": {"Properties.$.value": value}}
    outcome = filesDB.update_one(selector, change)
    return {"msg": outcome.modified_count}, 200
class FileStatus

Represents an abstract RESTful resource. Concrete resources should extend from this class and expose methods for each supported HTTP method. If a resource is invoked with an unsupported HTTP method, the API will return a response with status 405 Method Not Allowed. Otherwise the appropriate method is called and passed all arguments from the url rule used when adding the resource to an Api instance. See :meth:~flask_restful.Api.add_resource for details.

Expand source code
class FileStatus(Resource):
    """REST resource that reports the ``Status`` field of a stored file document."""

    # Query-string argument identifying the file document.
    fileid_arg = {"fileId": fields.Str(required=True)}

    @use_kwargs(auth_args, location="headers")
    @use_kwargs(fileid_arg, location="query")
    def get(self, Authorization, fileId):
        """Return the current ``Status`` of the file identified by ``fileId``.

        The second space-separated part of ``Authorization`` is handed to
        ``auth.verify``.

        Returns:
            ``({"msg": <Status>}, 200)`` when the file exists,
            ``("File Not Found", 404)`` when no document has that id, and
            ``({"msg": "Unauthorized! Access Denied"}, 401)`` when
            verification fails.
        """
        if not auth.verify(str(Authorization).split(" ")[1]):
            return {"msg": "Unauthorized! Access Denied"}, 401

        document = filesDB.find_one({'_id': ObjectId(fileId)})
        if not document:
            return "File Not Found", 404
        # Report the status whatever its value. Previously any status other
        # than "Ready" fell through and returned None (an empty body).
        return {"msg": document["Status"]}, 200

Ancestors

  • flask_restful.Resource
  • flask.views.MethodView
  • flask.views.View

Class variables

var fileid_arg
var methods : Optional[List[str]]

Methods

def get(self, Authorization, fileId)
Expand source code
@use_kwargs(auth_args, location="headers")
@use_kwargs(fileid_arg, location="query")
def get(self, Authorization, fileId):
    """Return the current ``Status`` of the file identified by ``fileId``.

    The second space-separated part of ``Authorization`` is handed to
    ``auth.verify``.

    Returns:
        ``({"msg": <Status>}, 200)`` when the file exists,
        ``("File Not Found", 404)`` when no document has that id, and
        ``({"msg": "Unauthorized! Access Denied"}, 401)`` when
        verification fails.
    """
    if not auth.verify(str(Authorization).split(" ")[1]):
        return {"msg": "Unauthorized! Access Denied"}, 401

    document = filesDB.find_one({'_id': ObjectId(fileId)})
    if not document:
        return "File Not Found", 404
    # Report the status whatever its value. Previously any status other
    # than "Ready" fell through and returned None (an empty body).
    return {"msg": document["Status"]}, 200
class GetFiles

Represents an abstract RESTful resource. Concrete resources should extend from this class and expose methods for each supported HTTP method. If a resource is invoked with an unsupported HTTP method, the API will return a response with status 405 Method Not Allowed. Otherwise the appropriate method is called and passed all arguments from the url rule used when adding the resource to an Api instance. See :meth:~flask_restful.Api.add_resource for details.

Expand source code
class GetFiles(Resource):
    """REST resource that searches "Ready" files by filename."""

    # Query-string argument holding the pattern matched against "filename".
    search_arg = {"searchPhase": fields.Str(required=True)}

    @use_kwargs(auth_args, location="headers")
    @use_kwargs(search_arg, location="query")
    def get(self, Authorization, searchPhase):
        """List up to 50 ready files whose filename matches ``searchPhase``.

        The second space-separated part of ``Authorization`` is handed to
        ``auth.verify``. Matching is a case-insensitive ``$regex``; results
        are sorted by ``created_at`` descending and contain only ``_id`` and
        ``filename``.

        Returns:
            ``(<list of documents>, 200)`` when verification succeeds,
            otherwise ``("Unauthorized! Access Denied", 401)``.
        """
        token = str(Authorization).split(" ")[1]
        if not auth.verify(token):
            return "Unauthorized! Access Denied", 401

        # NOTE(review): searchPhase is used as a regular expression without
        # escaping -- confirm whether clients rely on regex syntax.
        name_matches = {"filename": {'$regex': searchPhase, "$options": 'i'}}
        is_ready = {"Status": "Ready"}
        wanted_fields = {"_id": 1, "filename": 1}

        matches = filesDB.find({"$and": [name_matches, is_ready]}, wanted_fields)
        newest_first = matches.sort("created_at", pymongo.DESCENDING).limit(50)
        files = list(newest_first)
        # Round-trip through json_util so BSON types become plain JSON values.
        return json.loads(json_util.dumps(files)), 200

Ancestors

  • flask_restful.Resource
  • flask.views.MethodView
  • flask.views.View

Class variables

var methods : Optional[List[str]]
var search_arg

Methods

def get(self, Authorization, searchPhase)
Expand source code
@use_kwargs(auth_args, location="headers")
@use_kwargs(search_arg, location="query")
def get(self, Authorization, searchPhase):
    """List up to 50 ready files whose filename matches ``searchPhase``.

    The second space-separated part of ``Authorization`` is handed to
    ``auth.verify``. Matching is a case-insensitive ``$regex``; results
    are sorted by ``created_at`` descending and contain only ``_id`` and
    ``filename``.

    Returns:
        ``(<list of documents>, 200)`` when verification succeeds,
        otherwise ``("Unauthorized! Access Denied", 401)``.
    """
    token = str(Authorization).split(" ")[1]
    if not auth.verify(token):
        return "Unauthorized! Access Denied", 401

    # NOTE(review): searchPhase is used as a regular expression without
    # escaping -- confirm whether clients rely on regex syntax.
    name_matches = {"filename": {'$regex': searchPhase, "$options": 'i'}}
    is_ready = {"Status": "Ready"}
    wanted_fields = {"_id": 1, "filename": 1}

    matches = filesDB.find({"$and": [name_matches, is_ready]}, wanted_fields)
    newest_first = matches.sort("created_at", pymongo.DESCENDING).limit(50)
    files = list(newest_first)
    # Round-trip through json_util so BSON types become plain JSON values.
    return json.loads(json_util.dumps(files)), 200
class Upload

Represents an abstract RESTful resource. Concrete resources should extend from this class and expose methods for each supported HTTP method. If a resource is invoked with an unsupported HTTP method, the API will return a response with status 405 Method Not Allowed. Otherwise the appropriate method is called and passed all arguments from the url rule used when adding the resource to an Api instance. See :meth:~flask_restful.Api.add_resource for details.

Expand source code
class Upload(Resource):
    """REST resource that stores an uploaded PDF and hands it to ``pdf_process``."""

    # The uploaded file must have a ".pdf" extension (case-insensitive).
    files_args = {
        "file": fields.Field(validate=lambda file: "pdf" == file.filename.split(".")[-1].lower(), required=True)}
    # Form field; the literal string "FALSE" disables overwriting, any other value enables it.
    upload_args = {'overwrite': fields.Str(required=True)}

    @use_kwargs(auth_args, location="headers")
    @use_kwargs(files_args, location="files")
    @use_kwargs(upload_args, location="form")
    def post(self, overwrite, file, Authorization):
        """Save the uploaded PDF under ``globals.UPLOAD_FOLDER`` and process it.

        The second space-separated part of ``Authorization`` is handed to
        ``auth.verify``.

        Returns:
            ``(<pdf_process result>, 200)`` on success,
            ``({"msg": "File Already Exists"}, 401)`` when ``pdf_process``
            returns ``"EXISTS"``, and
            ``({"msg": "Unauthorized! Access Denied"}, 401)`` when
            verification fails.
        """
        overwrite = overwrite != "FALSE"

        if not auth.verify(str(Authorization).split(" ")[1]):
            return {"msg": "Unauthorized! Access Denied"}, 401

        # The filename comes from the client. Keep only its final path
        # component so directory parts or an absolute path cannot place the
        # file outside UPLOAD_FOLDER.
        safe_name = os.path.basename(file.filename)
        file_path = os.path.join(globals.UPLOAD_FOLDER, safe_name)
        file.save(file_path)
        response = pdf_process(file_path, overwrite)

        if response == "EXISTS":
            return {"msg": "File Already Exists"}, 401
        return response, 200

Ancestors

  • flask_restful.Resource
  • flask.views.MethodView
  • flask.views.View

Class variables

var files_args
var methods : Optional[List[str]]
var upload_args

Methods

def post(self, overwrite, file, Authorization)
Expand source code
@use_kwargs(auth_args, location="headers")
@use_kwargs(files_args, location="files")
@use_kwargs(upload_args, location="form")
def post(self, overwrite, file, Authorization):
    """Save the uploaded PDF under ``globals.UPLOAD_FOLDER`` and process it.

    The second space-separated part of ``Authorization`` is handed to
    ``auth.verify``. The literal string ``"FALSE"`` for ``overwrite``
    disables overwriting; any other value enables it.

    Returns:
        ``(<pdf_process result>, 200)`` on success,
        ``({"msg": "File Already Exists"}, 401)`` when ``pdf_process``
        returns ``"EXISTS"``, and
        ``({"msg": "Unauthorized! Access Denied"}, 401)`` when
        verification fails.
    """
    overwrite = overwrite != "FALSE"

    if not auth.verify(str(Authorization).split(" ")[1]):
        return {"msg": "Unauthorized! Access Denied"}, 401

    # The filename comes from the client. Keep only its final path
    # component so directory parts or an absolute path cannot place the
    # file outside UPLOAD_FOLDER.
    safe_name = os.path.basename(file.filename)
    file_path = os.path.join(globals.UPLOAD_FOLDER, safe_name)
    file.save(file_path)
    response = pdf_process(file_path, overwrite)

    if response == "EXISTS":
        return {"msg": "File Already Exists"}, 401
    return response, 200