001/*
002 * (C) Copyright 2016-2018 Nuxeo (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Thibaud Arguillere
018 *     Miguel Nixo
019 */
020package org.nuxeo.ecm.platform.pdf.operations;
021
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import org.apache.commons.lang3.StringUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.nuxeo.ecm.automation.core.Constants;
import org.nuxeo.ecm.automation.core.annotations.Operation;
import org.nuxeo.ecm.automation.core.annotations.OperationMethod;
import org.nuxeo.ecm.automation.core.annotations.Param;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.platform.pdf.LinkInfo;
import org.nuxeo.ecm.platform.pdf.PDFLinks;
036
037/**
038 * Returns a JSON string of an array of objects with page, subType, text and link fields.
039 * <p>
040 * If <code>getAll</code> is <code>false</code>, then <code>type</code> is required.
041 *
042 * @since 8.10
043 */
044@Operation(id = PDFExtractLinksOperation.ID, category = Constants.CAT_CONVERSION, label = "PDF: Extract Links",
045    description = "Returns a JSON string of an array of objects with page, subType, text and link fields. If getAll" +
046        " is true, returns all the links (Remote Go To, Launch and URI in the current version).")
047public class PDFExtractLinksOperation {
048
049    public static final String ID = "PDF.ExtractLinks";
050
051    @Param(name = "type", required = false, widget = Constants.W_OPTION, values = { "Launch", "Remote Go To", "URI" })
052    protected String type;
053
054    @Param(name = "getAll", required = false)
055    protected boolean getAll = false;
056
057    @OperationMethod
058    public String run(Blob inBlob) throws IOException, JSONException {
059        ArrayList<String> types = new ArrayList<>();
060        if (getAll) {
061            types.add("Launch");
062            types.add("Remote Go To");
063            types.add("URI");
064        } else {
065            if (StringUtils.isBlank(type)) {
066                throw new IllegalArgumentException("type cannot be empty if getAll is false");
067            }
068            types.add(type);
069        }
070        PDFLinks pdfl = new PDFLinks(inBlob);
071        JSONArray array = new JSONArray();
072        for (String theType : types) {
073            List<LinkInfo> links = new ArrayList<>();
074            switch (theType.toLowerCase()) {
075            case "remote go to":
076                links = pdfl.getRemoteGoToLinks();
077                break;
078            case "launch":
079                links = pdfl.getLaunchLinks();
080                break;
081            case "uri":
082                links = pdfl.getURILinks();
083                break;
084            }
085            for (LinkInfo li : links) {
086                JSONObject object = new JSONObject();
087                object.put("page", li.getPage());
088                object.put("subType", li.getSubType());
089                object.put("text", li.getText());
090                object.put("link", li.getLink());
091                array.put(object);
092            }
093        }
094        pdfl.close();
095        return array.toString();
096    }
097
098}