/*
 * (C) Copyright 2011-2015 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Mathieu Guillaume
 *     Florent Guillaume
 */
package org.nuxeo.ecm.core.storage.sql;

import static org.apache.commons.lang.StringUtils.isBlank;
import static org.apache.commons.lang.StringUtils.isNotBlank;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.security.GeneralSecurityException;
import java.security.KeyPair;
import java.security.KeyStore;
import java.security.PrivateKey;
import java.security.PublicKey;
import java.security.cert.Certificate;
import java.util.Collection;
import java.util.Date;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.commons.codec.digest.DigestUtils;

import javax.servlet.http.HttpServletRequest;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.common.Environment;
import org.nuxeo.ecm.blob.AbstractBinaryGarbageCollector;
import org.nuxeo.ecm.blob.AbstractCloudBinaryManager;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.blob.BlobManager.BlobInfo;
import org.nuxeo.ecm.core.blob.ManagedBlob;
import org.nuxeo.ecm.core.blob.binary.BinaryBlobProvider;
import org.nuxeo.ecm.core.blob.binary.BinaryGarbageCollector;
import org.nuxeo.ecm.core.blob.binary.FileStorage;
import org.nuxeo.ecm.core.model.Document;
import org.nuxeo.runtime.api.Framework;

import com.amazonaws.AmazonClientException;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.HttpMethod;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.InstanceProfileCredentialsProvider;
import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.AmazonS3EncryptionClientBuilder;
import com.amazonaws.services.s3.model.CannedAccessControlList;
import com.amazonaws.services.s3.model.CryptoConfiguration;
import com.amazonaws.services.s3.model.EncryptedPutObjectRequest;
import com.amazonaws.services.s3.model.EncryptionMaterials;
import com.amazonaws.services.s3.model.GeneratePresignedUrlRequest;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import com.amazonaws.services.s3.model.StaticEncryptionMaterialsProvider;
import com.amazonaws.services.s3.transfer.Download;
import com.amazonaws.services.s3.transfer.TransferManager;
import com.amazonaws.services.s3.transfer.TransferManagerBuilder;
import com.amazonaws.services.s3.transfer.Upload;
import com.google.common.base.MoreObjects;

/**
 * A Binary Manager that stores binaries as S3 BLOBs
 * <p>
 * The BLOBs are cached locally on first access for efficiency.
 * <p>
 * Because the BLOB length can be accessed independently of the binary stream, it is also cached in a simple text file
 * if accessed before the stream.
 */
public class S3BinaryManager extends AbstractCloudBinaryManager {

    private static final String MD5 = "MD5"; // must be MD5 for Etag

    /**
     * Returns the digest algorithm used for blob keys; always {@code "MD5"} so that keys can be compared with S3 ETags.
     */
    @Override
    protected String getDefaultDigestAlgorithm() {
        return MD5;
    }

    private static final Log log = LogFactory.getLog(S3BinaryManager.class);

    public static final String SYSTEM_PROPERTY_PREFIX = "nuxeo.s3storage";

    public static final String BUCKET_NAME_PROPERTY = "bucket";

    public static final String BUCKET_PREFIX_PROPERTY = "bucket_prefix";

    public static final String BUCKET_REGION_PROPERTY = "region";

    public static final String DEFAULT_BUCKET_REGION = "us-east-1"; // US East

    public static final String AWS_ID_PROPERTY = "awsid";

    public static final String AWS_ID_ENV = "AWS_ACCESS_KEY_ID";

    public static final String AWS_SECRET_PROPERTY = "awssecret";

    public static final String AWS_SECRET_ENV = "AWS_SECRET_ACCESS_KEY";

    /** AWS ClientConfiguration default 50 */
    public static final String CONNECTION_MAX_PROPERTY = "connection.max";

    /** AWS ClientConfiguration default 3 (with exponential backoff) */
    public static final String CONNECTION_RETRY_PROPERTY = "connection.retry";

    /** AWS ClientConfiguration default 50*1000 = 50s */
    public static final String CONNECTION_TIMEOUT_PROPERTY = "connection.timeout";

    /** AWS ClientConfiguration default 50*1000 = 50s */
    public static final String SOCKET_TIMEOUT_PROPERTY = "socket.timeout";

    public static final String KEYSTORE_FILE_PROPERTY = "crypt.keystore.file";

    public static final String KEYSTORE_PASS_PROPERTY = "crypt.keystore.password";

    public static final String SERVERSIDE_ENCRYPTION_PROPERTY = "crypt.serverside";

    public static final String PRIVKEY_ALIAS_PROPERTY = "crypt.key.alias";

    public static final String PRIVKEY_PASS_PROPERTY = "crypt.key.password";

    public static final String ENDPOINT_PROPERTY = "endpoint";

    public static final String DIRECTDOWNLOAD_PROPERTY_COMPAT = "downloadfroms3";

    public static final String DIRECTDOWNLOAD_EXPIRE_PROPERTY_COMPAT = "downloadfroms3.expire";

    /** Matches a lowercase hexadecimal string of exactly 32 characters. */
    private static final Pattern MD5_RE = Pattern.compile("[0-9a-f]{32}");

    protected String bucketName;

    /** Key prefix prepended to every digest; empty when not configured, otherwise ends with '/'. */
    protected String bucketNamePrefix;

    protected AWSCredentialsProvider awsCredentialsProvider;

    protected ClientConfiguration clientConfiguration;

    /** Client-side encryption materials, or {@code null} when no keystore is configured. */
    protected EncryptionMaterials encryptionMaterials;

    /** {@code true} when client-side encryption materials were loaded from a keystore. */
    protected boolean isEncrypted;

    protected CryptoConfiguration cryptoConfiguration;

    /** {@code true} when the {@code crypt.serverside} property asks for SSE on non-client-encrypted uploads. */
    protected boolean userServerSideEncryption;

    protected AmazonS3 amazonS3;

    protected TransferManager transferManager;

    /**
     * Shuts down the transfer manager (if one was created) and then delegates to the superclass.
     */
    @Override
    public void close() {
        // setupCloudClient() may have failed before the transfer manager was created
        if (transferManager != null) {
            // this also shuts down the AmazonS3Client
            transferManager.shutdownNow();
        }
        super.close();
    }

    /**
     * Aborts uploads that crashed and are older than 1 day.
     *
     * @throws IOException if the S3 client reports an error
     * @since 7.2
     */
    protected void abortOldUploads() throws IOException {
        int oneDayMillis = 1000 * 60 * 60 * 24;
        Date threshold = new Date(System.currentTimeMillis() - oneDayMillis);
        try {
            transferManager.abortMultipartUploads(bucketName, threshold);
        } catch (AmazonClientException e) {
            throw new IOException("Failed to abort old uploads", e);
        }
    }

    /**
     * Reads the configuration, builds the S3 client (plain or client-side encrypted), creates the bucket when it does
     * not exist, applies the legacy direct-download properties and creates the transfer manager.
     *
     * @throws IOException if the bucket check/creation or the abort of old uploads fails
     * @throws RuntimeException if the bucket name, the credentials or the crypto configuration is missing or invalid
     */
    @Override
    protected void setupCloudClient() throws IOException {
        // Get settings from the configuration
        bucketName = getProperty(BUCKET_NAME_PROPERTY);
        bucketNamePrefix = MoreObjects.firstNonNull(getProperty(BUCKET_PREFIX_PROPERTY), StringUtils.EMPTY);
        String bucketRegion = getProperty(BUCKET_REGION_PROPERTY);
        if (isBlank(bucketRegion)) {
            bucketRegion = DEFAULT_BUCKET_REGION;
        }
        String awsID = getProperty(AWS_ID_PROPERTY);
        String awsSecret = getProperty(AWS_SECRET_PROPERTY);

        String proxyHost = Framework.getProperty(Environment.NUXEO_HTTP_PROXY_HOST);
        String proxyPort = Framework.getProperty(Environment.NUXEO_HTTP_PROXY_PORT);
        String proxyLogin = Framework.getProperty(Environment.NUXEO_HTTP_PROXY_LOGIN);
        String proxyPassword = Framework.getProperty(Environment.NUXEO_HTTP_PROXY_PASSWORD);

        int maxConnections = getIntProperty(CONNECTION_MAX_PROPERTY);
        int maxErrorRetry = getIntProperty(CONNECTION_RETRY_PROPERTY);
        int connectionTimeout = getIntProperty(CONNECTION_TIMEOUT_PROPERTY);
        int socketTimeout = getIntProperty(SOCKET_TIMEOUT_PROPERTY);

        String keystoreFile = getProperty(KEYSTORE_FILE_PROPERTY);
        String keystorePass = getProperty(KEYSTORE_PASS_PROPERTY);
        String privkeyAlias = getProperty(PRIVKEY_ALIAS_PROPERTY);
        String privkeyPass = getProperty(PRIVKEY_PASS_PROPERTY);
        String endpoint = getProperty(ENDPOINT_PROPERTY);
        String sseprop = getProperty(SERVERSIDE_ENCRYPTION_PROPERTY);
        if (isNotBlank(sseprop)) {
            userServerSideEncryption = Boolean.parseBoolean(sseprop);
        }

        // Fallback on default env keys for ID and secret
        if (isBlank(awsID)) {
            awsID = System.getenv(AWS_ID_ENV);
        }
        if (isBlank(awsSecret)) {
            awsSecret = System.getenv(AWS_SECRET_ENV);
        }

        if (isBlank(bucketName)) {
            throw new RuntimeException("Missing conf: " + BUCKET_NAME_PROPERTY);
        }

        if (!isBlank(bucketNamePrefix) && !bucketNamePrefix.endsWith("/")) {
            log.warn(String.format("%s %s S3 bucket prefix should end by '/' " + ": added automatically.",
                    BUCKET_PREFIX_PROPERTY, bucketNamePrefix));
            bucketNamePrefix += "/";
        }

        // set up credentials
        if (isBlank(awsID) || isBlank(awsSecret)) {
            awsCredentialsProvider = InstanceProfileCredentialsProvider.getInstance();
            try {
                // fail early when no instance profile credentials can be obtained
                awsCredentialsProvider.getCredentials();
            } catch (AmazonClientException e) {
                // keep the cause so the underlying SDK failure is visible in the logs
                throw new RuntimeException("Missing AWS credentials and no instance role found", e);
            }
        } else {
            awsCredentialsProvider = new BasicAWSCredentialsProvider(awsID, awsSecret);
        }

        // set up client configuration
        clientConfiguration = new ClientConfiguration();
        if (isNotBlank(proxyHost)) {
            clientConfiguration.setProxyHost(proxyHost);
        }
        if (isNotBlank(proxyPort)) {
            clientConfiguration.setProxyPort(Integer.parseInt(proxyPort));
        }
        if (isNotBlank(proxyLogin)) {
            clientConfiguration.setProxyUsername(proxyLogin);
        }
        if (proxyPassword != null) { // could be blank
            clientConfiguration.setProxyPassword(proxyPassword);
        }
        if (maxConnections > 0) {
            clientConfiguration.setMaxConnections(maxConnections);
        }
        if (maxErrorRetry >= 0) { // 0 is allowed
            clientConfiguration.setMaxErrorRetry(maxErrorRetry);
        }
        if (connectionTimeout >= 0) { // 0 is allowed
            clientConfiguration.setConnectionTimeout(connectionTimeout);
        }
        if (socketTimeout >= 0) { // 0 is allowed
            clientConfiguration.setSocketTimeout(socketTimeout);
        }

        // set up encryption
        encryptionMaterials = null;
        if (isNotBlank(keystoreFile)) {
            boolean confok = true;
            if (keystorePass == null) { // could be blank
                log.error("Keystore password missing");
                confok = false;
            }
            if (isBlank(privkeyAlias)) {
                log.error("Key alias missing");
                confok = false;
            }
            if (privkeyPass == null) { // could be blank
                log.error("Key password missing");
                confok = false;
            }
            if (!confok) {
                throw new RuntimeException("S3 Crypto configuration incomplete");
            }
            encryptionMaterials = loadEncryptionMaterials(keystoreFile, keystorePass, privkeyAlias, privkeyPass);
            cryptoConfiguration = new CryptoConfiguration();
        }
        isEncrypted = encryptionMaterials != null;

        // A client created through a builder is immutable, so a custom endpoint has to be given to the builder
        // itself; the builder accepts either an endpoint configuration or a region, not both.
        EndpointConfiguration endpointConfiguration = null;
        if (isNotBlank(endpoint)) {
            endpointConfiguration = new EndpointConfiguration(endpoint, bucketRegion);
        }
        if (!isEncrypted) {
            AmazonS3ClientBuilder builder = AmazonS3ClientBuilder.standard()
                                                                 .withCredentials(awsCredentialsProvider)
                                                                 .withClientConfiguration(clientConfiguration);
            if (endpointConfiguration != null) {
                builder.setEndpointConfiguration(endpointConfiguration);
            } else {
                builder.setRegion(bucketRegion);
            }
            amazonS3 = builder.build();
        } else {
            AmazonS3EncryptionClientBuilder builder = AmazonS3EncryptionClientBuilder.standard()
                    .withClientConfiguration(clientConfiguration)
                    .withCryptoConfiguration(cryptoConfiguration)
                    .withCredentials(awsCredentialsProvider)
                    .withEncryptionMaterials(new StaticEncryptionMaterialsProvider(encryptionMaterials));
            if (endpointConfiguration != null) {
                builder.setEndpointConfiguration(endpointConfiguration);
            } else {
                builder.setRegion(bucketRegion);
            }
            amazonS3 = builder.build();
        }

        // Try to create bucket if it doesn't exist
        try {
            if (!amazonS3.doesBucketExist(bucketName)) {
                amazonS3.createBucket(bucketName);
                amazonS3.setBucketAcl(bucketName, CannedAccessControlList.Private);
            }
        } catch (AmazonClientException e) {
            throw new IOException(e);
        }

        // compat for NXP-17895, using "downloadfroms3", to be removed
        // these two fields have already been initialized by the base class initialize()
        // using standard property "directdownload"
        String dd = getProperty(DIRECTDOWNLOAD_PROPERTY_COMPAT);
        if (dd != null) {
            directDownload = Boolean.parseBoolean(dd);
        }
        int dde = getIntProperty(DIRECTDOWNLOAD_EXPIRE_PROPERTY_COMPAT);
        if (dde >= 0) {
            directDownloadExpire = dde;
        }

        transferManager = TransferManagerBuilder.standard().withS3Client(amazonS3).build();
        abortOldUploads();
    }

    /**
     * Loads the key pair stored under {@code privkeyAlias} in the given keystore and wraps it as S3 encryption
     * materials.
     *
     * @param keystoreFile path of the keystore file
     * @param keystorePass keystore password (may be blank, not {@code null})
     * @param privkeyAlias alias of the key entry
     * @param privkeyPass password of the key entry (may be blank, not {@code null})
     * @return the encryption materials built from the key pair
     * @throws RuntimeException if the alias is not a key entry or the keystore cannot be read
     */
    private static EncryptionMaterials loadEncryptionMaterials(String keystoreFile, String keystorePass,
            String privkeyAlias, String privkeyPass) {
        try {
            // Open keystore; try-with-resources closes the stream even when load() fails
            KeyStore keystore = KeyStore.getInstance(KeyStore.getDefaultType());
            try (FileInputStream ksStream = new FileInputStream(new File(keystoreFile))) {
                keystore.load(ksStream, keystorePass.toCharArray());
            }
            // Get keypair for alias
            if (!keystore.isKeyEntry(privkeyAlias)) {
                throw new RuntimeException("Alias " + privkeyAlias + " is missing or not a key alias");
            }
            PrivateKey privKey = (PrivateKey) keystore.getKey(privkeyAlias, privkeyPass.toCharArray());
            Certificate cert = keystore.getCertificate(privkeyAlias);
            PublicKey pubKey = cert.getPublicKey();
            KeyPair keypair = new KeyPair(pubKey, privKey);
            // Get encryptionMaterials from keypair
            return new EncryptionMaterials(keypair);
        } catch (IOException | GeneralSecurityException e) {
            throw new RuntimeException("Could not read keystore: " + keystoreFile + ", alias: " + privkeyAlias, e);
        }
    }

    /**
     * Deletes the S3 object whose key is the bucket prefix followed by {@code digest}.
     *
     * @param digest the blob digest
     */
    protected void removeBinary(String digest) {
        amazonS3.deleteObject(bucketName, bucketNamePrefix + digest);
    }

    @Override
    protected String getSystemPropertyPrefix() {
        return SYSTEM_PROPERTY_PREFIX;
    }

    @Override
    protected BinaryGarbageCollector instantiateGarbageCollector() {
        return new S3BinaryGarbageCollector(this);
    }

    /**
     * Deletes each of the given digests from the bucket, one request per digest.
     *
     * @param digests the digests to delete
     */
    @Override
    public void removeBinaries(Collection<String> digests) {
        digests.forEach(this::removeBinary);
    }

    /**
     * Tells whether the exception denotes a missing S3 key.
     *
     * @param e the exception raised by the S3 client
     * @return {@code true} if {@code e} is an {@link AmazonServiceException} with HTTP status 404, error code
     *         {@code NoSuchKey} or message {@code Not Found}; {@code false} otherwise
     */
    protected static boolean isMissingKey(AmazonClientException e) {
        if (e instanceof AmazonServiceException) {
            AmazonServiceException ase = (AmazonServiceException) e;
            return (ase.getStatusCode() == 404) || "NoSuchKey".equals(ase.getErrorCode())
                    || "Not Found".equals(e.getMessage());
        }
        return false;
    }

    /**
     * Tells whether the string is made of exactly 32 lowercase hexadecimal characters.
     *
     * @param digest the string to check
     * @return {@code true} if the whole string matches
     */
    public static boolean isMD5(String digest) {
        return MD5_RE.matcher(digest).matches();
    }

    @Override
    protected FileStorage getFileStorage() {
        return new S3FileStorage();
    }

    /**
     * File storage that uploads to and downloads from the configured bucket through the transfer manager.
     */
    public class S3FileStorage implements FileStorage {

        /**
         * Uploads the file under the key prefix + digest unless an object with that key already exists.
         *
         * @param digest the blob digest, used as object key (after the prefix)
         * @param file the file to upload
         * @throws IOException if the S3 client fails for a reason other than a missing key
         */
        @Override
        public void storeFile(String digest, File file) throws IOException {
            long t0 = 0;
            if (log.isDebugEnabled()) {
                t0 = System.currentTimeMillis();
                log.debug("storing blob " + digest + " to S3");
            }
            String key = bucketNamePrefix + digest;
            try {
                amazonS3.getObjectMetadata(bucketName, key);
                if (log.isDebugEnabled()) {
                    log.debug("blob " + digest + " is already in S3");
                }
            } catch (AmazonClientException e) {
                if (!isMissingKey(e)) {
                    throw new IOException(e);
                }
                // not already present -> store the blob
                PutObjectRequest request;
                if (!isEncrypted) {
                    request = new PutObjectRequest(bucketName, key, file);
                    if (userServerSideEncryption) {
                        ObjectMetadata objectMetadata = new ObjectMetadata();
                        objectMetadata.setSSEAlgorithm(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION);
                        request.setMetadata(objectMetadata);
                    }
                } else {
                    request = new EncryptedPutObjectRequest(bucketName, key, file);
                }
                Upload upload = transferManager.upload(request);
                try {
                    upload.waitForUploadResult();
                } catch (AmazonClientException ee) {
                    throw new IOException(ee);
                } catch (InterruptedException ee) {
                    // reset interrupted status
                    Thread.currentThread().interrupt();
                    // continue interrupt
                    throw new RuntimeException(ee);
                } finally {
                    if (log.isDebugEnabled()) {
                        long dtms = System.currentTimeMillis() - t0;
                        log.debug("stored blob " + digest + " to S3 in " + dtms + "ms");
                    }
                }
            }
        }

        /**
         * Downloads the object with key prefix + digest into the file and, when not client-side encrypted, verifies
         * its MD5 if the ETag differs from the digest.
         *
         * @param digest the blob digest
         * @param file the destination file
         * @return {@code true} if the object was downloaded, {@code false} if the key does not exist
         * @throws IOException if the download fails or the downloaded content does not match the digest
         */
        @Override
        public boolean fetchFile(String digest, File file) throws IOException {
            long t0 = 0;
            if (log.isDebugEnabled()) {
                t0 = System.currentTimeMillis();
                log.debug("fetching blob " + digest + " from S3");
            }
            try {
                Download download = transferManager.download(
                        new GetObjectRequest(bucketName, bucketNamePrefix + digest), file);
                download.waitForCompletion();
                // Check ETag it is by default MD5 if not multipart
                if (!isEncrypted && !digest.equals(download.getObjectMetadata().getETag())) {
                    // In case of multipart it will happen, verify the downloaded file
                    String currentDigest;
                    try (FileInputStream input = new FileInputStream(file)) {
                        currentDigest = DigestUtils.md5Hex(input);
                    }
                    if (!currentDigest.equals(digest)) {
                        log.error("Invalid ETag in S3, currentDigest=" + currentDigest + " expectedDigest=" + digest);
                        throw new IOException("Invalid S3 object, it is corrupted expected digest is " + digest
                                + " got " + currentDigest);
                    }
                }
                return true;
            } catch (AmazonClientException e) {
                if (!isMissingKey(e)) {
                    throw new IOException(e);
                }
                return false;
            } catch (InterruptedException e) {
                // reset interrupted status
                Thread.currentThread().interrupt();
                // continue interrupt
                throw new RuntimeException(e);
            } finally {
                if (log.isDebugEnabled()) {
                    long dtms = System.currentTimeMillis() - t0;
                    log.debug("fetched blob " + digest + " from S3 in " + dtms + "ms");
                }
            }

        }
    }

    /**
     * Garbage collector for S3 binaries that stores the marked (in use) binaries in memory.
     */
    public static class S3BinaryGarbageCollector extends AbstractBinaryGarbageCollector<S3BinaryManager> {

        protected S3BinaryGarbageCollector(S3BinaryManager binaryManager) {
            super(binaryManager);
        }

        @Override
        public String getId() {
            return "s3:" + binaryManager.bucketName;
        }

        /**
         * Lists every object under the bucket prefix, updates the GC status counters and collects the digests that
         * are not marked.
         *
         * @return the digests of the objects that are not marked
         */
        @Override
        public Set<String> getUnmarkedBlobs() {
            // list S3 objects in the bucket
            // record those not marked
            Set<String> unmarked = new HashSet<>();
            int prefixLength = binaryManager.bucketNamePrefix.length();
            ObjectListing list = null;
            do {
                if (list == null) {
                    list = binaryManager.amazonS3.listObjects(binaryManager.bucketName,
                            binaryManager.bucketNamePrefix);
                } else {
                    list = binaryManager.amazonS3.listNextBatchOfObjects(list);
                }
                for (S3ObjectSummary summary : list.getObjectSummaries()) {
                    String digest = summary.getKey().substring(prefixLength);
                    if (!isMD5(digest)) {
                        // ignore files that cannot be MD5 digests for
                        // safety
                        continue;
                    }
                    long length = summary.getSize();
                    if (marked.contains(digest)) {
                        status.numBinaries++;
                        status.sizeBinaries += length;
                    } else {
                        status.numBinariesGC++;
                        status.sizeBinariesGC += length;
                        // record file to delete
                        // (the digest is not in the marked set here, so there is nothing to remove from it)
                        unmarked.add(digest);
                    }
                }
            } while (list.isTruncated());

            return unmarked;
        }
    }

    // ******************** BlobProvider ********************

    @Override
    public Blob readBlob(BlobInfo blobInfo) throws IOException {
        // just delegate to avoid copy/pasting code
        return new BinaryBlobProvider(this).readBlob(blobInfo);
    }

    @Override
    public String writeBlob(Blob blob, Document doc) throws IOException {
        // just delegate to avoid copy/pasting code
        return new BinaryBlobProvider(this).writeBlob(blob, doc);
    }

    @Override
    protected boolean isDirectDownload() {
        return directDownload;
    }

    /**
     * Builds a presigned GET URL for the object with key prefix + digest, valid for {@code directDownloadExpire}
     * seconds, overriding the response content type and content disposition from the blob.
     *
     * @param digest the blob digest
     * @param blob the blob, used to compute the response headers
     * @param servletRequest the current request (not used here)
     * @return the presigned URL as a URI
     * @throws IOException if the generated URL is not a valid URI
     */
    @Override
    protected URI getRemoteUri(String digest, ManagedBlob blob, HttpServletRequest servletRequest)
            throws IOException {
        String key = bucketNamePrefix + digest;
        Date expiration = new Date();
        // multiply as long: seconds * 1000 must not be computed in int arithmetic
        expiration.setTime(expiration.getTime() + directDownloadExpire * 1000L);
        GeneratePresignedUrlRequest request = new GeneratePresignedUrlRequest(bucketName, key, HttpMethod.GET);
        request.addRequestParameter("response-content-type", getContentTypeHeader(blob));
        request.addRequestParameter("response-content-disposition", getContentDispositionHeader(blob, null));
        request.setExpiration(expiration);
        URL url = amazonS3.generatePresignedUrl(request);
        try {
            return url.toURI();
        } catch (URISyntaxException e) {
            throw new IOException(e);
        }
    }

}