In this article you will find:
...
Open the backend of your Sitefinity instance
Navigate to Administration → Settings and click Advanced (your-site-domain/Sitefinity/Administration/Settings/Advanced)
Open the Hawksearch configuration
Under document size limit, enter 100KB
Save the changes
Upload content above the limit
...
Open the backend of your Sitefinity instance
Navigate to Content → Documents & Files
Upload files above the document size limit, e.g. 20MB
Navigate to Administration → Search indexes (your-site-domain/Sitefinity/Administration/Search indexes)
Open your currently used and active Index
Select Documents from the scope. This will add the documents you have uploaded to the index
Note |
---|
During indexing, the files are stripped and only the text content is extracted. Some files contain a lot of metadata or embedded resources (e.g. photos), so a 20MB .pdf may contain only 2MB of actual data. |
Info |
---|
Set up the search service: In order to index documents above the document size limit, you need to inherit the HawksearchService class and override the AdaptDocuments method. Here we will demonstrate how to: |
...
Empty content field
Code Block | ||||
---|---|---|---|---|
| ||||
using System;
using System.Collections.Generic;
using System.Linq;
using Hawksearch.Search;
using Telerik.Sitefinity.Services.Search.Data;
using Telerik.Sitefinity.Configuration;
using Telerik.Sitefinity.Services.Search.Model;
using Hawksearch.Configuration;
using Hawksearch.SDK.Indexing;
namespace SitefinityWebApp.Search
{
/// <summary>
/// Search service that empties the "Content" field of library documents whose
/// combined field data is larger than the configured Hawksearch document size limit,
/// so that the remaining fields of such documents are still passed to the base
/// implementation for indexing.
/// </summary>
public class CustomSearchService : HawksearchService
{
    private const string DocumentContentType = "Telerik.Sitefinity.Libraries.Model.Document";
    private const string ContentTypeFieldName = "ContentType";
    private const string ContentFieldName = "Content";

    /// <summary>
    /// Replaces oversized library documents with a copy whose content is emptied and
    /// then delegates to the base implementation.
    /// </summary>
    /// <param name="documents">The documents that are about to be indexed.</param>
    /// <returns>The result of the base implementation for the (possibly modified) documents.</returns>
    protected override List<SubmitDocument> AdaptDocuments(IEnumerable<IDocument> documents)
    {
        // Materialize once: the incoming sequence may be lazy, so it must not be
        // enumerated separately for the type check, the copy and the loop.
        var documentList = documents.ToList();

        if (this.IsLibraryDocumentBatch(documentList))
        {
            var hawkConfig = ConfigManager.GetManager().GetSection<HawkSearchConfig>();

            for (var index = 0; index < documentList.Count; index++)
            {
                var current = documentList[index];

                // NOTE(review): DocumentSizeLimit is presumably expressed in KB, like the
                // value computed by CalculateDocumentSize - confirm against HawkSearchConfig.
                if (this.CalculateDocumentSize(current) > hawkConfig.DocumentSizeLimit)
                {
                    documentList[index] = this.ModifyDocument(current);
                }
            }
        }

        return base.AdaptDocuments(documentList);
    }

    /// <summary>
    /// Checks whether the batch holds library documents. Only the first item is
    /// inspected, so the batch is assumed to contain a single content type.
    /// </summary>
    private bool IsLibraryDocumentBatch(List<IDocument> documentList)
    {
        var first = documentList.FirstOrDefault();
        if (first == null)
        {
            return false;
        }

        var contentTypeField = first.Fields.FirstOrDefault(f => f.Name == ContentTypeFieldName);

        // A field without a value cannot identify the type; treat it as "not a library
        // document" instead of failing with a NullReferenceException.
        if (contentTypeField == null || contentTypeField.Value == null)
        {
            return false;
        }

        // Type names are identifiers, so they are compared ordinally rather than linguistically.
        return string.Equals(contentTypeField.Value.ToString(), DocumentContentType, StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Sums the UTF-16 byte count of the string form of every non-null field value.
    /// </summary>
    /// <param name="document">The document to measure.</param>
    /// <returns>The size in kilobytes (bytes / 1024).</returns>
    private double CalculateDocumentSize(IDocument document)
    {
        return document.Fields
            .Where(field => field.Value != null)
            .Sum(field => System.Text.Encoding.Unicode.GetByteCount(field.Value.ToString()) / 1024.0);
    }

    /// <summary>
    /// Builds a new document with the same fields and identity field name, after
    /// setting the value of the "Content" field (when present) to an empty string.
    /// </summary>
    /// <param name="document">The oversized document.</param>
    /// <returns>A new document that shares the field instances of <paramref name="document"/>.</returns>
    private IDocument ModifyDocument(IDocument document)
    {
        var fields = new List<IField>(document.Fields);
        var contentField = fields.FirstOrDefault(f => f.Name == ContentFieldName);

        if (contentField != null)
        {
            // The field instance is shared with the source document, so the source
            // document's content is emptied as well.
            contentField.Value = string.Empty;
        }

        return new Document(fields, document.IdentityField.Name);
    }
}
} |
Note |
---|
Take the first 500 words
Code Block | ||||
---|---|---|---|---|
| ||||
Once you implement the code in Visual Studio, build your solution. You will also have to reindex the index you are using from Administration → Search Indexes → Action → Reindex |
Info |
---|
Expected results: Now if you inspect your frontend page, you should be able to see the title of your large document, but not the content. There will also be no content field in the XHR search → results → document fields |
Take the first 500 words
Code Block | ||
---|---|---|
| ||
using System;
using System.Collections.Generic;
using System.Linq;
using Hawksearch.Search;
using Telerik.Sitefinity.Services.Search.Data;
using Telerik.Sitefinity.Configuration;
using Telerik.Sitefinity.Services.Search.Model;
using Hawksearch.Configuration;
using Hawksearch.SDK.Indexing;
using Field = Telerik.Sitefinity.Services.Search.Publishing.Field;

namespace SitefinityWebApp.Search
{
    /// <summary>
    /// Search service that truncates the "Content" field of library documents whose
    /// combined field data is larger than the configured Hawksearch document size limit,
    /// keeping only the first <see cref="ContentWordLimit"/> words.
    /// </summary>
    public class CustomSearchService : HawksearchService
    {
        private const string DocumentContentType = "Telerik.Sitefinity.Libraries.Model.Document";
        private const string ContentTypeFieldName = "ContentType";
        private const string ContentFieldName = "Content";

        /// <summary>Number of leading words kept from the content of an oversized document.</summary>
        private const int ContentWordLimit = 500;

        /// <summary>
        /// Replaces oversized library documents with a copy whose content is truncated and
        /// then delegates to the base implementation.
        /// </summary>
        /// <param name="documents">The documents that are about to be indexed.</param>
        /// <returns>The result of the base implementation for the (possibly modified) documents.</returns>
        protected override List<SubmitDocument> AdaptDocuments(IEnumerable<IDocument> documents)
        {
            // Materialize once: the incoming sequence may be lazy, so it must not be
            // enumerated separately for the type check, the copy and the loop.
            var documentList = documents.ToList();

            if (this.IsLibraryDocumentBatch(documentList))
            {
                var hawkConfig = ConfigManager.GetManager().GetSection<HawkSearchConfig>();

                for (var index = 0; index < documentList.Count; index++)
                {
                    var current = documentList[index];

                    // NOTE(review): DocumentSizeLimit is presumably expressed in KB, like the
                    // value computed by CalculateDocumentSize - confirm against HawkSearchConfig.
                    if (this.CalculateDocumentSize(current) > hawkConfig.DocumentSizeLimit)
                    {
                        documentList[index] = this.ModifyDocument(current);
                    }
                }
            }

            return base.AdaptDocuments(documentList);
        }

        /// <summary>
        /// Checks whether the batch holds library documents. Only the first item is
        /// inspected, so the batch is assumed to contain a single content type.
        /// </summary>
        private bool IsLibraryDocumentBatch(List<IDocument> documentList)
        {
            var first = documentList.FirstOrDefault();
            if (first == null)
            {
                return false;
            }

            var contentTypeField = first.Fields.FirstOrDefault(f => f.Name == ContentTypeFieldName);

            // A field without a value cannot identify the type; treat it as "not a library
            // document" instead of failing with a NullReferenceException.
            if (contentTypeField == null || contentTypeField.Value == null)
            {
                return false;
            }

            // Type names are identifiers, so they are compared ordinally rather than linguistically.
            return string.Equals(contentTypeField.Value.ToString(), DocumentContentType, StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Sums the UTF-16 byte count of the string form of every non-null field value.
        /// </summary>
        /// <param name="document">The document to measure.</param>
        /// <returns>The size in kilobytes (bytes / 1024).</returns>
        private double CalculateDocumentSize(IDocument document)
        {
            return document.Fields
                .Where(field => field.Value != null)
                .Sum(field => System.Text.Encoding.Unicode.GetByteCount(field.Value.ToString()) / 1024.0);
        }

        /// <summary>
        /// Builds a new document with the same identity field name in which the "Content"
        /// field (when present) is replaced by its truncated version, appended after the
        /// other fields.
        /// </summary>
        /// <param name="document">The oversized document.</param>
        /// <returns>A new document; the fields of <paramref name="document"/> are not modified.</returns>
        private IDocument ModifyDocument(IDocument document)
        {
            var fields = new List<IField>(document.Fields);
            var contentField = fields.FirstOrDefault(f => f.Name == ContentFieldName);

            // Documents without a "Content" field have nothing to truncate; guarding here
            // avoids dereferencing the null returned by FirstOrDefault.
            if (contentField != null)
            {
                fields.Remove(contentField);
                fields.Add(this.ExtractFieldContent(contentField, ContentWordLimit));
            }

            return new Document(fields, document.IdentityField.Name);
        }

        /// <summary>
        /// Creates a "Content" field holding at most <paramref name="wordLimit"/> leading
        /// words of the given field's value.
        /// </summary>
        /// <param name="contentField">The original content field.</param>
        /// <param name="wordLimit">The maximum number of words to keep.</param>
        /// <returns>
        /// A new field with the truncated text, or <paramref name="contentField"/> itself
        /// when it is null or its value is null, empty or white space.
        /// </returns>
        private IField ExtractFieldContent(IField contentField, int wordLimit)
        {
            if (contentField == null || contentField.Value == null)
            {
                return contentField;
            }

            var fieldValue = contentField.Value.ToString();
            if (string.IsNullOrWhiteSpace(fieldValue))
            {
                return contentField;
            }

            // Split on any white space (a null separator means "white space") and skip empty
            // entries, so that line breaks, tabs and repeated spaces in the extracted text do
            // not distort the word count.
            var words = fieldValue
                .Split((char[])null, StringSplitOptions.RemoveEmptyEntries)
                .Take(wordLimit);

            return new Field { Name = ContentFieldName, Value = string.Join(" ", words) };
        }
    }
}
Note |
---|
Once you implement the code in Visual Studio, build your solution. You will also have to reindex the index you are using from Administration → Search Indexes → Action → Reindex |
Info |
---|
Expected results: Now if you inspect your frontend page, you should be able to see the content with the first 500 words of your large document. There will also be a content field with the first 500 words in the XHR search → results → document fields |
Register Custom Search Service
...