Skip to content
This repository was archived by the owner on Jan 19, 2021. It is now read-only.

Commit e7c9da0

Browse files
authored
Merge pull request #1575 from wobba/GetSearchCrawlLog
Added cmdlet for fetching crawllog entries.
2 parents 4aa955f + e0835ce commit e7c9da0

File tree

3 files changed

+192
-0
lines changed

3 files changed

+192
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
2121
- Added -Timeout option to Add-PnPApp
2222
- Added -CollapseSpecification option to Submit-PnPSearchQuery
2323
- Added -InSiteHierarchy to Get-PnPField to search for fields in the site collection
24+
- Added Get-PnPSearchCrawlLog
2425

2526
### Changed
2627
- Fix for issue where using Add-PnPFile and setting Created and Modified did not update values
Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using System.Management.Automation;
5+
using Microsoft.SharePoint.Client;
6+
using Microsoft.SharePoint.Client.Search.Administration;
7+
using SharePointPnP.PowerShell.CmdletHelpAttributes;
8+
9+
namespace SharePointPnP.PowerShell.Commands.Search
10+
{
11+
/// <summary>
/// Severity filter for crawl log entries. The numeric values map directly to
/// the error-level codes expected by the crawl log API.
/// </summary>
public enum LogLevel
{
    // -1 disables level filtering when passed to GetCrawledUrls.
    All = -1,
    Success = 0,
    Warning = 1,
    Error = 2
}
18+
19+
/// <summary>
/// Which crawled content to query: regular SharePoint site content or user profiles.
/// </summary>
public enum ContentSource
{
    Sites,
    UserProfiles
}
24+
25+
/// <summary>
/// A single row from the search crawl log, mapped from the raw dictionary
/// returned by DocumentCrawlLog.GetCrawledUrls (see MapCrawlLogEntry).
/// </summary>
public class CrawlEntry
{
    // Full URL of the crawled item ("FullUrl" column).
    public string Url { get; set; }
    // UTC time stamp of the crawl ("TimeStampUtc" column).
    public DateTime CrawlTime { get; set; }
    // Repository last-modified time ("LastRepositoryModifiedTime" column,
    // converted from file time); left at default when the column is 0/absent.
    public DateTime ItemTime { get; set; }
    // Severity of the entry ("ErrorLevel" column).
    public LogLevel LogLevel { get; set; }
    // Status message of the crawl ("StatusMessage" column).
    public string Status { get; set; }
    // Crawl log item id ("URLID" column).
    public int ItemId { get; set; }
    // Id of the content source the item belongs to ("ContentSourceID" column).
    public int ContentSourceId { get; set; }
}
35+
36+
[Cmdlet(VerbsCommon.Get, "PnPSearchCrawlLog", DefaultParameterSetName = "Xml")]
[CmdletHelp("Returns entries from the SharePoint search crawl log",
    SupportedPlatform = CmdletSupportedPlatform.Online,
    Category = CmdletHelpCategory.Search)]
[CmdletExample(
    Code = @"PS:> Get-PnPSearchCrawlLog",
    Remarks = "Returns the last 100 crawl log entries for site content.",
    SortOrder = 1)]
[CmdletExample(
    Code = @"PS:> Get-PnPSearchCrawlLog ""https://<tenant>-my.sharepoint.com/personal""",
    Remarks = "Returns the last 100 crawl log entries for OneDrive content.",
    SortOrder = 2)]
[CmdletExample(
    Code = @"PS:> Get-PnPSearchCrawlLog -ContentSource UserProfiles ",
    Remarks = "Returns the last 100 crawl log entries for user profiles.",
    SortOrder = 3)]
[CmdletExample(
    Code = @"PS:> Get-PnPSearchCrawlLog -ContentSource UserProfiles -Filter ""mikael""",
    Remarks = @"Returns the last 100 crawl log entries for user profiles with the term ""mikael"" in the user principal name.",
    SortOrder = 4)]
[CmdletExample(
    // Fixed example: the original text was missing the dash on -LogLevel and
    // would not have parsed as a valid PowerShell command line.
    Code = @"PS:> Get-PnPSearchCrawlLog -ContentSource Sites -LogLevel Error -RowLimit 10",
    Remarks = @"Returns the last 10 crawl log entries with a state of Error for site content.",
    SortOrder = 5)]
[CmdletExample(
    Code = @"PS:> Get-PnPSearchCrawlLog -EndDate (Get-Date).AddDays(-100)",
    Remarks = @"Returns the last 100 crawl log entries for site content up until 100 days ago.",
    SortOrder = 6)]
public class GetSearchCrawlLog : PnPWebCmdlet
{
    [Parameter(Mandatory = false, HelpMessage = "Filter what log entries to return (All, Success, Warning, Error). Defaults to All")]
    public LogLevel LogLevel = LogLevel.All;

    [Parameter(Mandatory = false, HelpMessage = "Number of entries to return. Defaults to 100.")]
    public int RowLimit = 100;

    [Parameter(Mandatory = false, HelpMessage = "Filter to limit what is being returned. Has to be a URL prefix for SharePoint content, and part of a user principal name for user profiles. Wildcard characters are not supported.")]
    public string Filter;

    [Parameter(Mandatory = false, HelpMessage = "Content to retrieve (Sites, User Profiles). Defaults to Sites.")]
    public ContentSource ContentSource = ContentSource.Sites;

    [Parameter(Mandatory = false, HelpMessage = "Start date to start getting entries from. Defaults to start of time.")]
    public DateTime StartDate = DateTime.MinValue;

    [Parameter(Mandatory = false, HelpMessage = "End date to stop getting entries from. Default to current time.")]
    public DateTime EndDate = DateTime.UtcNow.AddDays(1);

    // Upper bound on rows fetched server-side when the user-profile path has to
    // over-fetch and filter client-side.
    private const int MaxRows = 100000;

    /// <summary>
    /// Queries the tenant crawl log and writes the matching <see cref="CrawlEntry"/>
    /// items to the pipeline, newest first, limited to <see cref="RowLimit"/> items.
    /// </summary>
    protected override void ExecuteCmdlet()
    {
        try
        {
            var crawlLog = new DocumentCrawlLog(ClientContext, ClientContext.Site);
            ClientContext.Load(crawlLog);

            // Resolve the numeric content source id for the requested source.
            int contentSourceId;
            switch (ContentSource)
            {
                case ContentSource.Sites:
                    contentSourceId = GetContentSourceIdForSites(crawlLog);
                    break;
                case ContentSource.UserProfiles:
                    contentSourceId = GetContentSourceIdForUserProfiles(crawlLog);
                    break;
                default:
                    throw new ArgumentOutOfRangeException();
            }

            string postFilter = string.Empty;
            if (string.IsNullOrWhiteSpace(Filter) && ContentSource == ContentSource.Sites)
            {
                // Default site filter: the tenant's root SharePoint host.
                Filter = $"https://{GetHostName()}.sharepoint.com";
            }

            int origLimit = RowLimit;
            if (ContentSource == ContentSource.UserProfiles)
            {
                // Profiles live under the MySite host in the crawl log. The API
                // cannot filter on user names directly, so over-fetch and apply
                // the user-supplied filter client-side afterwards.
                postFilter = Filter;
                Filter = $"https://{GetHostName()}-my.sharepoint.com";
                RowLimit = MaxRows;
            }

            var logEntries = crawlLog.GetCrawledUrls(false, RowLimit, Filter, true, contentSourceId, (int)LogLevel, -1, StartDate, EndDate);
            ClientContext.ExecuteQueryRetry();

            var entries = new List<CrawlEntry>(logEntries.Value.Rows.Count);
            foreach (var dictionary in logEntries.Value.Rows)
            {
                var entry = MapCrawlLogEntry(dictionary);
                // Case-sensitive substring match; empty postFilter keeps everything.
                if (string.IsNullOrWhiteSpace(postFilter) || entry.Url.Contains(postFilter))
                {
                    entries.Add(entry);
                }
            }

            // Fix: sort before limiting so the *newest* origLimit entries are kept.
            // The original applied Take before OrderByDescending, which returned an
            // arbitrary slice (and could drop recent entries) whenever more than
            // origLimit rows were fetched, e.g. on the user-profile path.
            WriteObject(entries.OrderByDescending(i => i.CrawlTime).Take(origLimit).ToList());
        }
        catch (Exception e)
        {
            // Crawl log access must be granted explicitly per user in the search
            // admin center; surface that hint while preserving the original
            // exception as InnerException for diagnostics.
            WriteError(new ErrorRecord(
                new Exception("Make sure you are granted access to the crawl log via the SharePoint search admin center at https://<tenant>-admin.sharepoint.com/_layouts/15/searchadmin/TA_searchadministration.aspx", e),
                e.Message, ErrorCategory.AuthenticationError, null));
        }
    }

    #region Helper functions

    /// <summary>
    /// Derives the bare tenant name from the current client context URL by
    /// stripping the -admin/-public/-my host suffixes and the sharepoint.com domain.
    /// </summary>
    private string GetHostName()
    {
        return new Uri(ClientContext.Url).Host.Replace("-admin", "").Replace("-public", "").Replace("-my", "").Replace(".sharepoint.com", "");
    }

    /// <summary>
    /// Looks up the content source id used for regular site content by sampling
    /// a few recent crawl entries under the tenant's /sites path.
    /// </summary>
    private int GetContentSourceIdForSites(DocumentCrawlLog crawlLog)
    {
        var hostName = GetHostName();
        var spContent = crawlLog.GetCrawledUrls(false, 10, $"https://{hostName}.sharepoint.com/sites", true, -1, (int)LogLevel.All, -1, DateTime.Now.AddDays(-100), DateTime.Now.AddDays(1));
        ClientContext.ExecuteQueryRetry();
        // Throws InvalidOperationException if no rows came back; caught by the
        // caller's catch-all and reported as an access problem.
        return (int)spContent.Value.Rows.First()["ContentSourceID"];
    }

    /// <summary>
    /// Looks up the content source id used for user profiles by sampling recent
    /// crawl entries from the sps3s:// (people) protocol host.
    /// </summary>
    private int GetContentSourceIdForUserProfiles(DocumentCrawlLog crawlLog)
    {
        var hostName = GetHostName();
        var peopleContent = crawlLog.GetCrawledUrls(false, 100, $"sps3s://{hostName}-my.sharepoint.com", true, -1, (int)LogLevel.All, -1, DateTime.Now.AddDays(-100), DateTime.Now.AddDays(1));
        ClientContext.ExecuteQueryRetry();
        return (int)peopleContent.Value.Rows.First()["ContentSourceID"];
    }

    /// <summary>
    /// Maps one raw crawl log row (column name -> value) to a <see cref="CrawlEntry"/>.
    /// </summary>
    private static CrawlEntry MapCrawlLogEntry(Dictionary<string, object> dictionary)
    {
        var entry = new CrawlEntry
        {
            ItemId = (int)dictionary["URLID"],
            ContentSourceId = (int)dictionary["ContentSourceID"],
            Url = dictionary["FullUrl"].ToString(),
            CrawlTime = (DateTime)dictionary["TimeStampUtc"]
        };
        // "+ \"\"" null-safes the lookup; ticks stays 0 when the column is
        // missing, empty or non-numeric, leaving ItemTime at its default.
        long.TryParse(dictionary["LastRepositoryModifiedTime"] + "", out long ticks);
        if (ticks != 0)
        {
            var itemDate = DateTime.FromFileTimeUtc(ticks);
            entry.ItemTime = itemDate;
        }
        entry.LogLevel =
            (LogLevel)Enum.Parse(typeof(LogLevel), dictionary["ErrorLevel"].ToString());

        entry.Status = dictionary["StatusMessage"] + "";
        return entry;
    }
    #endregion
}
190+
}

Commands/SharePointPnP.PowerShell.Commands.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,7 @@
488488
<Compile Include="RecordsManagement\GetListManualRecordDeclaration.cs" />
489489
<Compile Include="RecordsManagement\GetInPlaceRecordsManagement.cs" />
490490
<Compile Include="RecordsManagement\SetListRecordDeclaration.cs" />
491+
<Compile Include="Search\GetSearchCrawlLog.cs" />
491492
<Compile Include="SiteDesigns\InvokeSiteDesign.cs" />
492493
<Compile Include="SiteDesigns\SetSiteDesign.cs" />
493494
<Compile Include="Diagnostic\MeasureResponseTimeMode.cs" />

0 commit comments

Comments
 (0)